/* Conversion loop frame work.
   Copyright (C) 1998-2002, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
/* This file provides a frame for the reader loop in all conversion modules.
   The actual code must (of course) be provided in the actual module source
   code but certain actions can be written down generically, with some
   customization options which are these:

     MIN_NEEDED_INPUT	minimal number of input bytes needed for the next
			conversion.
     MIN_NEEDED_OUTPUT	minimal number of bytes produced by the next round
			of conversion.

     MAX_NEEDED_INPUT	you guessed it, this is the maximal number of input
			bytes needed.  It defaults to MIN_NEEDED_INPUT.
     MAX_NEEDED_OUTPUT	likewise for output bytes.

     LOOPFCT		name of the function created.  If not specified
			the name is `loop' but this prevents the use
			of multiple functions in the same file.

     BODY		this is supposed to expand to the body of the loop.
			The user must provide this.

     EXTRA_LOOP_DECLS	extra arguments passed from the conversion loop call.

     INIT_PARAMS	code to define and initialize variables from params.
     UPDATE_PARAMS	code to store result in params.

     ONEBYTE_BODY	body of the specialized conversion function for a
			single byte from the current character set to INTERNAL.
*/
#include <assert.h>
#include <endian.h>
#include <gconv.h>
#include <stdint.h>
#include <string.h>
#include <wchar.h>
#include <sys/param.h>
#define __need_size_t
#include <stddef.h>
/* We have to provide support for machines which are not able to handle
   unaligned memory accesses.  Some of the character encodings have
   representations with a fixed width of 2 or 4 bytes.  But if we cannot
   access unaligned memory we still have to read byte-wise.  */
#undef FCTNAME2
#if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
# define get16(addr) *((__const uint16_t *) (addr))
# define get32(addr) *((__const uint32_t *) (addr))
# define put16(addr, val) *((uint16_t *) (addr)) = (val)
# define put32(addr, val) *((uint32_t *) (addr)) = (val)
# define FCTNAME2(name) name
#else
# if __BYTE_ORDER == __LITTLE_ENDIAN
# define get16(addr) \
(((__const unsigned char *) (addr))[1] << 8 \
| ((__const unsigned char *) (addr))[0])
# define get32(addr) \
(((((__const unsigned char *) (addr))[3] << 8 \
| ((__const unsigned char *) (addr))[2]) << 8 \
| ((__const unsigned char *) (addr))[1]) << 8 \
| ((__const unsigned char *) (addr))[0])
# define put16(addr, val) \
({ uint16_t __val = (val); \
((unsigned char *) (addr))[0] = __val; \
((unsigned char *) (addr))[1] = __val >> 8; \
(void) 0; })
# define put32(addr, val) \
({ uint32_t __val = (val); \
((unsigned char *) (addr))[0] = __val; \
__val >>= 8; \
((unsigned char *) (addr))[1] = __val; \
__val >>= 8; \
((unsigned char *) (addr))[2] = __val; \
__val >>= 8; \
((unsigned char *) (addr))[3] = __val; \
(void) 0; })
# else
# define get16(addr) \
(((__const unsigned char *) (addr))[0] << 8 \
| ((__const unsigned char *) (addr))[1])
# define get32(addr) \
(((((__const unsigned char *) (addr))[0] << 8 \
| ((__const unsigned char *) (addr))[1]) << 8 \
| ((__const unsigned char *) (addr))[2]) << 8 \
| ((__const unsigned char *) (addr))[3])
# define put16(addr, val) \
({ uint16_t __val = (val); \
((unsigned char *) (addr))[1] = __val; \
((unsigned char *) (addr))[0] = __val >> 8; \
(void) 0; })
# define put32(addr, val) \
({ uint32_t __val = (val); \
((unsigned char *) (addr))[3] = __val; \
__val >>= 8; \
((unsigned char *) (addr))[2] = __val; \
__val >>= 8; \
((unsigned char *) (addr))[1] = __val; \
__val >>= 8; \
((unsigned char *) (addr))[0] = __val; \
(void) 0; })
# endif
# define FCTNAME2(name) name##_unaligned
#endif
#define FCTNAME(name) FCTNAME2(name)
/* Validate the customization macros supplied by the including module and
   fill in the optional ones.  The input side comes first: the minimum is
   mandatory and must be positive, the maximum falls back to the minimum.  */
#if !defined MIN_NEEDED_INPUT
# error "MIN_NEEDED_INPUT definition missing"
#else
# if MIN_NEEDED_INPUT < 1
#  error "MIN_NEEDED_INPUT must be >= 1"
# endif
#endif
#if !defined MAX_NEEDED_INPUT
# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT
#endif

/* Same rules for the output side.  */
#if !defined MIN_NEEDED_OUTPUT
# error "MIN_NEEDED_OUTPUT definition missing"
#else
# if MIN_NEEDED_OUTPUT < 1
#  error "MIN_NEEDED_OUTPUT must be >= 1"
# endif
#endif
#if !defined MAX_NEEDED_OUTPUT
# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT
#endif

/* Name of the generated function; `loop' unless the module chose one.  */
#if !defined LOOPFCT
# define LOOPFCT loop
#endif

/* The loop body itself has no default.  */
#if !defined BODY
# error "Definition of BODY missing for function" LOOPFCT
#endif
/* If no arguments have to be passed to the loop function define the macro
   as empty.  */
#if !defined EXTRA_LOOP_DECLS
# define EXTRA_LOOP_DECLS /* no additional parameters */
#endif
/* To make it easier for the writers of the modules, we define a macro
   to test whether we have to ignore errors.  */
/* True when the step flags carry __GCONV_IGNORE_ERRORS and an
   `irreversible' counter is available.  Both `flags' and `irreversible'
   must be in scope where the macro is expanded.  */
#define ignore_errors_p() \
  ((flags & __GCONV_IGNORE_ERRORS) != 0 && irreversible != NULL)
/* Error handling for the FROM_LOOP direction, with ignoring of errors.
   Note that we cannot use the do while (0) trick since `break' and
   `continue' must reach certain points.  */
#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
  { \
    /* The status stays __GCONV_ILLEGAL_INPUT even when the sequence is \
       skipped, because of the constraint that "iconv -c" must give the \
       same exitcode as "iconv".  */ \
    result = __GCONV_ILLEGAL_INPUT; \
 \
    if (ignore_errors_p ()) \
      { \
        /* Count the invalid input byte sequence and step over it.  */ \
        ++*irreversible; \
        inptr += (Incr); \
        continue; \
      } \
 \
    /* Errors are not ignored: leave the enclosing loop.  */ \
    break; \
  }
/* Error handling for the TO_LOOP direction, with use of transliteration/
   transcription functions and ignoring of errors.  Note that we cannot use
   the do while (0) trick since `break' and `continue' must reach certain
   points.  */
#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
  { \
    /* The status stays __GCONV_ILLEGAL_INPUT even when the character is \
       skipped, because of the constraint that "iconv -c" must give the \
       same exitcode as "iconv".  */ \
    result = __GCONV_ILLEGAL_INPUT; \
 \
    /* A null `irreversible' means we are in a call from \
       __gconv_transliterate; in this case we are not doing any error \
       recovery ourselves.  Otherwise stop unless errors are to be \
       ignored.  */ \
    if (irreversible == NULL || ! ignore_errors_p ()) \
      break; \
 \
    /* When we come here it means we ignore the character.  */ \
    inptr += (Incr); \
    ++*irreversible; \
    continue; \
  }
/* Handling of Unicode TAG characters.  Unicode recommends
   "If language codes are not relevant to the particular processing
    operation, then they should be ignored."  This macro is usually
   called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr).  */
#define UNICODE_TAG_HANDLER(Character, Incr) \
  { \
    /* The TAG characters U+E0000..U+E007F are exactly the values that \
       agree with 0xe0000 in every bit above the low seven, so a single \
       shifted comparison recognizes the whole range.  */ \
    if ((0xe0000 >> 7) == ((Character) >> 7)) \
      { \
        /* Silently skip the character and go on with the next one.  */ \
        inptr += (Incr); \
        continue; \
      } \
  }
/* Generic conversion loop.  Runs BODY repeatedly on the input in
   [*INPTRP, INEND) and writes to [*OUTPTRP, OUTEND).  On return *INPTRP
   and *OUTPTRP are set to the positions reached.  The return value is the
   status: __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT or __GCONV_FULL_OUTPUT when one of the checks
   below stopped the loop, or whatever status BODY stored in `result'
   before leaving the loop.  */
static inline int
FCTNAME (LOOPFCT) (struct __gconv_step *step,
                   struct __gconv_step_data *step_data,
                   const unsigned char **inptrp, const unsigned char *inend,
                   unsigned char **outptrp, const unsigned char *outend,
                   size_t *irreversible EXTRA_LOOP_DECLS)
{
  /* The following locals exist only if the module asked for them; BODY
     and the error handling macros refer to them by name.  */
#ifdef LOOP_NEED_STATE
  mbstate_t *state = step_data->__statep;
#endif
#ifdef LOOP_NEED_FLAGS
  int flags = step_data->__flags;
#endif
#ifdef LOOP_NEED_DATA
  void *data = step->__data;
#endif
  int result = __GCONV_EMPTY_INPUT;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;

#ifdef INIT_PARAMS
  INIT_PARAMS;
#endif

  while (inptr != inend)
    {
      /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are made to help the
         compiler generating better code.  They will be optimized away
         since MIN_NEEDED_OUTPUT is always a constant.

         The remaining space is computed as a pointer difference instead
         of comparing `ptr + N' against the end pointer: forming a pointer
         more than one element past the end of the buffer is undefined.  */
      if (MIN_NEEDED_INPUT > 1
          && __builtin_expect (inend - inptr < MIN_NEEDED_INPUT, 0))
        {
          /* We don't have enough input for another complete input
             character.  */
          result = __GCONV_INCOMPLETE_INPUT;
          break;
        }
      if ((MIN_NEEDED_OUTPUT != 1
           && __builtin_expect (outend - outptr < MIN_NEEDED_OUTPUT, 0))
          || (MIN_NEEDED_OUTPUT == 1
              && __builtin_expect (outptr >= outend, 0)))
        {
          /* Overflow in the output buffer.  */
          result = __GCONV_FULL_OUTPUT;
          break;
        }

      /* Here comes the body the user provides.  It can stop with
         RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
         input characters vary in size), GCONV_ILLEGAL_INPUT, or
         GCONV_FULL_OUTPUT (if the output characters vary in size).  */
      BODY
    }

  /* Update the pointers pointed to by the parameters.  */
  *inptrp = inptr;
  *outptrp = outptr;
#ifdef UPDATE_PARAMS
  UPDATE_PARAMS;
#endif

  return result;
}
/* Include the file a second time to define the function to handle
   unaligned access.  */
/* Generate a second copy of the loop function that uses the byte-wise
   accessors.  This is done only on the first pass (DEFINE_UNALIGNED is not
   set), only when _STRING_ARCH_unaligned is not defined, and only when both
   the input and the output unit are wider than one byte with the maximum
   being an exact multiple of the minimum.  */
#if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \
&& MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \
&& MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0
/* Drop the direct-access accessors so that the re-inclusion can define the
   byte-wise ones without a redefinition conflict.  */
# undef get16
# undef get32
# undef put16
# undef put32
/* NOTE(review): no definition of `unaligned' is visible in this file;
   presumably a leftover -- confirm before removing.  */
# undef unaligned
/* With DEFINE_UNALIGNED set the accessor section at the top selects the
   byte-wise macros and FCTNAME2 appends `_unaligned' to the function name.
   NOTE(review): assumes "loop.c" names this very file -- confirm.  */
# define DEFINE_UNALIGNED
# include "loop.c"
# undef DEFINE_UNALIGNED
#endif
#if MAX_NEEDED_INPUT > 1
# define SINGLE(fct) SINGLE2 (fct)
# define SINGLE2(fct) fct##_single
/* Convert one character whose first bytes were saved in the conversion
   state by an earlier call.  The saved bytes are combined with bytes from
   [*INPTRP, INEND) in a local buffer and BODY is run once on that buffer.

   Returns __GCONV_OK when a character was converted (*INPTRP and *OUTPTRP
   are advanced and the saved bytes are cleared), __GCONV_INCOMPLETE_INPUT
   when the character is still incomplete (all available input is consumed
   and saved in the state), __GCONV_FULL_OUTPUT when the output buffer has
   no room, or any other status BODY stored in `result'.  */
static inline int
SINGLE(LOOPFCT) (struct __gconv_step *step,
                 struct __gconv_step_data *step_data,
                 const unsigned char **inptrp, const unsigned char *inend,
                 unsigned char **outptrp, unsigned char *outend,
                 size_t *irreversible EXTRA_LOOP_DECLS)
{
  mbstate_t *state = step_data->__statep;
# ifdef LOOP_NEED_FLAGS
  int flags = step_data->__flags;
# endif
# ifdef LOOP_NEED_DATA
  void *data = step->__data;
# endif
  int result = __GCONV_OK;
  unsigned char bytebuf[MAX_NEEDED_INPUT];
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  size_t inlen;

# ifdef INIT_PARAMS
  INIT_PARAMS;
# endif

# ifdef UNPACK_BYTES
  UNPACK_BYTES
# else
  /* Add the bytes from the state to the input buffer.  The low three bits
     of __count hold the number of saved bytes.  */
  for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
    bytebuf[inlen] = state->__value.__wchb[inlen];
# endif

  /* Are there enough bytes in the input buffer?  The test is only
     meaningful while fewer than MIN_NEEDED_INPUT bytes are buffered;
     checking that first keeps `MIN_NEEDED_INPUT - inlen' from wrapping
     around as an unsigned value.  */
  if (inlen < MIN_NEEDED_INPUT
      && __builtin_expect (inend - inptr
                           < (int) (MIN_NEEDED_INPUT - inlen), 0))
    {
      *inptrp = inend;

# ifdef STORE_REST
      /* Append the new input bytes to the buffered ones first; otherwise
         STORE_REST would only see the bytes which came from the state and
         the new bytes, already reported as consumed, would be lost.  The
         total is less than MIN_NEEDED_INPUT, so BYTEBUF is large enough.  */
      while (inptr < inend)
        bytebuf[inlen++] = *inptr++;

      inptr = bytebuf;
      inptrp = &inptr;
      inend = &bytebuf[inlen];

      STORE_REST
# else
      /* We don't have enough input for another complete input
         character.  Save the new bytes behind the ones already in the
         state and record the new number of saved bytes.  */
      assert (inlen + (size_t) (inend - inptr)
              <= sizeof (state->__value.__wchb));
      while (inptr < inend)
        state->__value.__wchb[inlen++] = *inptr++;
      state->__count = (state->__count & ~7) | inlen;
# endif

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Enough space available in the output buffer?  */
  if ((MIN_NEEDED_OUTPUT != 1 && outend - outptr < MIN_NEEDED_OUTPUT)
      || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
    /* Overflow in the output buffer.  */
    return __GCONV_FULL_OUTPUT;

  /* Now add characters from the normal input buffer.  */
  do
    bytebuf[inlen++] = *inptr++;
  while (inlen < MAX_NEEDED_INPUT && inptr < inend);

  inptr = bytebuf;
  inend = &bytebuf[inlen];

  /* Run the body exactly once; `break' and `continue' inside BODY both
     leave this pseudo loop.  */
  do
    {
      BODY
    }
  while (0);

  /* Now we either have produced an output character and consumed all the
     bytes from the state and at least one more, or the character is still
     incomplete, or we have some other error (like illegal input character,
     no space in output buffer).  */
  if (__builtin_expect (inptr != bytebuf, 1))
    {
      /* We found a new character.  */
      assert (inptr - bytebuf > (state->__count & 7));

      /* Only the bytes which did not come from the state count as
         consumed input.  */
      *inptrp += inptr - bytebuf - (state->__count & 7);
      *outptrp = outptr;

      result = __GCONV_OK;

      /* Clear the state buffer.  */
# ifdef CLEAR_STATE
      CLEAR_STATE;
# else
      state->__count &= ~7;
# endif
    }
  else if (result == __GCONV_INCOMPLETE_INPUT)
    {
      /* This can only happen if we have less than MAX_NEEDED_INPUT bytes
         available.  */
      assert (inend != &bytebuf[MAX_NEEDED_INPUT]);

      *inptrp += inend - bytebuf - (state->__count & 7);
# ifdef STORE_REST
      inptrp = &inptr;

      STORE_REST
# else
      /* We don't have enough input for another complete input
         character.  BYTEBUF holds the old and the new bytes, so the state
         is rewritten from its start (INLEN must be reset, otherwise the
         bytes would land behind their real position and past the end of
         the array) and the byte count is updated to match.  */
      assert ((size_t) (inend - inptr) <= sizeof (state->__value.__wchb));
      state->__count = (state->__count & ~7) | (inend - inptr);
      inlen = 0;
      while (inptr < inend)
        state->__value.__wchb[inlen++] = *inptr++;
# endif
    }

  return result;
}
# undef SINGLE
# undef SINGLE2
#endif
#ifdef ONEBYTE_BODY
/* Specialized conversion function for a single byte C; its body is
   supplied by the including module through ONEBYTE_BODY.  */
static wint_t
gconv_btowc (struct __gconv_step *step, unsigned char c)
ONEBYTE_BODY
/* Make the function known under the name FROM_ONEBYTE.  */
# define FROM_ONEBYTE gconv_btowc
#endif
/* We remove the macro definitions so that we can include this file again
   for the definition of another function.  */
/* Size limits.  */
#undef MAX_NEEDED_OUTPUT
#undef MIN_NEEDED_OUTPUT
#undef MAX_NEEDED_INPUT
#undef MIN_NEEDED_INPUT

/* Function name, loop body and parameter plumbing.  */
#undef BODY
#undef LOOPFCT
#undef EXTRA_LOOP_DECLS
#undef UPDATE_PARAMS
#undef INIT_PARAMS
#undef ONEBYTE_BODY

/* State handling hooks and requests for optional locals.  */
#undef CLEAR_STATE
#undef UNPACK_BYTES
#undef LOOP_NEED_DATA
#undef LOOP_NEED_FLAGS
#undef LOOP_NEED_STATE

/* Memory accessors; they are defined anew on the next inclusion.  */
#undef put32
#undef put16
#undef get32
#undef get16
#undef unaligned