summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2000-04-21 06:46:40 +0000
committerUlrich Drepper <drepper@redhat.com>2000-04-21 06:46:40 +0000
commit4a069c3345ea4e832c814a46267942226e53f9a2 (patch)
treeaca2ccb8f71c86ed142be0d8eb027d62c3eba15e
parent978ce92b89ad3433b4cec6986b3b9f535a52c83b (diff)
downloadglibc-4a069c3345ea4e832c814a46267942226e53f9a2.tar.gz
Update.
* iconv/gconv_simple.c: Define separate functions to convert from UCS4/UCS4-LE to the internal encoding. * iconv/gconv_builtin.h: Use separate functions to convert from UCS4/UCS4-LE to the internal encoding. * iconv/gconv_int.h: Declare __gconv_transform_ucs4_internal and __gconv_transform_ucs4le_internal. * iconv/gconv_simple.c (internal_utf8_loop): Correct check for output buffer overflow. Reported by Ulrich.Brink@sap.com.
-rw-r--r--ChangeLog10
-rw-r--r--iconv/gconv_builtin.h6
-rw-r--r--iconv/gconv_int.h2
-rw-r--r--iconv/gconv_simple.c321
4 files changed, 333 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 1b54df5372..268c7ea9b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2000-04-20 Ulrich Drepper <drepper@redhat.com>
+ * iconv/gconv_simple.c: Define separate functions to convert from
+ UCS4/UCS4-LE to the internal encoding.
+ * iconv/gconv_builtin.h: Use separate functions to convert from
+ UCS4/UCS4-LE to the internal encoding.
+ * iconv/gconv_int.h: Declare __gconv_transform_ucs4_internal and
+ __gconv_transform_ucs4le_internal.
+
+ * iconv/gconv_simple.c (internal_utf8_loop): Correct check for
+ output buffer overflow. Reported by Ulrich.Brink@sap.com.
+
* iconv/skeleton.c: Add some more __builtin_expect.
* iconv/loop.c: Likewise.
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 781d2014d8..351d6a0342 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -35,9 +35,8 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
4, 4, 4, 4)
BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
"INTERNAL", 1, "=ucs4->INTERNAL",
- __gconv_transform_internal_ucs4, NULL, NULL,
+ __gconv_transform_ucs4_internal, NULL, NULL,
4, 4, 4, 4)
-/* Please note that we need only one function for both direction. */
BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
"UCS-4LE//", 1, "=INTERNAL->ucs4le",
@@ -45,9 +44,8 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8,
4, 4, 4, 4)
BUILTIN_TRANSFORMATION (NULL, "UCS-4LE//", 15,
"INTERNAL", 1, "=ucs4le->INTERNAL",
- __gconv_transform_internal_ucs4le, NULL, NULL,
+ __gconv_transform_ucs4le_internal, NULL, NULL,
4, 4, 4, 4)
-/* Please note that we need only one function for both direction. */
BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index bc5d003325..4c8024be3e 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -172,7 +172,9 @@ __BUILTIN_TRANS (__gconv_transform_internal_ucs2);
__BUILTIN_TRANS (__gconv_transform_ucs2reverse_internal);
__BUILTIN_TRANS (__gconv_transform_internal_ucs2reverse);
__BUILTIN_TRANS (__gconv_transform_internal_ucs4);
+__BUILTIN_TRANS (__gconv_transform_ucs4_internal);
__BUILTIN_TRANS (__gconv_transform_internal_ucs4le);
+__BUILTIN_TRANS (__gconv_transform_ucs4le_internal);
__BUILTIN_TRANS (__gconv_transform_internal_utf16);
__BUILTIN_TRANS (__gconv_transform_utf16_internal);
# undef __BUITLIN_TRANS
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index d06db5a316..a8c07f1cbb 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -187,7 +187,173 @@ internal_ucs4_loop_single (const unsigned char **inptrp,
#include <iconv/skeleton.c>
-/* Similarly for the other byte order. */
+/* Transform from UCS4 to the internal, UCS4-like format. Unlike
+ for the other direction we have to check for correct values here. */
+#define DEFINE_INIT 0 /* NOTE(review): presumably asks skeleton.c not to emit an init function -- confirm. */
+#define DEFINE_FINI 0 /* NOTE(review): presumably asks skeleton.c not to emit a fini function -- confirm. */
+#define MIN_NEEDED_FROM 4 /* The loops below consume input in 4-byte units. */
+#define MIN_NEEDED_TO 4 /* The loops below produce output in 4-byte units. */
+#define FROM_DIRECTION 1 /* NOTE(review): presumably "always the from direction"; see skeleton.c. */
+#define FROM_LOOP ucs4_internal_loop /* Bulk conversion loop defined below. */
+#define TO_LOOP ucs4_internal_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_ucs4_internal /* Declared in iconv/gconv_int.h. */
+
+
+/* Convert complete 4-byte big-endian UCS4 characters from
+   [*INPTRP, INEND) into host-byte-order 32-bit values stored at
+   [*OUTPTRP, OUTEND).  Both buffers are accessed through uint32_t
+   pointers, so this variant is for suitably aligned buffers.
+
+   At most MIN (input bytes, output bytes) / 4 characters are handled.
+   *INPTRP and *OUTPTRP are updated to the first unprocessed position
+   on every return path.
+
+   Returns __GCONV_ILLEGAL_INPUT when a value above 0x7fffffff is met
+   (the pointers then designate the offending character),
+   __GCONV_FULL_OUTPUT when the output buffer is exhausted,
+   __GCONV_EMPTY_INPUT when all input was consumed, and
+   __GCONV_INCOMPLETE_INPUT otherwise (fewer than 4 bytes are left).
+   STATE, DATA and CONVERTED are not used here.  */
+static inline int
+ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+                    unsigned char **outptrp, unsigned char *outend,
+                    mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      uint32_t inval;
+
+      /* Bring the big-endian input value into host byte order.  */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+      inval = bswap_32 (*(uint32_t *) inptr);
+#else
+      inval = *(uint32_t *) inptr;
+#endif
+
+      if (inval > 0x7fffffff)
+        {
+          /* The value is too large.  Report how far we got.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+      /* Store the value that was just validated.  It is already in
+         host byte order; swapping the raw input unconditionally would
+         byte-reverse the result on big-endian hosts.  A plain store
+         followed by the pointer increment in the loop header also
+         avoids the non-standard cast-as-lvalue construct.  */
+      *(uint32_t *) outptr = inval;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Byte-wise variant of the UCS4 -> internal conversion, compiled only
+   when _STRING_ARCH_unaligned is not defined.  It never reads or
+   writes through a uint32_t pointer, so the buffers need no
+   particular alignment.
+
+   Converts complete 4-byte big-endian characters from
+   [*INPTRP, INEND) to host byte order at [*OUTPTRP, OUTEND) and
+   updates both pointers on every return path.
+
+   Returns __GCONV_ILLEGAL_INPUT for a value above 0x7fffffff,
+   __GCONV_FULL_OUTPUT when the output buffer is exhausted,
+   __GCONV_EMPTY_INPUT when all input was consumed, and
+   __GCONV_INCOMPLETE_INPUT otherwise.  STATE, DATA and CONVERTED are
+   not used here.  */
+static inline int
+ucs4_internal_loop_unaligned (const unsigned char **inptrp,
+                              const unsigned char *inend,
+                              unsigned char **outptrp, unsigned char *outend,
+                              mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      /* The input is big endian, so inptr[0] is the most significant
+         byte.  Any value with the top bit set exceeds 0x7fffffff;
+         this includes 0x80xxxxxx, hence the comparison with 0x7f.  */
+      if (inptr[0] > 0x7f)
+        {
+          /* The value is too large.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+      /* Host is little endian: reverse the byte order.  */
+      outptr[3] = inptr[0];
+      outptr[2] = inptr[1];
+      outptr[1] = inptr[2];
+      outptr[0] = inptr[3];
+# else
+      /* Host is big endian: copy the bytes unchanged.  */
+      outptr[0] = inptr[0];
+      outptr[1] = inptr[1];
+      outptr[2] = inptr[2];
+      outptr[3] = inptr[3];
+# endif
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Finish one UCS4 (big endian) character whose first bytes were
+   saved in STATE by an earlier call.
+
+   The low three bits of STATE->__count hold the number of bytes
+   already stored in STATE->__value.__wchb.  Bytes are taken from
+   [*INPTRP, INEND) until four are available; *INPTRP is advanced past
+   every byte taken.
+
+   Returns __GCONV_INCOMPLETE_INPUT if the input ran out first (the
+   new byte count is recorded in STATE), __GCONV_ILLEGAL_INPUT if the
+   completed value exceeds 0x7fffffff, and __GCONV_OK after the
+   character has been written in host byte order to *OUTPTRP, which is
+   then advanced by four bytes.  OUTEND, DATA and CONVERTED are not
+   used here.  */
+static inline int
+ucs4_internal_loop_single (const unsigned char **inptrp,
+                           const unsigned char *inend,
+                           unsigned char **outptrp, unsigned char *outend,
+                           mbstate_t *state, void *data, size_t *converted)
+{
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (cnt < 4)
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+  /* Byte 0 is the most significant one.  Every value with the top bit
+     set, 0x80xxxxxx included, is larger than 0x7fffffff.  */
+  if (((unsigned char *) state->__value.__wchb)[0] > 0x7f)
+    /* The value is too large.  */
+    return __GCONV_ILLEGAL_INPUT;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  /* Host is little endian: reverse the byte order.  */
+  (*outptrp)[0] = state->__value.__wchb[3];
+  (*outptrp)[1] = state->__value.__wchb[2];
+  (*outptrp)[2] = state->__value.__wchb[1];
+  (*outptrp)[3] = state->__value.__wchb[0];
+#else
+  /* Host is big endian: copy the bytes unchanged.  */
+  (*outptrp)[0] = state->__value.__wchb[0];
+  (*outptrp)[1] = state->__value.__wchb[1];
+  (*outptrp)[2] = state->__value.__wchb[2];
+  (*outptrp)[3] = state->__value.__wchb[3];
+#endif
+
+  /* Account for the character just written; otherwise whatever is
+     written next through *OUTPTRP would overwrite it.  */
+  *outptrp += 4;
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <iconv/skeleton.c>
+
+
+/* Similarly for the little endian form. */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
#define MIN_NEEDED_FROM 4
@@ -323,6 +489,157 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
#include <iconv/skeleton.c>
+/* And finally from UCS4-LE to the internal encoding. */
+#define DEFINE_INIT 0 /* NOTE(review): presumably asks skeleton.c not to emit an init function -- confirm. */
+#define DEFINE_FINI 0 /* NOTE(review): presumably asks skeleton.c not to emit a fini function -- confirm. */
+#define MIN_NEEDED_FROM 4 /* The loops below consume input in 4-byte units. */
+#define MIN_NEEDED_TO 4 /* The loops below produce output in 4-byte units. */
+#define FROM_DIRECTION 1 /* NOTE(review): presumably "always the from direction"; see skeleton.c. */
+#define FROM_LOOP ucs4le_internal_loop /* Bulk conversion loop defined below. */
+#define TO_LOOP ucs4le_internal_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_ucs4le_internal /* Declared in iconv/gconv_int.h. */
+
+
+/* Convert complete 4-byte little-endian UCS4 characters from
+   [*INPTRP, INEND) into host-byte-order 32-bit values stored at
+   [*OUTPTRP, OUTEND).  Both buffers are accessed through uint32_t
+   pointers, so this variant is for suitably aligned buffers.
+
+   At most MIN (input bytes, output bytes) / 4 characters are handled.
+   *INPTRP and *OUTPTRP are updated to the first unprocessed position
+   on every return path.
+
+   Returns __GCONV_ILLEGAL_INPUT when a value above 0x7fffffff is met
+   (the pointers then designate the offending character),
+   __GCONV_FULL_OUTPUT when the output buffer is exhausted,
+   __GCONV_EMPTY_INPUT when all input was consumed, and
+   __GCONV_INCOMPLETE_INPUT otherwise (fewer than 4 bytes are left).
+   STATE, DATA and CONVERTED are not used here.  */
+static inline int
+ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+                      unsigned char **outptrp, unsigned char *outend,
+                      mbstate_t *state, void *data, size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      uint32_t inval;
+
+      /* Bring the little-endian input value into host byte order.  */
+#if __BYTE_ORDER == __BIG_ENDIAN
+      inval = bswap_32 (*(uint32_t *) inptr);
+#else
+      inval = *(uint32_t *) inptr;
+#endif
+
+      if (inval > 0x7fffffff)
+        {
+          /* The value is too large.  Report how far we got so that
+             the characters converted so far are not lost.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+      /* Store the value that was just validated.  It is already in
+         host byte order; swapping the raw input unconditionally would
+         byte-reverse the result on little-endian hosts.  A plain
+         store followed by the pointer increment in the loop header
+         also avoids the non-standard cast-as-lvalue construct.  */
+      *(uint32_t *) outptr = inval;
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+
+#ifndef _STRING_ARCH_unaligned
+/* Byte-wise variant of the UCS4-LE -> internal conversion, compiled
+   only when _STRING_ARCH_unaligned is not defined.  It never reads or
+   writes through a uint32_t pointer, so the buffers need no
+   particular alignment.
+
+   Converts complete 4-byte little-endian characters from
+   [*INPTRP, INEND) to host byte order at [*OUTPTRP, OUTEND) and
+   updates both pointers on every return path.
+
+   Returns __GCONV_ILLEGAL_INPUT for a value above 0x7fffffff,
+   __GCONV_FULL_OUTPUT when the output buffer is exhausted,
+   __GCONV_EMPTY_INPUT when all input was consumed, and
+   __GCONV_INCOMPLETE_INPUT otherwise.  STATE, DATA and CONVERTED are
+   not used here.  */
+static inline int
+ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
+                                const unsigned char *inend,
+                                unsigned char **outptrp, unsigned char *outend,
+                                mbstate_t *state, void *data,
+                                size_t *converted)
+{
+  const unsigned char *inptr = *inptrp;
+  unsigned char *outptr = *outptrp;
+  size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
+  int result;
+  size_t cnt;
+
+  /* Both pointers advance by one character per iteration.  Without
+     the OUTPTR step every character would be written to the same
+     four bytes and the reported output position would never move.  */
+  for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+    {
+      /* The input is little endian, so inptr[3] is the most
+         significant byte.  Any value with the top bit set exceeds
+         0x7fffffff; this includes 0x80xxxxxx, hence the comparison
+         with 0x7f.  */
+      if (inptr[3] > 0x7f)
+        {
+          /* The value is too large.  */
+          *inptrp = inptr;
+          *outptrp = outptr;
+          return __GCONV_ILLEGAL_INPUT;
+        }
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+      /* Host is big endian: reverse the byte order.  */
+      outptr[3] = inptr[0];
+      outptr[2] = inptr[1];
+      outptr[1] = inptr[2];
+      outptr[0] = inptr[3];
+# else
+      /* Host is little endian: copy the bytes unchanged.  */
+      outptr[0] = inptr[0];
+      outptr[1] = inptr[1];
+      outptr[2] = inptr[2];
+      outptr[3] = inptr[3];
+# endif
+    }
+
+  *inptrp = inptr;
+  *outptrp = outptr;
+
+  /* Determine the status.  */
+  if (*outptrp == outend)
+    result = __GCONV_FULL_OUTPUT;
+  else if (*inptrp == inend)
+    result = __GCONV_EMPTY_INPUT;
+  else
+    result = __GCONV_INCOMPLETE_INPUT;
+
+  return result;
+}
+#endif
+
+
+/* Finish one UCS4-LE character whose first bytes were saved in STATE
+   by an earlier call.
+
+   The low three bits of STATE->__count hold the number of bytes
+   already stored in STATE->__value.__wchb.  Bytes are taken from
+   [*INPTRP, INEND) until four are available; *INPTRP is advanced past
+   every byte taken.
+
+   Returns __GCONV_INCOMPLETE_INPUT if the input ran out first (the
+   new byte count is recorded in STATE), __GCONV_ILLEGAL_INPUT if the
+   completed value exceeds 0x7fffffff, and __GCONV_OK after the
+   character has been written in host byte order to *OUTPTRP, which is
+   then advanced by four bytes.  OUTEND, DATA and CONVERTED are not
+   used here.  */
+static inline int
+ucs4le_internal_loop_single (const unsigned char **inptrp,
+                             const unsigned char *inend,
+                             unsigned char **outptrp, unsigned char *outend,
+                             mbstate_t *state, void *data, size_t *converted)
+{
+  size_t cnt = state->__count & 7;
+
+  while (*inptrp < inend && cnt < 4)
+    state->__value.__wchb[cnt++] = *(*inptrp)++;
+
+  if (cnt < 4)
+    {
+      /* Still not enough bytes.  Store the ones in the input buffer.  */
+      state->__count &= ~7;
+      state->__count |= cnt;
+
+      return __GCONV_INCOMPLETE_INPUT;
+    }
+
+  /* Byte 3 is the most significant one.  Every value with the top bit
+     set, 0x80xxxxxx included, is larger than 0x7fffffff.  */
+  if (((unsigned char *) state->__value.__wchb)[3] > 0x7f)
+    /* The value is too large.  */
+    return __GCONV_ILLEGAL_INPUT;
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+  /* Host is big endian: reverse the byte order.  */
+  (*outptrp)[0] = state->__value.__wchb[3];
+  (*outptrp)[1] = state->__value.__wchb[2];
+  (*outptrp)[2] = state->__value.__wchb[1];
+  (*outptrp)[3] = state->__value.__wchb[0];
+#else
+  /* Host is little endian: copy the bytes unchanged.  This branch
+     must not test for __BIG_ENDIAN again, or little-endian hosts
+     would produce no output at all.  */
+  (*outptrp)[0] = state->__value.__wchb[0];
+  (*outptrp)[1] = state->__value.__wchb[1];
+  (*outptrp)[2] = state->__value.__wchb[2];
+  (*outptrp)[3] = state->__value.__wchb[3];
+#endif
+
+  /* Account for the character just written; otherwise whatever is
+     written next through *OUTPTRP would overwrite it.  */
+  *outptrp += 4;
+
+  /* Clear the state buffer.  */
+  state->__count &= ~7;
+
+  return __GCONV_OK;
+}
+
+#include <iconv/skeleton.c>
+
+
/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
@@ -419,7 +736,7 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
if ((wc & encoding_mask[step - 2]) == 0) \
break; \
\
- if (outptr + step >= outend) \
+ if (outptr + step > outend) \
{ \
/* Too long. */ \
result = __GCONV_FULL_OUTPUT; \