summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2007-01-23 01:09:41 +0000
committerBruno Haible <bruno@clisp.org>2007-01-23 01:09:41 +0000
commit6d6f8a5cd56ca54a9e9d49516cd2e849f8a6b1c6 (patch)
tree2071a3b2b769c6a17fd032bb536625df13edaef4
parent34155a41ab5499528457bfe0b723532c48b8bb93 (diff)
downloadgnulib-6d6f8a5cd56ca54a9e9d49516cd2e849f8a6b1c6.tar.gz
Add optional offsets argument to conversion routines.
-rw-r--r--ChangeLog14
-rw-r--r--lib/striconveh.c149
-rw-r--r--lib/striconveh.h10
-rw-r--r--lib/striconveha.c5
-rw-r--r--lib/striconveha.h5
-rw-r--r--tests/test-striconveh.c625
6 files changed, 583 insertions, 225 deletions
diff --git a/ChangeLog b/ChangeLog
index e6f93bf213..56d829480f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
2007-01-22 Bruno Haible <bruno@clisp.org>
+ * lib/striconveh.h (mem_cd_iconveh, mem_iconveh): Add 'offsets'
+ argument.
+ * lib/striconveh.c (iconv_carefully_1): New function.
+ (mem_cd_iconveh_internal, mem_cd_iconveh, mem_iconveh): Add 'offsets'
+ argument.
+ (str_cd_iconveh): Update.
+ * lib/striconveha.h (mem_iconveha): Add 'offsets' argument.
+ * lib/striconveha.c (mem_iconveha): Add 'offsets' argument.
+ * tests/test-striconveh.c (MAGIC): New macro.
+ (new_offsets): New function.
+ (main): Test call with and without offsets.
+
+2007-01-22 Bruno Haible <bruno@clisp.org>
+
* modules/sys_stat (Makefile.am): Use @MKDIR_P@ instead of $(MKDIR_P).
* modules/sys_select (Makefile.am): Likewise.
* modules/sys_socket (Makefile.am): Likewise.
diff --git a/lib/striconveh.c b/lib/striconveh.c
index b02a182bbd..7feb768168 100644
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -119,11 +119,68 @@ iconv_carefully (iconv_t cd,
iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
# endif
+/* iconv_carefully_1 is like iconv_carefully, except that it stops after
+   converting one character.
+   It hands CD a window of 1, 2, 3, ... input bytes starting at *INBUF until
+   iconv() reports something other than EINVAL (incomplete input), or until
+   the window would exceed *INBYTESLEFT.
+   *INBUF and *INBYTESLEFT are always updated.  *OUTBUF and *OUTBYTESLEFT
+   are updated only if the return value is not (size_t)(-1).
+   *INCREMENTED is set to true only when a lossy conversion was detected and
+   iconv() has already advanced past the offending input; otherwise it is
+   set to false.
+   If *INBYTESLEFT is 0, iconv() is never called: the return value is
+   (size_t)(-1) and errno is left untouched.  */
+static size_t
+iconv_carefully_1 (iconv_t cd,
+                   const char **inbuf, size_t *inbytesleft,
+                   char **outbuf, size_t *outbytesleft,
+                   bool *incremented)
+{
+  const char *inptr = *inbuf;
+  size_t inavail = *inbytesleft;
+  const char *inptr_end = inptr + inavail;
+  char *outptr = *outbuf;
+  size_t outsize = *outbytesleft;
+  const char *inptr_before = inptr;
+  size_t res = (size_t)(-1);
+  size_t insize;
+
+  /* Compare sizes rather than pointers, so that no pointer beyond
+     one-past-the-end of the input is ever formed.  INPTR stays equal to
+     INPTR_BEFORE for as long as the loop keeps iterating.  */
+  for (insize = 1; insize <= inavail; insize++)
+    {
+      res = iconv (cd,
+                   (ICONV_CONST char **) &inptr, &insize,
+                   &outptr, &outsize);
+      if (!(res == (size_t)(-1) && errno == EINVAL))
+        break;
+      /* iconv can eat up a shift sequence but give EINVAL while attempting
+         to convert the first character.  E.g. GNU libiconv does this.
+         Treat it as a successful step that converted nothing, instead of
+         aborting; the caller will call again for the next character.  */
+      if (inptr != inptr_before)
+        {
+          res = 0;
+          break;
+        }
+    }
+
+  *inbuf = inptr;
+  *inbytesleft = inptr_end - inptr;
+# if !defined _LIBICONV_VERSION && !defined __GLIBC__
+  /* Irix iconv() inserts a NUL byte if it cannot convert.
+     NetBSD iconv() inserts a question mark if it cannot convert.
+     Only GNU libiconv and GNU libc are known to prefer to fail rather
+     than doing a lossy conversion.  */
+  if (res != (size_t)(-1) && res > 0)
+    {
+      /* iconv() has already incremented INPTR.  We cannot go back to a
+         previous INPTR, otherwise the state inside CD would become invalid,
+         if FROM_CODESET is a stateful encoding.  So, tell the caller that
+         *INBUF has already been incremented.  */
+      *incremented = (inptr > inptr_before);
+      errno = EILSEQ;
+      return (size_t)(-1);
+    }
+# endif
+
+  /* Commit the output position only for a successful conversion step.  */
+  if (res != (size_t)(-1))
+    {
+      *outbuf = outptr;
+      *outbytesleft = outsize;
+    }
+  *incremented = false;
+  return res;
+}
+
+
static int
mem_cd_iconveh_internal (const char *src, size_t srclen,
iconv_t cd, iconv_t cd1, iconv_t cd2,
enum iconv_ilseq_handler handler,
size_t extra_alloc,
+ size_t *offsets,
char **resultp, size_t *lengthp)
{
/* When a conversion error occurs, we cannot start using CD1 and CD2 at
@@ -141,6 +198,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
char *result;
size_t allocated;
size_t length;
+ size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
if (*lengthp >= sizeof (tmpbuf))
{
@@ -153,6 +211,16 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
allocated = sizeof (tmpbuf);
}
result = initial_result;
+
+ if (offsets != NULL)
+ {
+ size_t i;
+
+ for (i = 0; i < srclen; i++)
+ offsets[i] = (size_t)(-1);
+
+ last_length = (size_t)(-1);
+ }
length = 0;
/* First, try a direct conversion, and see whether a conversion error
@@ -176,16 +244,29 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
size_t res;
bool grow;
- /* Use iconv_carefully instead of iconv here, because:
- - If TO_CODESET is UTF-8, we can do the error handling in this loop,
- no need for a second loop,
- - With iconv() implementations other than GNU libiconv and GNU libc,
- if we use iconv() in a big swoop, checking for an E2BIG return,
- we lose the number of irreversible conversions. */
- res = iconv_carefully (cd,
- &inptr, &insize,
- &outptr, &outsize,
- &incremented);
+ if (offsets != NULL)
+ {
+ if (length != last_length) /* ensure that offset[] be increasing */
+ {
+ offsets[inptr - src] = length;
+ last_length = length;
+ }
+ res = iconv_carefully_1 (cd,
+ &inptr, &insize,
+ &outptr, &outsize,
+ &incremented);
+ }
+ else
+ /* Use iconv_carefully instead of iconv here, because:
+ - If TO_CODESET is UTF-8, we can do the error handling in this
+ loop, no need for a second loop,
+ - With iconv() implementations other than GNU libiconv and GNU
+ libc, if we use iconv() in a big swoop, checking for an E2BIG
+ return, we lose the number of irreversible conversions. */
+ res = iconv_carefully (cd,
+ &inptr, &insize,
+ &outptr, &outsize,
+ &incremented);
length = outptr - result;
grow = (length + extra_alloc > allocated / 2);
@@ -332,6 +413,15 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
/* The direct conversion failed, handler != iconveh_error,
and cd2 != (iconv_t)(-1).
Use a conversion through UTF-8. */
+ if (offsets != NULL)
+ {
+ size_t i;
+
+ for (i = 0; i < srclen; i++)
+ offsets[i] = (size_t)(-1);
+
+ last_length = (size_t)(-1);
+ }
length = 0;
{
# define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
@@ -362,11 +452,25 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
/* Conversion step 1: from FROM_CODESET to UTF-8. */
if (in1size > 0)
{
+ if (offsets != NULL
+ && length != last_length) /* ensure that offset[] be increasing */
+ {
+ offsets[in1ptr - src] = length;
+ last_length = length;
+ }
if (cd1 != (iconv_t)(-1))
- res1 = iconv_carefully (cd1,
- (ICONV_CONST char **) &in1ptr, &in1size,
- &out1ptr, &out1size,
- &incremented1);
+ {
+ if (offsets != NULL)
+ res1 = iconv_carefully_1 (cd1,
+ &in1ptr, &in1size,
+ &out1ptr, &out1size,
+ &incremented1);
+ else
+ res1 = iconv_carefully (cd1,
+ &in1ptr, &in1size,
+ &out1ptr, &out1size,
+ &incremented1);
+ }
else
{
/* FROM_CODESET is UTF-8. */
@@ -418,7 +522,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
out1ptr += m;
out1size -= m;
}
- while (in1size > 0);
+ while (offsets == NULL && in1size > 0);
}
}
else if (do_final_flush1)
@@ -469,7 +573,8 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
errno1 = errno;
utf8len = out1ptr - utf8buf;
- if (in1size == 0
+ if (offsets != NULL
+ || in1size == 0
|| utf8len > utf8bufsize / 2
|| (res1 == (size_t)(-1) && errno1 == E2BIG))
{
@@ -726,10 +831,11 @@ int
mem_cd_iconveh (const char *src, size_t srclen,
iconv_t cd, iconv_t cd1, iconv_t cd2,
enum iconv_ilseq_handler handler,
+ size_t *offsets,
char **resultp, size_t *lengthp)
{
return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
- resultp, lengthp);
+ offsets, resultp, lengthp);
}
char *
@@ -744,7 +850,7 @@ str_cd_iconveh (const char *src,
char *result = NULL;
size_t length = 0;
int retval = mem_cd_iconveh_internal (src, strlen (src),
- cd, cd1, cd2, handler, 1,
+ cd, cd1, cd2, handler, 1, NULL,
&result, &length);
if (retval < 0)
@@ -770,6 +876,7 @@ int
mem_iconveh (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler,
+ size_t *offsets,
char **resultp, size_t *lengthp)
{
if (srclen == 0)
@@ -778,7 +885,7 @@ mem_iconveh (const char *src, size_t srclen,
*lengthp = 0;
return 0;
}
- else if (c_strcasecmp (from_codeset, to_codeset) == 0)
+ else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
{
char *result;
@@ -854,8 +961,8 @@ mem_iconveh (const char *src, size_t srclen,
result = *resultp;
length = *lengthp;
- retval =
- mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, &result, &length);
+ retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
+ &result, &length);
if (retval < 0)
{
diff --git a/lib/striconveh.h b/lib/striconveh.h
index 2ab1e6feda..e97890c0ea 100644
--- a/lib/striconveh.h
+++ b/lib/striconveh.h
@@ -47,6 +47,10 @@ enum iconv_ilseq_handler
(iconv_t)(-1) if FROM_CODESET is UTF-8).
CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
if TO_CODESET is UTF-8).
+ If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
+ array is filled with offsets into the result, i.e. the character starting
+ at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
+ and other offsets are set to (size_t)(-1).
*RESULTP and *LENGTH should initially be a scratch buffer and its size,
or *RESULTP can initially be NULL.
May erase the contents of the memory at *RESULTP.
@@ -58,6 +62,7 @@ extern int
mem_cd_iconveh (const char *src, size_t srclen,
iconv_t cd, iconv_t cd1, iconv_t cd2,
enum iconv_ilseq_handler handler,
+ size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
@@ -81,6 +86,10 @@ extern char *
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
+ If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
+ array is filled with offsets into the result, i.e. the character starting
+ at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
+ and other offsets are set to (size_t)(-1).
*RESULTP and *LENGTH should initially be a scratch buffer and its size,
or *RESULTP can initially be NULL.
May erase the contents of the memory at *RESULTP.
@@ -92,6 +101,7 @@ extern int
mem_iconveh (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler,
+ size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
diff --git a/lib/striconveha.c b/lib/striconveha.c
index 9da18c93ef..12a7bfc2fe 100644
--- a/lib/striconveha.c
+++ b/lib/striconveha.c
@@ -147,10 +147,11 @@ int
mem_iconveha (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler,
+ size_t *offsets,
char **resultp, size_t *lengthp)
{
int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
- resultp, lengthp);
+ offsets, resultp, lengthp);
if (retval >= 0 || errno != EINVAL)
return retval;
else
@@ -168,7 +169,7 @@ mem_iconveha (const char *src, size_t srclen,
{
retval = mem_iconveha (src, srclen,
from_codeset, to_codeset, handler,
- resultp, lengthp);
+ offsets, resultp, lengthp);
if (!(retval < 0 && errno == EILSEQ))
return retval;
encodings++;
diff --git a/lib/striconveha.h b/lib/striconveha.h
index 28fc7e6b13..b4d7ce496d 100644
--- a/lib/striconveha.h
+++ b/lib/striconveha.h
@@ -30,6 +30,10 @@ extern "C" {
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
The "from" encoding can also be a name defined for autodetection.
+ If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
+ array is filled with offsets into the result, i.e. the character starting
+ at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
+ and other offsets are set to (size_t)(-1).
*RESULTP and *LENGTH should initially be a scratch buffer and its size,
or *RESULTP can initially be NULL.
May erase the contents of the memory at *RESULTP.
@@ -41,6 +45,7 @@ extern int
mem_iconveha (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler,
+ size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
diff --git a/tests/test-striconveh.c b/tests/test-striconveh.c
index 7d45273729..31b7b721d1 100644
--- a/tests/test-striconveh.c
+++ b/tests/test-striconveh.c
@@ -34,12 +34,25 @@
#define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
#define ASSERT(expr) if (!(expr)) abort ();
+/* Magic number for detecting bounds violations. */
+#define MAGIC 0x1983EFF1
+
+/* Allocate an array of N offsets followed by one guard element, which is
+   set to MAGIC so that the tests can detect a write past the N-th element.
+   Aborts if the allocation fails.  The caller frees the result.  */
+static size_t *
+new_offsets (size_t n)
+{
+  size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
+  /* Fail loudly instead of dereferencing a null pointer below.  */
+  if (offsets == NULL)
+    abort ();
+  offsets[n] = MAGIC;
+  return offsets;
+}
+
int
main ()
{
static enum iconv_ilseq_handler handlers[] =
{ iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
size_t h;
+ size_t o;
+ size_t i;
#if HAVE_ICONV
/* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
@@ -66,17 +79,29 @@ main ()
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_cd_iconveh (input, strlen (input),
- cd_88592_to_88591,
- cd_88592_to_utf8, cd_utf8_to_88591,
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ cd_88592_to_88591,
+ cd_88592_to_utf8, cd_utf8_to_88591,
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 37; i++)
+ ASSERT (offsets[i] == i);
+ ASSERT (offsets[37] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
}
/* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
@@ -84,37 +109,59 @@ main ()
{
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
- char *result = NULL;
- size_t length = 0;
- int retval = mem_cd_iconveh (input, strlen (input),
- cd_88592_to_88591,
- cd_88592_to_utf8, cd_utf8_to_88591,
- handler,
- &result, &length);
- switch (handler)
+ for (o = 0; o < 2; o++)
{
- case iconveh_error:
- ASSERT (retval == -1 && errno == EILSEQ);
- ASSERT (result == NULL);
- break;
- case iconveh_question_mark:
- {
- static const char expected[] = "Rafa? Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
- case iconveh_escape_sequence:
- {
- static const char expected[] = "Rafa\\u0142 Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ cd_88592_to_88591,
+ cd_88592_to_utf8, cd_utf8_to_88591,
+ handler,
+ offsets,
+ &result, &length);
+ switch (handler)
+ {
+ case iconveh_error:
+ ASSERT (retval == -1 && errno == EILSEQ);
+ ASSERT (result == NULL);
+ if (o)
+ free (offsets);
+ break;
+ case iconveh_question_mark:
+ {
+ static const char expected[] = "Rafa? Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 16; i++)
+ ASSERT (offsets[i] == i);
+ ASSERT (offsets[16] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ case iconveh_escape_sequence:
+ {
+ static const char expected[] = "Rafa\\u0142 Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 16; i++)
+ ASSERT (offsets[i] == (i < 5 ? i :
+ i + 5));
+ ASSERT (offsets[16] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ }
}
}
@@ -124,17 +171,32 @@ main ()
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_cd_iconveh (input, strlen (input),
- cd_88591_to_utf8,
- cd_88591_to_utf8, (iconv_t)(-1),
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ cd_88591_to_utf8,
+ cd_88591_to_utf8, (iconv_t)(-1),
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 37; i++)
+ ASSERT (offsets[i] == (i < 1 ? i :
+ i < 12 ? i + 1 :
+ i < 18 ? i + 2 :
+ i + 3));
+ ASSERT (offsets[37] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
}
/* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
@@ -143,17 +205,36 @@ main ()
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_cd_iconveh (input, strlen (input),
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ cd_utf8_to_88591,
+ (iconv_t)(-1), cd_utf8_to_88591,
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 41; i++)
+ ASSERT (offsets[i] == (i < 1 ? i :
+ i == 1 ? (size_t)(-1) :
+ i < 13 ? i - 1 :
+ i == 13 ? (size_t)(-1) :
+ i < 20 ? i - 2 :
+ i == 20 ? (size_t)(-1) :
+ i < 40 ? i - 3 :
+ (size_t)(-1)));
+ ASSERT (offsets[41] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
}
/* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
@@ -161,37 +242,62 @@ main ()
{
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
- char *result = NULL;
- size_t length = 0;
- int retval = mem_cd_iconveh (input, strlen (input),
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
- handler,
- &result, &length);
- switch (handler)
+ for (o = 0; o < 2; o++)
{
- case iconveh_error:
- ASSERT (retval == -1 && errno == EILSEQ);
- ASSERT (result == NULL);
- break;
- case iconveh_question_mark:
- {
- static const char expected[] = "Rafa? Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
- case iconveh_escape_sequence:
- {
- static const char expected[] = "Rafa\\u0142 Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ cd_utf8_to_88591,
+ (iconv_t)(-1), cd_utf8_to_88591,
+ handler,
+ offsets,
+ &result, &length);
+ switch (handler)
+ {
+ case iconveh_error:
+ ASSERT (retval == -1 && errno == EILSEQ);
+ ASSERT (result == NULL);
+ if (o)
+ free (offsets);
+ break;
+ case iconveh_question_mark:
+ {
+ static const char expected[] = "Rafa? Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 17; i++)
+ ASSERT (offsets[i] == (i < 5 ? i :
+ i == 5 ? (size_t)(-1) :
+ i - 1));
+ ASSERT (offsets[17] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ case iconveh_escape_sequence:
+ {
+ static const char expected[] = "Rafa\\u0142 Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 17; i++)
+ ASSERT (offsets[i] == (i < 5 ? i :
+ i == 5 ? (size_t)(-1) :
+ i + 4));
+ ASSERT (offsets[17] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ }
}
}
@@ -200,17 +306,28 @@ main ()
{
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\342";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_cd_iconveh (input, strlen (input),
- cd_utf8_to_88591,
- (iconv_t)(-1), cd_utf8_to_88591,
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == 0);
- if (result != NULL)
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh (input, strlen (input),
+ cd_utf8_to_88591,
+ (iconv_t)(-1), cd_utf8_to_88591,
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == 0);
+ if (o)
+ {
+ ASSERT (offsets[0] == 0);
+ ASSERT (offsets[1] == MAGIC);
+ free (offsets);
+ }
+ if (result != NULL)
+ free (result);
+ }
}
/* ------------------------ Test str_cd_iconveh() ------------------------ */
@@ -355,16 +472,28 @@ main ()
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_iconveh (input, strlen (input),
- "ISO-8859-2", "ISO-8859-1",
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_iconveh (input, strlen (input),
+ "ISO-8859-2", "ISO-8859-1",
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 37; i++)
+ ASSERT (offsets[i] == i);
+ ASSERT (offsets[37] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
}
/* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
@@ -372,36 +501,58 @@ main ()
{
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
- char *result = NULL;
- size_t length = 0;
- int retval = mem_iconveh (input, strlen (input),
- "ISO-8859-2", "ISO-8859-1",
- handler,
- &result, &length);
- switch (handler)
+ for (o = 0; o < 2; o++)
{
- case iconveh_error:
- ASSERT (retval == -1 && errno == EILSEQ);
- ASSERT (result == NULL);
- break;
- case iconveh_question_mark:
- {
- static const char expected[] = "Rafa? Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
- case iconveh_escape_sequence:
- {
- static const char expected[] = "Rafa\\u0142 Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_iconveh (input, strlen (input),
+ "ISO-8859-2", "ISO-8859-1",
+ handler,
+ offsets,
+ &result, &length);
+ switch (handler)
+ {
+ case iconveh_error:
+ ASSERT (retval == -1 && errno == EILSEQ);
+ ASSERT (result == NULL);
+ if (o)
+ free (offsets);
+ break;
+ case iconveh_question_mark:
+ {
+ static const char expected[] = "Rafa? Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 16; i++)
+ ASSERT (offsets[i] == i);
+ ASSERT (offsets[16] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ case iconveh_escape_sequence:
+ {
+ static const char expected[] = "Rafa\\u0142 Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 16; i++)
+ ASSERT (offsets[i] == (i < 5 ? i :
+ i + 5));
+ ASSERT (offsets[16] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ }
}
}
@@ -411,16 +562,31 @@ main ()
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_iconveh (input, strlen (input),
- "ISO-8859-1", "UTF-8",
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_iconveh (input, strlen (input),
+ "ISO-8859-1", "UTF-8",
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 37; i++)
+ ASSERT (offsets[i] == (i < 1 ? i :
+ i < 12 ? i + 1 :
+ i < 18 ? i + 2 :
+ i + 3));
+ ASSERT (offsets[37] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
}
/* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
@@ -429,16 +595,35 @@ main ()
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_iconveh (input, strlen (input),
- "UTF-8", "ISO-8859-1",
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_iconveh (input, strlen (input),
+ "UTF-8", "ISO-8859-1",
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 41; i++)
+ ASSERT (offsets[i] == (i < 1 ? i :
+ i == 1 ? (size_t)(-1) :
+ i < 13 ? i - 1 :
+ i == 13 ? (size_t)(-1) :
+ i < 20 ? i - 2 :
+ i == 20 ? (size_t)(-1) :
+ i < 40 ? i - 3 :
+ (size_t)(-1)));
+ ASSERT (offsets[41] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
}
/* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
@@ -446,36 +631,61 @@ main ()
{
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
- char *result = NULL;
- size_t length = 0;
- int retval = mem_iconveh (input, strlen (input),
- "UTF-8", "ISO-8859-1",
- handler,
- &result, &length);
- switch (handler)
+ for (o = 0; o < 2; o++)
{
- case iconveh_error:
- ASSERT (retval == -1 && errno == EILSEQ);
- ASSERT (result == NULL);
- break;
- case iconveh_question_mark:
- {
- static const char expected[] = "Rafa? Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
- case iconveh_escape_sequence:
- {
- static const char expected[] = "Rafa\\u0142 Maszkowski";
- ASSERT (retval == 0);
- ASSERT (length == strlen (expected));
- ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
- free (result);
- }
- break;
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_iconveh (input, strlen (input),
+ "UTF-8", "ISO-8859-1",
+ handler,
+ offsets,
+ &result, &length);
+ switch (handler)
+ {
+ case iconveh_error:
+ ASSERT (retval == -1 && errno == EILSEQ);
+ ASSERT (result == NULL);
+ if (o)
+ free (offsets);
+ break;
+ case iconveh_question_mark:
+ {
+ static const char expected[] = "Rafa? Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 17; i++)
+ ASSERT (offsets[i] == (i < 5 ? i :
+ i == 5 ? (size_t)(-1) :
+ i - 1));
+ ASSERT (offsets[17] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ case iconveh_escape_sequence:
+ {
+ static const char expected[] = "Rafa\\u0142 Maszkowski";
+ ASSERT (retval == 0);
+ ASSERT (length == strlen (expected));
+ ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+ if (o)
+ {
+ for (i = 0; i < 17; i++)
+ ASSERT (offsets[i] == (i < 5 ? i :
+ i == 5 ? (size_t)(-1) :
+ i + 4));
+ ASSERT (offsets[17] == MAGIC);
+ free (offsets);
+ }
+ free (result);
+ }
+ break;
+ }
}
}
@@ -484,16 +694,27 @@ main ()
{
enum iconv_ilseq_handler handler = handlers[h];
static const char input[] = "\342";
- char *result = NULL;
- size_t length = 0;
- int retval = mem_iconveh (input, strlen (input),
- "UTF-8", "ISO-8859-1",
- handler,
- &result, &length);
- ASSERT (retval == 0);
- ASSERT (length == 0);
- if (result != NULL)
- free (result);
+ for (o = 0; o < 2; o++)
+ {
+ size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_iconveh (input, strlen (input),
+ "UTF-8", "ISO-8859-1",
+ handler,
+ offsets,
+ &result, &length);
+ ASSERT (retval == 0);
+ ASSERT (length == 0);
+ if (o)
+ {
+ ASSERT (offsets[0] == 0);
+ ASSERT (offsets[1] == MAGIC);
+ free (offsets);
+ }
+ if (result != NULL)
+ free (result);
+ }
}
/* ------------------------- Test str_iconveh() ------------------------- */