summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-27 10:51:09 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-27 10:51:09 +0000
commit 7a7abf9d520b43be4b6802c6d914f689156a735f (patch)
tree 8b74e61a6408404beb94d826e5cc635e6a553046
parent f66b79f11b7947f4d36cf78abbdaa0451e5f7bc2 (diff)
download pcre-7a7abf9d520b43be4b6802c6d914f689156a735f.tar.gz
Update pcretest for non-UTF 16-bit wide values, for testing table-lookups
better. Add tests for bad mode.

git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@827 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- pcre_dfa_exec.c 2
-rw-r--r-- pcre_fullinfo.c 4
-rw-r--r-- pcretest.c 348
-rw-r--r-- testdata/saved16 bin 0 -> 70 bytes
-rw-r--r-- testdata/saved8 bin 0 -> 61 bytes
-rw-r--r-- testdata/testinput14 2
-rw-r--r-- testdata/testinput17 2
-rw-r--r-- testdata/testoutput14 6
-rw-r--r-- testdata/testoutput17 12
9 files changed, 221 insertions, 155 deletions
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 8c46bf4..663865b 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -2222,7 +2222,7 @@ for (;;)
}
else
#endif /* SUPPORT_UTF */
- otherd = fcc[d];
+ otherd = TABLE_GET(d, fcc, d);
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index f0f6b21..b10189c 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -91,6 +91,9 @@ means that the pattern is likely compiled with different endianness. */
if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+
+/* Check that this pattern was compiled in the correct bit mode */
+
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
switch (what)
@@ -117,7 +120,6 @@ switch (what)
#else
*((size_t *)where) = 0;
#endif
-
break;
case PCRE_INFO_CAPTURECOUNT:
diff --git a/pcretest.c b/pcretest.c
index a33ccdc..c43d01c 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -36,15 +36,15 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
-/* This program now supports the testing of both the 8-bit and 16-bit PCRE
-libraries in a single program. This is different from the modules such as
-pcre_compile.c in the library itself, which are compiled separately for each
-mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
-(the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
-compiled only once. Therefore, it must not make use of any of the macros from
-pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
-however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
-only supported library functions. */
+/* This program now supports the testing of both the 8-bit and 16-bit PCRE
+libraries in a single program. This is different from the modules such as
+pcre_compile.c in the library itself, which are compiled separately for each
+mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
+(the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
+compiled only once. Therefore, it must not make use of any of the macros from
+pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
+however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
+only supported library functions. */
#ifdef HAVE_CONFIG_H
@@ -185,12 +185,12 @@ only from one place and is handled differently). I couldn't dream up any way of
using a single macro to do this in a generic way, because of the many different
argument requirements. We know that at least one of SUPPORT_PCRE8 and
SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
-use these in the definitions of generic macros.
+use these in the definitions of generic macros.
-**** Special note about the PCHARSxxx macros: the address of the string to be
+**** Special note about the PCHARSxxx macros: the address of the string to be
printed is always given as two arguments: a base address followed by an offset.
The base address is cast to the correct data size for 8 or 16 bit data; the
-offset is in units of this size. If the string were given as base+offset in one
+offset is in units of this size. If the string were given as base+offset in one
argument, the casting might be incorrectly applied. */
#ifdef SUPPORT_PCRE8
@@ -343,7 +343,7 @@ argument, the casting might be incorrectly applied. */
#endif /* SUPPORT_PCRE16 */
-/* ----- Both modes are supported; a runtime test is needed, except for
+/* ----- Both modes are supported; a runtime test is needed, except for
pcre_config(), and the JIT stack functions, when it doesn't matter which
version is called. ----- */
@@ -362,12 +362,12 @@ version is called. ----- */
PCHARSV16(p, offset, len, f); \
else \
PCHARSV8(p, offset, len, f)
-
+
#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
if (use_pcre16) \
READ_CAPTURE_NAME16(p, cn8, cn16, re); \
else \
- READ_CAPTURE_NAME8(p, cn8, cn16, re)
+ READ_CAPTURE_NAME8(p, cn8, cn16, re)
#define SET_PCRE_CALLOUT(callout) \
if (use_pcre16) \
@@ -384,8 +384,8 @@ version is called. ----- */
PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
else \
PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
-
-#define PCRE_CONFIG pcre_config
+
+#define PCRE_CONFIG pcre_config
#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
namesptr, cbuffer, size) \
@@ -500,7 +500,7 @@ version is called. ----- */
#define STRLEN STRLEN8
#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
#define PCRE_COMPILE PCRE_COMPILE8
-#define PCRE_CONFIG pcre_config
+#define PCRE_CONFIG pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC PCRE_DFA_EXEC8
@@ -530,7 +530,7 @@ version is called. ----- */
#define STRLEN STRLEN16
#define PCRE_ASSIGN_JIT_STACK pcre16_assign_jit_stack
#define PCRE_COMPILE PCRE_COMPILE16
-#define PCRE_CONFIG pcre16_config
+#define PCRE_CONFIG pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC PCRE_DFA_EXEC16
@@ -666,7 +666,7 @@ static const char *errtexts[] = {
NULL, /* SHORTUTF8/16 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
- "pattern compiled in wrong mode (8-bit/16-bit error)"
+ "pattern compiled in wrong mode: 8-bit/16-bit error"
};
@@ -1133,22 +1133,27 @@ double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
result is always left in buffer16.
-Note that this function does not object to surrogate values. This is
-deliberate; it makes it possible to construct UTF-16 strings that are invalid,
+Note that this function does not object to surrogate values. This is
+deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.
+Patterns to be converted are either plain ASCII or UTF-8; data lines are always
+in UTF-8 so that values greater than 255 can be handled.
+
Arguments:
+ data TRUE if converting a data line; FALSE for a regex
p points to a byte string
utf true if UTF-8 (to be converted to UTF-16)
len number of bytes in the string (excluding trailing zero)
Returns: number of 16-bit data items used (excluding trailing zero)
OR -1 if a UTF-8 string is malformed
- OR -2 if a value > 0x10ffff is encountered
+ OR -2 if a value > 0x10ffff is encountered
+ OR -3 if a value > 0xffff is encountered when not in UTF mode
*/
static int
-to16(pcre_uint8 *p, int utf, int len)
+to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;
@@ -1166,12 +1171,11 @@ if (buffer16_size < 2*len + 2)
pp = buffer16;
-if (!utf)
+if (!utf && !data)
{
while (len-- > 0) *pp++ = *p++;
}
-#ifdef SUPPORT_UTF
else
{
int c = 0;
@@ -1184,13 +1188,13 @@ else
len -= chlen;
if (c < 0x10000) *pp++ = c; else
{
+ if (!utf) return -3;
c -= 0x10000;
*pp++ = 0xD800 | (c >> 10);
*pp++ = 0xDC00 | (c & 0x3ff);
}
}
}
-#endif
*pp = 0;
return pp - buffer16;
@@ -1480,8 +1484,8 @@ if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
PCHARSV(*pp, 0, -1, outfile);
fprintf(outfile, "\"\n");
}
-
-*pp = npp;
+
+*pp = npp;
return p;
}
#endif /* SUPPORT_PCRE8 */
@@ -1508,7 +1512,7 @@ if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
PCHARSV(*pp, 0, -1, outfile);
fprintf(outfile, "\"\n");
}
-*pp = npp;
+*pp = npp;
return p;
}
#endif /* SUPPORT_PCRE16 */
@@ -1673,9 +1677,19 @@ free(block);
/* Get one piece of information from the pcre_fullinfo() function. When only
one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
-value, but the code is defensive. */
+value, but the code is defensive.
+
+Arguments:
+ re compiled regex
+ study study data
+ option PCRE_INFO_xxx option
+ ptr where to put the data
-static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
+Returns: 0 when OK, < 0 on error
+*/
+
+static int
+new_info(pcre *re, pcre_extra *study, int option, void *ptr)
{
int rc;
@@ -1692,8 +1706,16 @@ else
rc = PCRE_ERROR_BADMODE;
#endif
-if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
- use_pcre16? "16" : "", option);
+if (rc < 0)
+ {
+ fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
+ use_pcre16? "16" : "", option);
+ if (rc == PCRE_ERROR_BADMODE)
+ fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
+ "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
+ }
+
+return rc;
}
@@ -2118,10 +2140,10 @@ pcre_jit_stack *jit_stack = NULL;
/* These vectors store, end-to-end, a list of zero-terminated captured
substring names, each list itself being terminated by an empty name. Assume
-that 1024 is plenty long enough for the few names we'll be testing. It is
-easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
-for the actual memory, to ensure alignment. By defining these variables always
-(whether or not 8-bit or 16-bit is supported), we avoid too much mess with
+that 1024 is plenty long enough for the few names we'll be testing. It is
+easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
+for the actual memory, to ensure alignment. By defining these variables always
+(whether or not 8-bit or 16-bit is supported), we avoid too much mess with
#ifdefs in the code. */
pcre_uint16 copynames[1024];
@@ -2561,9 +2583,9 @@ while (!done)
PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
}
- /* Need to know if UTF-8 for printing data strings */
+ /* Need to know if UTF-8 for printing data strings. */
- new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
+ if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
use_utf = (get_options & PCRE_UTF8) != 0;
fclose(f);
@@ -2798,20 +2820,25 @@ while (!done)
#ifdef SUPPORT_PCRE16
if (use_pcre16)
{
- switch(to16(p, options & PCRE_UTF8, (int)strlen((char *)p)))
+ switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
{
- case -1:
+ case -1:
fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
"converted to UTF-16\n");
goto SKIP_DATA;
-
+
case -2:
fprintf(outfile, "**Failed: character value greater than 0x10ffff "
"cannot be converted to UTF-16\n");
goto SKIP_DATA;
-
+
+ case -3: /* "Impossible error" when to16 is called arg1 FALSE */
+ fprintf(outfile, "**Failed: character value greater than 0xffff "
+ "cannot be converted to 16-bit in non-UTF mode\n");
+ goto SKIP_DATA;
+
default:
- break;
+ break;
}
p = (pcre_uint8 *)buffer16;
}
@@ -2867,7 +2894,8 @@ while (!done)
within the regex; check for this so that we know how to process the data
lines. */
- new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
+ if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
+ goto SKIP_DATA;
if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
/* Extract the size for possible writing before possibly flipping it,
@@ -2918,8 +2946,8 @@ while (!done)
if (log_store)
{
size_t jitsize;
- new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
- if (jitsize != 0)
+ if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
+ jitsize != 0)
fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
}
}
@@ -2958,17 +2986,19 @@ while (!done)
int nameentrysize, namecount;
const pcre_uint8 *nametable;
- new_info(re, NULL, PCRE_INFO_SIZE, &size);
- new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
- new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
- new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
- new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
- new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
- new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
- new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
- new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
- new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
- new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
+ if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
+ new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
+ new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
+ new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
+ new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
+ new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
+ new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
+ new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
+ new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
+ new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
+ new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
+ != 0)
+ goto SKIP_DATA;
if (size != regex_gotten_store) fprintf(outfile,
"Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
@@ -3123,39 +3153,41 @@ while (!done)
pcre_uint8 *start_bits = NULL;
int minlength;
- new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
- fprintf(outfile, "Subject length lower bound = %d\n", minlength);
+ if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
+ fprintf(outfile, "Subject length lower bound = %d\n", minlength);
- new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
- if (start_bits == NULL)
- fprintf(outfile, "No set of starting bytes\n");
- else
+ if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
{
- int i;
- int c = 24;
- fprintf(outfile, "Starting byte set: ");
- for (i = 0; i < 256; i++)
+ if (start_bits == NULL)
+ fprintf(outfile, "No set of starting bytes\n");
+ else
{
- if ((start_bits[i/8] & (1<<(i&7))) != 0)
+ int i;
+ int c = 24;
+ fprintf(outfile, "Starting byte set: ");
+ for (i = 0; i < 256; i++)
{
- if (c > 75)
- {
- fprintf(outfile, "\n ");
- c = 2;
- }
- if (PRINTOK(i) && i != ' ')
+ if ((start_bits[i/8] & (1<<(i&7))) != 0)
{
- fprintf(outfile, "%c ", i);
- c += 2;
- }
- else
- {
- fprintf(outfile, "\\x%02x ", i);
- c += 5;
+ if (c > 75)
+ {
+ fprintf(outfile, "\n ");
+ c = 2;
+ }
+ if (PRINTOK(i) && i != ' ')
+ {
+ fprintf(outfile, "%c ", i);
+ c += 2;
+ }
+ else
+ {
+ fprintf(outfile, "\\x%02x ", i);
+ c += 5;
+ }
}
}
+ fprintf(outfile, "\n");
}
- fprintf(outfile, "\n");
}
}
@@ -3164,15 +3196,17 @@ while (!done)
if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
{
int jit;
- new_info(re, extra, PCRE_INFO_JIT, &jit);
- if (jit)
- fprintf(outfile, "JIT study was successful\n");
- else
+ if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
+ {
+ if (jit)
+ fprintf(outfile, "JIT study was successful\n");
+ else
#ifdef SUPPORT_JIT
- fprintf(outfile, "JIT study was not successful\n");
+ fprintf(outfile, "JIT study was not successful\n");
#else
- fprintf(outfile, "JIT support is not available in this version of PCRE\n");
+ fprintf(outfile, "JIT support is not available in this version of PCRE\n");
#endif
+ }
}
}
}
@@ -3265,7 +3299,7 @@ while (!done)
*copynames = 0;
*getnames = 0;
-
+
cn16ptr = copynames;
gn16ptr = getnames;
cn8ptr = copynames8;
@@ -3314,8 +3348,24 @@ while (!done)
{
int i = 0;
int n = 0;
-
- if (c == '\\') switch ((c = *p++))
+
+ /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
+ In non-UTF mode, allow the value of the byte to fall through to later,
+ where values greater than 127 are turned into UTF-8 when running in
+ 16-bit mode. */
+
+ if (c != '\\')
+ {
+ if (use_utf)
+ {
+ *q++ = c;
+ continue;
+ }
+ }
+
+ /* Handle backslash escapes */
+
+ else switch ((c = *p++))
{
case 'a': c = 7; break;
case 'b': c = '\b'; break;
@@ -3331,24 +3381,9 @@ while (!done)
c -= '0';
while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
c = c * 8 + *p++ - '0';
-
-#if !defined NOUTF
- if (use_utf && c > 255)
- {
- pcre_uint8 buff8[8];
- int ii, utn;
- utn = ord2utf8(c, buff8);
- for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
- c = buff8[ii]; /* Last byte */
- }
-#endif
break;
case 'x':
-
- /* Handle \x{..} specially - new Perl thing for utf8 */
-
-#if !defined NOUTF
if (*p == '{')
{
pcre_uint8 *pt = p;
@@ -3363,39 +3398,17 @@ while (!done)
c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
if (*pt == '}')
{
- pcre_uint8 buff8[8];
- int ii, utn;
- if (use_utf)
- {
- utn = ord2utf8(c, buff8);
- for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
- c = buff8[ii]; /* Last byte */
- }
- else
- {
- if (c > 255)
- {
- if (use_pcre16)
- fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
- "** Because its input is first processed as 8-bit, pcretest "
- "does not\n** support such characters in 16-bit mode when "
- "UTF-16 is not set.\n", c);
- else
- fprintf(outfile, "** Character \\x{%x} is greater than 255 "
- "and UTF-8 mode is not enabled.\n", c);
-
- fprintf(outfile, "** Truncation will probably give the wrong "
- "result.\n");
- }
- }
p = pt + 1;
break;
}
- /* Not correct form; fall through */
+ /* Not correct form for \x{...}; fall through */
}
-#endif
- /* Ordinary \x */
+ /* \x without {} always defines just one byte in 8-bit mode. This
+ allows UTF-8 characters to be constructed byte by byte, and also allows
+ invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
+ Otherwise, pass it down to later code so that it can be turned into
+ UTF-8 when running in 16-bit mode. */
c = 0;
while (i++ < 2 && isxdigit(*p))
@@ -3403,6 +3416,11 @@ while (!done)
c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
p++;
}
+ if (use_utf)
+ {
+ *q++ = c;
+ continue;
+ }
break;
case 0: /* \ followed by EOF allows for an empty line */
@@ -3606,8 +3624,36 @@ while (!done)
}
continue;
}
- *q++ = c;
+
+ /* We now have a character value in c that may be greater than 255. In
+ 16-bit mode, we always convert characters to UTF-8 so that values greater
+ than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
+ convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
+ mode must have come from \x{...} or octal constructs because values from
+ \x.. get this far only in non-UTF mode. */
+
+ if (use_pcre16 || use_utf)
+ {
+ pcre_uint8 buff8[8];
+ int ii, utn;
+ utn = ord2utf8(c, buff8);
+ for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
+ }
+ else
+ {
+ if (c > 255)
+ {
+ fprintf(outfile, "** Character \\x{%x} is greater than 255 "
+ "and UTF-8 mode is not enabled.\n", c);
+ fprintf(outfile, "** Truncation will probably give the wrong "
+ "result.\n");
+ }
+ *q++ = c;
+ }
}
+
+ /* Reached end of subject string */
+
*q = 0;
len = (int)(q - dbuffer);
@@ -3693,21 +3739,26 @@ while (!done)
#ifdef SUPPORT_PCRE16
if (use_pcre16)
{
- len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
+ len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
switch(len)
{
- case -1:
+ case -1:
fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
"converted to UTF-16\n");
goto NEXT_DATA;
-
+
case -2:
fprintf(outfile, "**Failed: character value greater than 0x10ffff "
"cannot be converted to UTF-16\n");
goto NEXT_DATA;
-
+
+ case -3:
+ fprintf(outfile, "**Failed: character value greater than 0xffff "
+ "cannot be converted to 16-bit in non-UTF mode\n");
+ goto NEXT_DATA;
+
default:
- break;
+ break;
}
bptr = (pcre_uint8 *)buffer16;
}
@@ -3825,7 +3876,7 @@ while (!done)
if (count >= 0)
{
int i, maxcount;
- void *cnptr, *gnptr;
+ void *cnptr, *gnptr;
#if !defined NODFA
if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
@@ -3852,7 +3903,8 @@ while (!done)
if (do_allcaps)
{
- new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
+ if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
+ goto SKIP_DATA;
count++; /* Allow for full match */
if (count * 2 > use_size_offsets) count = use_size_offsets/2;
}
@@ -3917,7 +3969,7 @@ while (!done)
for (;;)
{
int rc;
- char copybuffer[256];
+ char copybuffer[256];
if (use_pcre16)
{
@@ -4000,7 +4052,7 @@ while (!done)
PCRE_FREE_SUBSTRING(substring);
putc('\n', outfile);
}
-
+
gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
}
@@ -4142,11 +4194,11 @@ while (!done)
use_offsets[1]);
fprintf(outfile, "\n");
break;
-
+
case PCRE_ERROR_BADUTF8_OFFSET:
fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
use_pcre16? "16" : "8");
- break;
+ break;
default:
if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
diff --git a/testdata/saved16 b/testdata/saved16
new file mode 100644
index 0000000..ff5b11d
--- /dev/null
+++ b/testdata/saved16
Binary files differ
diff --git a/testdata/saved8 b/testdata/saved8
new file mode 100644
index 0000000..9b63b1d
--- /dev/null
+++ b/testdata/saved8
Binary files differ
diff --git a/testdata/testinput14 b/testdata/testinput14
index b672996..3789e5e 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -283,6 +283,8 @@
\) )* # optional trailing comment
/xSI
+<testdata/saved16
+
/\h/SI
/\v/SI
diff --git a/testdata/testinput17 b/testdata/testinput17
index 38dc556..bdebe18 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -213,6 +213,8 @@
\) )* # optional trailing comment
/xSI
+<testdata/saved8
+
/\h/SI
/\v/SI
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index 171bd17..ff9a404 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -355,6 +355,12 @@ Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
+<testdata/saved16
+Compiled pattern loaded from testdata/saved16
+No study data
+Error -28 from pcre_fullinfo(0)
+Running in 8-bit mode but pattern was compiled in 16-bit mode
+
/\h/SI
Capturing subpattern count = 0
No options
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index 4bb9986..0cc7b14 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -12,11 +12,7 @@
/\x{ffff}/
A\x{ffff}B
-** Character \x{ffff} is greater than 255.
-** Because its input is first processed as 8-bit, pcretest does not
-** support such characters in 16-bit mode when UTF-16 is not set.
-** Truncation will probably give the wrong result.
-No match
+ 0: \x{ffff}
/\x{10000}/
Failed: character value in \x{...} sequence is too large at offset 8
@@ -244,6 +240,12 @@ Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
+<testdata/saved8
+Compiled pattern loaded from testdata/saved8
+No study data
+Error -28 from pcre16_fullinfo(0)
+Running in 16-bit mode but pattern was compiled in 8-bit mode
+
/\h/SI
Capturing subpattern count = 0
No options