diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-27 10:51:09 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-27 10:51:09 +0000 |
commit | 7a7abf9d520b43be4b6802c6d914f689156a735f (patch) | |
tree | 8b74e61a6408404beb94d826e5cc635e6a553046 | |
parent | f66b79f11b7947f4d36cf78abbdaa0451e5f7bc2 (diff) | |
download | pcre-7a7abf9d520b43be4b6802c6d914f689156a735f.tar.gz |
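Update pcretest to support wide character values (greater than 255) in non-UTF 16-bit mode, so that table lookups can be tested better.
Add tests for the bad-mode error (a pattern compiled in the wrong 8-bit/16-bit mode).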
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@827 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | pcre_dfa_exec.c | 2 | ||||
-rw-r--r-- | pcre_fullinfo.c | 4 | ||||
-rw-r--r-- | pcretest.c | 348 | ||||
-rw-r--r-- | testdata/saved16 | bin | 0 -> 70 bytes | |||
-rw-r--r-- | testdata/saved8 | bin | 0 -> 61 bytes | |||
-rw-r--r-- | testdata/testinput14 | 2 | ||||
-rw-r--r-- | testdata/testinput17 | 2 | ||||
-rw-r--r-- | testdata/testoutput14 | 6 | ||||
-rw-r--r-- | testdata/testoutput17 | 12 |
9 files changed, 221 insertions, 155 deletions
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 8c46bf4..663865b 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -2222,7 +2222,7 @@ for (;;) } else #endif /* SUPPORT_UTF */ - otherd = fcc[d]; + otherd = TABLE_GET(d, fcc, d); } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c index f0f6b21..b10189c 100644 --- a/pcre_fullinfo.c +++ b/pcre_fullinfo.c @@ -91,6 +91,9 @@ means that the pattern is likely compiled with different endianness. */ if (re->magic_number != MAGIC_NUMBER) return re->magic_number == REVERSED_MAGIC_NUMBER? PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; + +/* Check that this pattern was compiled in the correct bit mode */ + if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; switch (what) @@ -117,7 +120,6 @@ switch (what) #else *((size_t *)where) = 0; #endif - break; case PCRE_INFO_CAPTURECOUNT: @@ -36,15 +36,15 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -/* This program now supports the testing of both the 8-bit and 16-bit PCRE -libraries in a single program. This is different from the modules such as -pcre_compile.c in the library itself, which are compiled separately for each -mode. If both modes are enabled, for example, pcre_compile.c is compiled twice -(the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is -compiled only once. Therefore, it must not make use of any of the macros from -pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does, -however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls -only supported library functions. */ +/* This program now supports the testing of both the 8-bit and 16-bit PCRE +libraries in a single program. This is different from the modules such as +pcre_compile.c in the library itself, which are compiled separately for each +mode. 
If both modes are enabled, for example, pcre_compile.c is compiled twice +(the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is +compiled only once. Therefore, it must not make use of any of the macros from +pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does, +however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls +only supported library functions. */ #ifdef HAVE_CONFIG_H @@ -185,12 +185,12 @@ only from one place and is handled differently). I couldn't dream up any way of using a single macro to do this in a generic way, because of the many different argument requirements. We know that at least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 must be set. First define macros for each individual mode; then -use these in the definitions of generic macros. +use these in the definitions of generic macros. -**** Special note about the PCHARSxxx macros: the address of the string to be +**** Special note about the PCHARSxxx macros: the address of the string to be printed is always given as two arguments: a base address followed by an offset. The base address is cast to the correct data size for 8 or 16 bit data; the -offset is in units of this size. If the string were given as base+offset in one +offset is in units of this size. If the string were given as base+offset in one argument, the casting might be incorrectly applied. */ #ifdef SUPPORT_PCRE8 @@ -343,7 +343,7 @@ argument, the casting might be incorrectly applied. */ #endif /* SUPPORT_PCRE16 */ -/* ----- Both modes are supported; a runtime test is needed, except for +/* ----- Both modes are supported; a runtime test is needed, except for pcre_config(), and the JIT stack functions, when it doesn't matter which version is called. ----- */ @@ -362,12 +362,12 @@ version is called. 
----- */ PCHARSV16(p, offset, len, f); \ else \ PCHARSV8(p, offset, len, f) - + #define READ_CAPTURE_NAME(p, cn8, cn16, re) \ if (use_pcre16) \ READ_CAPTURE_NAME16(p, cn8, cn16, re); \ else \ - READ_CAPTURE_NAME8(p, cn8, cn16, re) + READ_CAPTURE_NAME8(p, cn8, cn16, re) #define SET_PCRE_CALLOUT(callout) \ if (use_pcre16) \ @@ -384,8 +384,8 @@ version is called. ----- */ PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \ else \ PCRE_COMPILE8(re, pat, options, error, erroffset, tables) - -#define PCRE_CONFIG pcre_config + +#define PCRE_CONFIG pcre_config #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ namesptr, cbuffer, size) \ @@ -500,7 +500,7 @@ version is called. ----- */ #define STRLEN STRLEN8 #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack #define PCRE_COMPILE PCRE_COMPILE8 -#define PCRE_CONFIG pcre_config +#define PCRE_CONFIG pcre_config #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8 @@ -530,7 +530,7 @@ version is called. ----- */ #define STRLEN STRLEN16 #define PCRE_ASSIGN_JIT_STACK pcre16_assign_jit_stack #define PCRE_COMPILE PCRE_COMPILE16 -#define PCRE_CONFIG pcre16_config +#define PCRE_CONFIG pcre16_config #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16 @@ -666,7 +666,7 @@ static const char *errtexts[] = { NULL, /* SHORTUTF8/16 is handled specially */ "nested recursion at the same subject position", "JIT stack limit reached", - "pattern compiled in wrong mode (8-bit/16-bit error)" + "pattern compiled in wrong mode: 8-bit/16-bit error" }; @@ -1133,22 +1133,27 @@ double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The result is always left in buffer16. -Note that this function does not object to surrogate values. 
This is -deliberate; it makes it possible to construct UTF-16 strings that are invalid, +Note that this function does not object to surrogate values. This is +deliberate; it makes it possible to construct UTF-16 strings that are invalid, for the purpose of testing that they are correctly faulted. +Patterns to be converted are either plain ASCII or UTF-8; data lines are always +in UTF-8 so that values greater than 255 can be handled. + Arguments: + data TRUE if converting a data line; FALSE for a regex p points to a byte string utf true if UTF-8 (to be converted to UTF-16) len number of bytes in the string (excluding trailing zero) Returns: number of 16-bit data items used (excluding trailing zero) OR -1 if a UTF-8 string is malformed - OR -2 if a value > 0x10ffff is encountered + OR -2 if a value > 0x10ffff is encountered + OR -3 if a value > 0xffff is encountered when not in UTF mode */ static int -to16(pcre_uint8 *p, int utf, int len) +to16(int data, pcre_uint8 *p, int utf, int len) { pcre_uint16 *pp; @@ -1166,12 +1171,11 @@ if (buffer16_size < 2*len + 2) pp = buffer16; -if (!utf) +if (!utf && !data) { while (len-- > 0) *pp++ = *p++; } -#ifdef SUPPORT_UTF else { int c = 0; @@ -1184,13 +1188,13 @@ else len -= chlen; if (c < 0x10000) *pp++ = c; else { + if (!utf) return -3; c -= 0x10000; *pp++ = 0xD800 | (c >> 10); *pp++ = 0xDC00 | (c & 0x3ff); } } } -#endif *pp = 0; return pp - buffer16; @@ -1480,8 +1484,8 @@ if (pcre_get_stringnumber(re, (char *)(*pp)) < 0) PCHARSV(*pp, 0, -1, outfile); fprintf(outfile, "\"\n"); } - -*pp = npp; + +*pp = npp; return p; } #endif /* SUPPORT_PCRE8 */ @@ -1508,7 +1512,7 @@ if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0) PCHARSV(*pp, 0, -1, outfile); fprintf(outfile, "\"\n"); } -*pp = npp; +*pp = npp; return p; } #endif /* SUPPORT_PCRE16 */ @@ -1673,9 +1677,19 @@ free(block); /* Get one piece of information from the pcre_fullinfo() function. 
When only one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct -value, but the code is defensive. */ +value, but the code is defensive. + +Arguments: + re compiled regex + study study data + option PCRE_INFO_xxx option + ptr where to put the data -static void new_info(pcre *re, pcre_extra *study, int option, void *ptr) +Returns: 0 when OK, < 0 on error +*/ + +static int +new_info(pcre *re, pcre_extra *study, int option, void *ptr) { int rc; @@ -1692,8 +1706,16 @@ else rc = PCRE_ERROR_BADMODE; #endif -if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, - use_pcre16? "16" : "", option); +if (rc < 0) + { + fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, + use_pcre16? "16" : "", option); + if (rc == PCRE_ERROR_BADMODE) + fprintf(outfile, "Running in %s-bit mode but pattern was compiled in " + "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16"); + } + +return rc; } @@ -2118,10 +2140,10 @@ pcre_jit_stack *jit_stack = NULL; /* These vectors store, end-to-end, a list of zero-terminated captured substring names, each list itself being terminated by an empty name. Assume -that 1024 is plenty long enough for the few names we'll be testing. It is -easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version -for the actual memory, to ensure alignment. By defining these variables always -(whether or not 8-bit or 16-bit is supported), we avoid too much mess with +that 1024 is plenty long enough for the few names we'll be testing. It is +easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version +for the actual memory, to ensure alignment. By defining these variables always +(whether or not 8-bit or 16-bit is supported), we avoid too much mess with #ifdefs in the code. 
*/ pcre_uint16 copynames[1024]; @@ -2561,9 +2583,9 @@ while (!done) PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL); } - /* Need to know if UTF-8 for printing data strings */ + /* Need to know if UTF-8 for printing data strings. */ - new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); + if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue; use_utf = (get_options & PCRE_UTF8) != 0; fclose(f); @@ -2798,20 +2820,25 @@ while (!done) #ifdef SUPPORT_PCRE16 if (use_pcre16) { - switch(to16(p, options & PCRE_UTF8, (int)strlen((char *)p))) + switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p))) { - case -1: + case -1: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " "converted to UTF-16\n"); goto SKIP_DATA; - + case -2: fprintf(outfile, "**Failed: character value greater than 0x10ffff " "cannot be converted to UTF-16\n"); goto SKIP_DATA; - + + case -3: /* "Impossible error" when to16 is called arg1 FALSE */ + fprintf(outfile, "**Failed: character value greater than 0xffff " + "cannot be converted to 16-bit in non-UTF mode\n"); + goto SKIP_DATA; + default: - break; + break; } p = (pcre_uint8 *)buffer16; } @@ -2867,7 +2894,8 @@ while (!done) within the regex; check for this so that we know how to process the data lines. 
*/ - new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); + if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) + goto SKIP_DATA; if ((get_options & PCRE_UTF8) != 0) use_utf = 1; /* Extract the size for possible writing before possibly flipping it, @@ -2918,8 +2946,8 @@ while (!done) if (log_store) { size_t jitsize; - new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize); - if (jitsize != 0) + if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 && + jitsize != 0) fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize); } } @@ -2958,17 +2986,19 @@ while (!done) int nameentrysize, namecount; const pcre_uint8 *nametable; - new_info(re, NULL, PCRE_INFO_SIZE, &size); - new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); - new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax); - new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char); - new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char); - new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); - new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount); - new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable); - new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial); - new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged); - new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf); + if (new_info(re, NULL, PCRE_INFO_SIZE, &size) + + new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) + + new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) + + new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) + + new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) + + new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) + + new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) + + new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) + + new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) + + new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) + + new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) + != 0) + goto SKIP_DATA; if (size != regex_gotten_store) fprintf(outfile, "Size disagreement: pcre_fullinfo=%d 
call to malloc for %d\n", @@ -3123,39 +3153,41 @@ while (!done) pcre_uint8 *start_bits = NULL; int minlength; - new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength); - fprintf(outfile, "Subject length lower bound = %d\n", minlength); + if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0) + fprintf(outfile, "Subject length lower bound = %d\n", minlength); - new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits); - if (start_bits == NULL) - fprintf(outfile, "No set of starting bytes\n"); - else + if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0) { - int i; - int c = 24; - fprintf(outfile, "Starting byte set: "); - for (i = 0; i < 256; i++) + if (start_bits == NULL) + fprintf(outfile, "No set of starting bytes\n"); + else { - if ((start_bits[i/8] & (1<<(i&7))) != 0) + int i; + int c = 24; + fprintf(outfile, "Starting byte set: "); + for (i = 0; i < 256; i++) { - if (c > 75) - { - fprintf(outfile, "\n "); - c = 2; - } - if (PRINTOK(i) && i != ' ') + if ((start_bits[i/8] & (1<<(i&7))) != 0) { - fprintf(outfile, "%c ", i); - c += 2; - } - else - { - fprintf(outfile, "\\x%02x ", i); - c += 5; + if (c > 75) + { + fprintf(outfile, "\n "); + c = 2; + } + if (PRINTOK(i) && i != ' ') + { + fprintf(outfile, "%c ", i); + c += 2; + } + else + { + fprintf(outfile, "\\x%02x ", i); + c += 5; + } } } + fprintf(outfile, "\n"); } - fprintf(outfile, "\n"); } } @@ -3164,15 +3196,17 @@ while (!done) if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0) { int jit; - new_info(re, extra, PCRE_INFO_JIT, &jit); - if (jit) - fprintf(outfile, "JIT study was successful\n"); - else + if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0) + { + if (jit) + fprintf(outfile, "JIT study was successful\n"); + else #ifdef SUPPORT_JIT - fprintf(outfile, "JIT study was not successful\n"); + fprintf(outfile, "JIT study was not successful\n"); #else - fprintf(outfile, "JIT support is not available in this version of PCRE\n"); + fprintf(outfile, "JIT support is not available in this version 
of PCRE\n"); #endif + } } } } @@ -3265,7 +3299,7 @@ while (!done) *copynames = 0; *getnames = 0; - + cn16ptr = copynames; gn16ptr = getnames; cn8ptr = copynames8; @@ -3314,8 +3348,24 @@ while (!done) { int i = 0; int n = 0; - - if (c == '\\') switch ((c = *p++)) + + /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes. + In non-UTF mode, allow the value of the byte to fall through to later, + where values greater than 127 are turned into UTF-8 when running in + 16-bit mode. */ + + if (c != '\\') + { + if (use_utf) + { + *q++ = c; + continue; + } + } + + /* Handle backslash escapes */ + + else switch ((c = *p++)) { case 'a': c = 7; break; case 'b': c = '\b'; break; @@ -3331,24 +3381,9 @@ while (!done) c -= '0'; while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') c = c * 8 + *p++ - '0'; - -#if !defined NOUTF - if (use_utf && c > 255) - { - pcre_uint8 buff8[8]; - int ii, utn; - utn = ord2utf8(c, buff8); - for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii]; - c = buff8[ii]; /* Last byte */ - } -#endif break; case 'x': - - /* Handle \x{..} specially - new Perl thing for utf8 */ - -#if !defined NOUTF if (*p == '{') { pcre_uint8 *pt = p; @@ -3363,39 +3398,17 @@ while (!done) c = c * 16 + tolower(*pt) - ((isdigit(*pt))? 
'0' : 'a' - 10); if (*pt == '}') { - pcre_uint8 buff8[8]; - int ii, utn; - if (use_utf) - { - utn = ord2utf8(c, buff8); - for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii]; - c = buff8[ii]; /* Last byte */ - } - else - { - if (c > 255) - { - if (use_pcre16) - fprintf(outfile, "** Character \\x{%x} is greater than 255.\n" - "** Because its input is first processed as 8-bit, pcretest " - "does not\n** support such characters in 16-bit mode when " - "UTF-16 is not set.\n", c); - else - fprintf(outfile, "** Character \\x{%x} is greater than 255 " - "and UTF-8 mode is not enabled.\n", c); - - fprintf(outfile, "** Truncation will probably give the wrong " - "result.\n"); - } - } p = pt + 1; break; } - /* Not correct form; fall through */ + /* Not correct form for \x{...}; fall through */ } -#endif - /* Ordinary \x */ + /* \x without {} always defines just one byte in 8-bit mode. This + allows UTF-8 characters to be constructed byte by byte, and also allows + invalid UTF-8 sequences to be made. Just copy the byte in UTF mode. + Otherwise, pass it down to later code so that it can be turned into + UTF-8 when running in 16-bit mode. */ c = 0; while (i++ < 2 && isxdigit(*p)) @@ -3403,6 +3416,11 @@ while (!done) c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10); p++; } + if (use_utf) + { + *q++ = c; + continue; + } break; case 0: /* \ followed by EOF allows for an empty line */ @@ -3606,8 +3624,36 @@ while (!done) } continue; } - *q++ = c; + + /* We now have a character value in c that may be greater than 255. In + 16-bit mode, we always convert characters to UTF-8 so that values greater + than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we + convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF + mode must have come from \x{...} or octal constructs because values from + \x.. get this far only in non-UTF mode. 
*/ + + if (use_pcre16 || use_utf) + { + pcre_uint8 buff8[8]; + int ii, utn; + utn = ord2utf8(c, buff8); + for (ii = 0; ii < utn; ii++) *q++ = buff8[ii]; + } + else + { + if (c > 255) + { + fprintf(outfile, "** Character \\x{%x} is greater than 255 " + "and UTF-8 mode is not enabled.\n", c); + fprintf(outfile, "** Truncation will probably give the wrong " + "result.\n"); + } + *q++ = c; + } } + + /* Reached end of subject string */ + *q = 0; len = (int)(q - dbuffer); @@ -3693,21 +3739,26 @@ while (!done) #ifdef SUPPORT_PCRE16 if (use_pcre16) { - len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len); + len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len); switch(len) { - case -1: + case -1: fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " "converted to UTF-16\n"); goto NEXT_DATA; - + case -2: fprintf(outfile, "**Failed: character value greater than 0x10ffff " "cannot be converted to UTF-16\n"); goto NEXT_DATA; - + + case -3: + fprintf(outfile, "**Failed: character value greater than 0xffff " + "cannot be converted to 16-bit in non-UTF mode\n"); + goto NEXT_DATA; + default: - break; + break; } bptr = (pcre_uint8 *)buffer16; } @@ -3825,7 +3876,7 @@ while (!done) if (count >= 0) { int i, maxcount; - void *cnptr, *gnptr; + void *cnptr, *gnptr; #if !defined NODFA if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else @@ -3852,7 +3903,8 @@ while (!done) if (do_allcaps) { - new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); + if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0) + goto SKIP_DATA; count++; /* Allow for full match */ if (count * 2 > use_size_offsets) count = use_size_offsets/2; } @@ -3917,7 +3969,7 @@ while (!done) for (;;) { int rc; - char copybuffer[256]; + char copybuffer[256]; if (use_pcre16) { @@ -4000,7 +4052,7 @@ while (!done) PCRE_FREE_SUBSTRING(substring); putc('\n', outfile); } - + gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE; } @@ -4142,11 +4194,11 @@ while (!done) use_offsets[1]); 
fprintf(outfile, "\n"); break; - + case PCRE_ERROR_BADUTF8_OFFSET: fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count, use_pcre16? "16" : "8"); - break; + break; default: if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *)) diff --git a/testdata/saved16 b/testdata/saved16 Binary files differnew file mode 100644 index 0000000..ff5b11d --- /dev/null +++ b/testdata/saved16 diff --git a/testdata/saved8 b/testdata/saved8 Binary files differnew file mode 100644 index 0000000..9b63b1d --- /dev/null +++ b/testdata/saved8 diff --git a/testdata/testinput14 b/testdata/testinput14 index b672996..3789e5e 100644 --- a/testdata/testinput14 +++ b/testdata/testinput14 @@ -283,6 +283,8 @@ \) )* # optional trailing comment /xSI +<testdata/saved16 + /\h/SI /\v/SI diff --git a/testdata/testinput17 b/testdata/testinput17 index 38dc556..bdebe18 100644 --- a/testdata/testinput17 +++ b/testdata/testinput17 @@ -213,6 +213,8 @@ \) )* # optional trailing comment /xSI +<testdata/saved8 + /\h/SI /\v/SI diff --git a/testdata/testoutput14 b/testdata/testoutput14 index 171bd17..ff9a404 100644 --- a/testdata/testoutput14 +++ b/testdata/testoutput14 @@ -355,6 +355,12 @@ Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f +<testdata/saved16 +Compiled pattern loaded from testdata/saved16 +No study data +Error -28 from pcre_fullinfo(0) +Running in 8-bit mode but pattern was compiled in 16-bit mode + /\h/SI Capturing subpattern count = 0 No options diff --git a/testdata/testoutput17 b/testdata/testoutput17 index 4bb9986..0cc7b14 100644 --- a/testdata/testoutput17 +++ b/testdata/testoutput17 @@ -12,11 +12,7 @@ /\x{ffff}/ A\x{ffff}B -** Character \x{ffff} is greater than 255. -** Because its input is first processed as 8-bit, pcretest does not -** support such characters in 16-bit mode when UTF-16 is not set. 
-** Truncation will probably give the wrong result. -No match + 0: \x{ffff} /\x{10000}/ Failed: character value in \x{...} sequence is too large at offset 8 @@ -244,6 +240,12 @@ Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff +<testdata/saved8 +Compiled pattern loaded from testdata/saved8 +No study data +Error -28 from pcre16_fullinfo(0) +Running in 16-bit mode but pattern was compiled in 8-bit mode + /\h/SI Capturing subpattern count = 0 No options |