diff options
Diffstat (limited to 'ext/pcre/pcrelib/pcretest.c')
-rw-r--r-- | ext/pcre/pcrelib/pcretest.c | 382 |
1 files changed, 314 insertions, 68 deletions
diff --git a/ext/pcre/pcrelib/pcretest.c b/ext/pcre/pcrelib/pcretest.c index f5c6cdd2e1..431378c6dc 100644 --- a/ext/pcre/pcrelib/pcretest.c +++ b/ext/pcre/pcrelib/pcretest.c @@ -44,6 +44,10 @@ POSSIBILITY OF SUCH DAMAGE. #include <locale.h> #include <errno.h> +#ifndef _WIN32 +#include <sys/resource.h> +#endif + #define PCRE_SPY /* For Win32 build, import data, not export */ /* We include pcre_internal.h because we need the internal info for displaying @@ -101,11 +105,6 @@ function (define NOINFOCHECK). */ #define LOOPREPEAT 500000 -#define BUFFER_SIZE 30000 -#define PBUFFER_SIZE BUFFER_SIZE -#define DBUFFER_SIZE BUFFER_SIZE - - /* Static variables */ static FILE *outfile; @@ -119,11 +118,96 @@ static int show_malloc; static int use_utf8; static size_t gotten_store; +/* The buffers grow automatically if very long input lines are encountered. */ + +static int buffer_size = 50000; +static uschar *buffer = NULL; +static uschar *dbuffer = NULL; static uschar *pbuffer = NULL; /************************************************* +* Read or extend an input line * +*************************************************/ + +/* Input lines are read into buffer, but both patterns and data lines can be +continued over multiple input lines. In addition, if the buffer fills up, we +want to automatically expand it so as to be able to handle extremely large +lines that are needed for certain stress tests. When the input buffer is +expanded, the other two buffers must also be expanded likewise, and the +contents of pbuffer, which are a copy of the input for callouts, must be +preserved (for when expansion happens for a data line). This is not the most +optimal way of handling this, but hey, this is just a test program! + +Arguments: + f the file to read + start where in buffer to start (this *must* be within buffer) + +Returns: pointer to the start of new data + could be a copy of start, or could be moved + NULL if no data read and EOF reached +*/ + +static uschar * +extend_inputline(FILE *f, uschar *start) +{ +uschar *here = start; + +for (;;) + { + int rlen = buffer_size - (here - buffer); + if (rlen > 1000) + { + int dlen; + if (fgets((char *)here, rlen, f) == NULL) + return (here == start)? NULL : start; + dlen = (int)strlen((char *)here); + if (dlen > 0 && here[dlen - 1] == '\n') return start; + here += dlen; + } + + else + { + int new_buffer_size = 2*buffer_size; + uschar *new_buffer = (unsigned char *)malloc(new_buffer_size); + uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size); + uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size); + + if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL) + { + fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size); + exit(1); + } + + memcpy(new_buffer, buffer, buffer_size); + memcpy(new_pbuffer, pbuffer, buffer_size); + + buffer_size = new_buffer_size; + + start = new_buffer + (start - buffer); + here = new_buffer + (here - buffer); + + free(buffer); + free(dbuffer); + free(pbuffer); + + buffer = new_buffer; + dbuffer = new_dbuffer; + pbuffer = new_pbuffer; + } + } + +return NULL; /* Control never gets here */ +} + + + + + + + +/************************************************* * Read number from string * *************************************************/ @@ -159,19 +243,19 @@ return(result); and returns the value of the character. Argument: - buffer a pointer to the byte vector - vptr a pointer to an int to receive the value + utf8bytes a pointer to the byte vector + vptr a pointer to an int to receive the value -Returns: > 0 => the number of bytes consumed - -6 to 0 => malformed UTF-8 character at offset = (-return) +Returns: > 0 => the number of bytes consumed + -6 to 0 => malformed UTF-8 character at offset = (-return) */ #if !defined NOUTF8 static int -utf82ord(unsigned char *buffer, int *vptr) +utf82ord(unsigned char *utf8bytes, int *vptr) { -int c = *buffer++; +int c = *utf8bytes++; int d = c; int i, j, s; @@ -191,7 +275,7 @@ d = (c & utf8_table3[i]) << s; for (j = 0; j < i; j++) { - c = *buffer++; + c = *utf8bytes++; if ((c & 0xc0) != 0x80) return -(j+1); s -= 6; d |= (c & 0x3f) << s; @@ -222,24 +306,24 @@ and encodes it as a UTF-8 character in 0 to 6 bytes. Arguments: cvalue the character value - buffer pointer to buffer for result - at least 6 bytes long + utf8bytes pointer to buffer for result - at least 6 bytes long Returns: number of characters placed in the buffer */ static int -ord2utf8(int cvalue, uschar *buffer) +ord2utf8(int cvalue, uschar *utf8bytes) { register int i, j; for (i = 0; i < utf8_table1_size; i++) if (cvalue <= utf8_table1[i]) break; -buffer += i; +utf8bytes += i; for (j = i; j > 0; j--) { - *buffer-- = 0x80 | (cvalue & 0x3f); + *utf8bytes-- = 0x80 | (cvalue & 0x3f); cvalue >>= 6; } -*buffer = utf8_table2[i] | cvalue; +*utf8bytes = utf8_table2[i] | cvalue; return i + 1; } @@ -461,8 +545,8 @@ if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0) * Byte flipping function * *************************************************/ -static long int -byteflip(long int value, int n) +static unsigned long int +byteflip(unsigned long int value, int n) { if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8); return ((value & 0x000000ff) << 24) | @@ -526,6 +610,32 @@ return count; /************************************************* +* Check newline indicator * +*************************************************/ + +/* This is used both at compile and run-time to check for <xxx> escapes, where +xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match. + +Arguments: + p points after the leading '<' + f file for error message + +Returns: appropriate PCRE_NEWLINE_xxx flags, or 0 +*/ + +static int +check_newline(uschar *p, FILE *f) +{ +if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR; +if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF; +if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF; +fprintf(f, "Unknown newline type at: <%s\n", p); +return 0; +} + + + +/************************************************* * Main Program * *************************************************/ @@ -553,16 +663,23 @@ int debug = 0; int done = 0; int all_use_dfa = 0; int yield = 0; +int stack_size; -unsigned char *buffer; -unsigned char *dbuffer; +/* These vectors store, end-to-end, a list of captured substring names. Assume +that 1024 is plenty long enough for the few names we'll be testing. */ + +uschar copynames[1024]; +uschar getnames[1024]; + +uschar *copynamesptr; +uschar *getnamesptr; /* Get buffers from malloc() so that Electric Fence will check their misuse -when I am debugging. */ +when I am debugging. They grow automatically when very long lines are read. */ -buffer = (unsigned char *)malloc(BUFFER_SIZE); -dbuffer = (unsigned char *)malloc(DBUFFER_SIZE); -pbuffer = (unsigned char *)malloc(PBUFFER_SIZE); +buffer = (unsigned char *)malloc(buffer_size); +dbuffer = (unsigned char *)malloc(buffer_size); +pbuffer = (unsigned char *)malloc(buffer_size); /* The outfile variable is static so that new_malloc can use it. The _setmode() stuff is some magic that I don't understand, but which apparently does good @@ -596,6 +713,28 @@ while (argc > 1 && argv[op][0] == '-') op++; argc--; } + else if (strcmp(argv[op], "-S") == 0 && argc > 2 && + ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)), + *endptr == 0)) + { +#ifdef _WIN32 + printf("PCRE: -S not supported on this OS\n"); + exit(1); +#else + int rc; + struct rlimit rlim; + getrlimit(RLIMIT_STACK, &rlim); + rlim.rlim_cur = stack_size * 1024 * 1024; + rc = setrlimit(RLIMIT_STACK, &rlim); + if (rc != 0) + { + printf("PCRE: setrlimit() failed with error %d\n", rc); + exit(1); + } + op++; + argc--; +#endif + } #if !defined NOPOSIX else if (strcmp(argv[op], "-p") == 0) posix = 1; #endif @@ -609,7 +748,8 @@ while (argc > 1 && argv[op][0] == '-') (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); printf(" %sUnicode properties support\n", rc? "" : "No "); (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc); - printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF"); + printf(" Newline sequence is %s\n", (rc == '\r')? "CR" : + (rc == '\n')? "LF" : "CRLF"); (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc); printf(" Internal link size = %d\n", rc); (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); @@ -625,7 +765,7 @@ while (argc > 1 && argv[op][0] == '-') else { printf("** Unknown or malformed option %s\n", argv[op]); - printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n"); + printf("Usage: pcretest [options] [<input> [<output>]]\n"); printf(" -C show PCRE compile-time options and exit\n"); printf(" -d debug: show compiled code; implies -i\n"); #if !defined NODFA @@ -637,6 +777,7 @@ while (argc > 1 && argv[op][0] == '-') #if !defined NOPOSIX printf(" -p use POSIX interface\n"); #endif + printf(" -S <n> set stack size to <n> megabytes\n"); printf(" -s output store (memory) used information\n" " -t time compilation and execution\n"); yield = 1; @@ -723,7 +864,7 @@ while (!done) use_utf8 = 0; if (infile == stdin) printf(" re> "); - if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break; + if (extend_inputline(infile, buffer) == NULL) break; if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); fflush(outfile); @@ -735,7 +876,7 @@ while (!done) if (*p == '<' && strchr((char *)(p+1), '<') == NULL) { - unsigned long int magic; + unsigned long int magic, get_options; uschar sbuf[8]; FILE *f; @@ -783,8 +924,8 @@ while (!done) /* Need to know if UTF-8 for printing data strings */ - new_info(re, NULL, PCRE_INFO_OPTIONS, &options); - use_utf8 = (options & PCRE_UTF8) != 0; + new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); + use_utf8 = (get_options & PCRE_UTF8) != 0; /* Now see if there is any following study data */ @@ -838,16 +979,8 @@ while (!done) pp++; } if (*pp != 0) break; - - len = BUFFER_SIZE - (pp - buffer); - if (len < 256) - { - fprintf(outfile, "** Expression too long - missing delimiter?\n"); - goto SKIP_DATA; - } - if (infile == stdin) printf(" > "); - if (fgets((char *)pp, len, infile) == NULL) + if ((pp = extend_inputline(infile, pp)) == NULL) { fprintf(outfile, "** Unexpected EOF\n"); done = 1; @@ -893,6 +1026,7 @@ while (!done) case 'F': do_flip = 1; break; case 'G': do_G = 1; break; case 'I': do_showinfo = 1; break; + case 'J': options |= PCRE_DUPNAMES; break; case 'M': log_store = 1; break; case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; @@ -927,6 +1061,15 @@ while (!done) *pp = 0; break; + case '<': + { + int x = check_newline(pp, outfile); + if (x == 0) goto SKIP_DATA; + options |= x; + while (*pp++ != '>'); + } + break; + case '\r': /* So that it works in Windows */ case '\n': case ' ': @@ -961,7 +1104,7 @@ while (!done) if (rc != 0) { - (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE); + (void)regerror(rc, &preg, (char *)buffer, buffer_size); fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer); goto SKIP_DATA; } @@ -1002,7 +1145,7 @@ while (!done) { for (;;) { - if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) + if (extend_inputline(infile, buffer) == NULL) { done = 1; goto CONTINUE; @@ -1163,13 +1306,13 @@ while (!done) if (do_flip) { all_options = byteflip(all_options, sizeof(all_options)); - } + } if ((all_options & PCRE_NOPARTIAL) != 0) fprintf(outfile, "Partial matching not supported\n"); if (get_options == 0) fprintf(outfile, "No options\n"); - else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n", + else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", ((get_options & PCRE_CASELESS) != 0)? " caseless" : "", ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", @@ -1181,14 +1324,30 @@ while (!done) ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "", ((get_options & PCRE_UTF8) != 0)? " utf8" : "", - ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : ""); + ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "", + ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : ""); + + switch (get_options & PCRE_NEWLINE_CRLF) + { + case PCRE_NEWLINE_CR: + fprintf(outfile, "Forced newline sequence: CR\n"); + break; + + case PCRE_NEWLINE_LF: + fprintf(outfile, "Forced newline sequence: LF\n"); + break; - if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0) - fprintf(outfile, "Case state changes\n"); + case PCRE_NEWLINE_CRLF: + fprintf(outfile, "Forced newline sequence: CRLF\n"); + break; + + default: + break; + } if (first_char == -1) { - fprintf(outfile, "First char at start or follows \\n\n"); + fprintf(outfile, "First char at start or follows newline\n"); } else if (first_char < 0) { @@ -1343,6 +1502,12 @@ while (!done) options = 0; + *copynames = 0; + *getnames = 0; + + copynamesptr = copynames; + getnamesptr = getnames; + pcre_callout = callout; first_callout = 1; callout_extra = 0; @@ -1351,15 +1516,24 @@ while (!done) callout_fail_id = -1; show_malloc = 0; - if (infile == stdin) printf("data> "); - if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) + if (extra != NULL) extra->flags &= + ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION); + + len = 0; + for (;;) { - done = 1; - goto CONTINUE; + if (infile == stdin) printf("data> "); + if (extend_inputline(infile, buffer + len) == NULL) + { + if (len > 0) break; + done = 1; + goto CONTINUE; + } + if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); + len = (int)strlen((char *)buffer); + if (buffer[len-1] == '\n') break; } - if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); - len = (int)strlen((char *)buffer); while (len > 0 && isspace(buffer[len-1])) len--; buffer[len] = 0; if (len == 0) break; @@ -1389,6 +1563,17 @@ while (!done) c -= '0'; while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') c = c * 8 + *p++ - '0'; + +#if !defined NOUTF8 + if (use_utf8 && c > 255) + { + unsigned char buff8[8]; + int ii, utn; + utn = ord2utf8(c, buff8); + for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii]; + c = buff8[ii]; /* Last byte */ + } +#endif break; case 'x': @@ -1450,14 +1635,14 @@ while (!done) } else if (isalnum(*p)) { - uschar name[256]; - uschar *npp = name; + uschar *npp = copynamesptr; while (isalnum(*p)) *npp++ = *p++; + *npp++ = 0; *npp = 0; - n = pcre_get_stringnumber(re, (char *)name); + n = pcre_get_stringnumber(re, (char *)copynamesptr); if (n < 0) - fprintf(outfile, "no parentheses with name \"%s\"\n", name); - else copystrings |= 1 << n; + fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr); + copynamesptr = npp; } else if (*p == '+') { @@ -1518,14 +1703,14 @@ while (!done) } else if (isalnum(*p)) { - uschar name[256]; - uschar *npp = name; + uschar *npp = getnamesptr; while (isalnum(*p)) *npp++ = *p++; + *npp++ = 0; *npp = 0; - n = pcre_get_stringnumber(re, (char *)name); + n = pcre_get_stringnumber(re, (char *)getnamesptr); if (n < 0) - fprintf(outfile, "no parentheses with name \"%s\"\n", name); - else getstrings |= 1 << n; + fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr); + getnamesptr = npp; } continue; @@ -1564,6 +1749,28 @@ while (!done) options |= PCRE_PARTIAL; continue; + case 'Q': + while(isdigit(*p)) n = n * 10 + *p++ - '0'; + if (extra == NULL) + { + extra = (pcre_extra *)malloc(sizeof(pcre_extra)); + extra->flags = 0; + } + extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; + extra->match_limit_recursion = n; + continue; + + case 'q': + while(isdigit(*p)) n = n * 10 + *p++ - '0'; + if (extra == NULL) + { + extra = (pcre_extra *)malloc(sizeof(pcre_extra)); + extra->flags = 0; + } + extra->flags |= PCRE_EXTRA_MATCH_LIMIT; + extra->match_limit = n; + continue; + #if !defined NODFA case 'R': options |= PCRE_DFA_RESTART; @@ -1581,6 +1788,15 @@ while (!done) case '?': options |= PCRE_NO_UTF8_CHECK; continue; + + case '<': + { + int x = check_newline(p, outfile); + if (x == 0) goto NEXT_DATA; + options |= x; + while (*p++ != '>'); + } + continue; } *q++ = c; } @@ -1611,7 +1827,7 @@ while (!done) if (rc != 0) { - (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE); + (void)regerror(rc, &preg, (char *)buffer, buffer_size); fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer); } else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE) @@ -1690,7 +1906,7 @@ while (!done) extra->flags = 0; } - count = check_match_limit(re, extra, bptr, len, start_offset, + (void)check_match_limit(re, extra, bptr, len, start_offset, options|g_notempty, use_offsets, use_size_offsets, PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit), PCRE_ERROR_MATCHLIMIT, "match()"); @@ -1778,7 +1994,7 @@ while (!done) { if ((copystrings & (1 << i)) != 0) { - char copybuffer[16]; + char copybuffer[256]; int rc = pcre_copy_substring((char *)bptr, use_offsets, count, i, copybuffer, sizeof(copybuffer)); if (rc < 0) @@ -1788,6 +2004,19 @@ while (!done) } } + for (copynamesptr = copynames; + *copynamesptr != 0; + copynamesptr += (int)strlen((char*)copynamesptr) + 1) + { + char copybuffer[256]; + int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets, + count, (char *)copynamesptr, copybuffer, sizeof(copybuffer)); + if (rc < 0) + fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc); + else + fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr); + } + for (i = 0; i < 32; i++) { if ((getstrings & (1 << i)) != 0) @@ -1800,12 +2029,27 @@ while (!done) else { fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc); - /* free((void *)substring); */ pcre_free_substring(substring); } } } + for (getnamesptr = getnames; + *getnamesptr != 0; + getnamesptr += (int)strlen((char*)getnamesptr) + 1) + { + const char *substring; + int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets, + count, (char *)getnamesptr, &substring); + if (rc < 0) + fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc); + else + { + fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr); + pcre_free_substring(substring); + } + } + if (getlist) { const char **stringlist; @@ -1905,6 +2149,8 @@ while (!done) len -= use_offsets[1]; } } /* End of loop for /g and /G */ + + NEXT_DATA: continue; } /* End of loop for data lines */ CONTINUE: |