summary | refs | log | tree | commit | diff
path: root/pcretest.c
diff options
context:
space:
mode:
author: nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:40:45 +0000
committer: nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:40:45 +0000
commit: 97cb05691b9cabed35f1a853c74d48c692aaabcf (patch)
tree: cb7c68a44f0b79c6d90d9a18a7ec640c8435a5e7 /pcretest.c
parent: 455fcc7e13a175722acfd2cca6ab99caa9606a22 (diff)
download: pcre-97cb05691b9cabed35f1a853c74d48c692aaabcf.tar.gz
Load pcre-6.0 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@77 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcretest.c')
-rw-r--r-- pcretest.c 221
1 files changed, 121 insertions, 100 deletions
diff --git a/pcretest.c b/pcretest.c
index e531cc1..0e9a38f 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -4,7 +4,7 @@
/* This program was hacked up as a tester for PCRE. I really should have
written it more tidily in the first place. Will I ever learn? It has grown and
-been extended and consequently is now rather untidy in places.
+been extended and consequently is now rather, er, *very* untidy in places.
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -44,11 +44,15 @@ POSSIBILITY OF SUCH DAMAGE.
#include <locale.h>
#include <errno.h>
-/* We need the internal info for displaying the results of pcre_study(). Also
-for getting the opcodes for showing compiled code. */
-
#define PCRE_SPY /* For Win32 build, import data, not export */
-#include "internal.h"
+
+/* We need the internal info for displaying the results of pcre_study() and
+other internal data; pcretest also uses some of the fixed tables, and generally
+has "inside information" compared to a program that strictly follows the PCRE
+API. */
+
+#include "pcre_internal.h"
+
/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
@@ -87,34 +91,6 @@ static size_t gotten_store;
static uschar *pbuffer = NULL;
-static const int utf8_table1[] = {
- 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
-
-static const int utf8_table2[] = {
- 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
-
-static const int utf8_table3[] = {
- 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
-
-
-
-/*************************************************
-* Print compiled regex *
-*************************************************/
-
-/* The code for doing this is held in a separate file that is also included in
-pcre.c when it is compiled with the debug switch. It defines a function called
-print_internals(), which uses a table of opcode lengths defined by the macro
-OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
-Unicode property names to numbers; this is kept in a separate file. */
-
-static uschar OP_lengths[] = { OP_LENGTHS };
-
-#include "ucp.h"
-#include "ucptypetable.c"
-#include "printint.c"
-
-
/*************************************************
* Read number from string *
@@ -143,42 +119,6 @@ return(result);
-/*************************************************
-* Convert character value to UTF-8 *
-*************************************************/
-
-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
-
-Arguments:
- cvalue the character value
- buffer pointer to buffer for result - at least 6 bytes long
-
-Returns: number of characters placed in the buffer
- -1 if input character is negative
- 0 if input character is positive but too big (only when
- int is longer than 32 bits)
-*/
-
-static int
-ord2utf8(int cvalue, unsigned char *buffer)
-{
-register int i, j;
-for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
- if (cvalue <= utf8_table1[i]) break;
-if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
-if (cvalue < 0) return -1;
-
-buffer += i;
-for (j = i; j > 0; j--)
- {
- *buffer-- = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
-*buffer = utf8_table2[i] | cvalue;
-return i + 1;
-}
-
/*************************************************
* Convert UTF-8 string to value *
@@ -214,7 +154,7 @@ if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
/* i now has a value in the range 1-5 */
s = 6*i;
-d = (c & utf8_table3[i]) << s;
+d = (c & _pcre_utf8_table3[i]) << s;
for (j = 0; j < i; j++)
{
@@ -226,8 +166,8 @@ for (j = 0; j < i; j++)
/* Check that encoding was the correct unique one */
-for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
- if (d <= utf8_table1[j]) break;
+for (j = 0; j < _pcre_utf8_table1_size; j++)
+ if (d <= _pcre_utf8_table1[j]) break;
if (j != i) return -(i+1);
/* Valid value */
@@ -403,7 +343,7 @@ static void *new_malloc(size_t size)
void *block = malloc(size);
gotten_store = size;
if (show_malloc)
- fprintf(outfile, "malloc %3d %p\n", size, block);
+ fprintf(outfile, "malloc %3d %p\n", (int)size, block);
return block;
}
@@ -421,7 +361,7 @@ static void *stack_malloc(size_t size)
{
void *block = malloc(size);
if (show_malloc)
- fprintf(outfile, "stack_malloc %3d %p\n", size, block);
+ fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
return block;
}
@@ -484,12 +424,14 @@ int showinfo = 0;
int showstore = 0;
int size_offsets = 45;
int size_offsets_max;
-int *offsets;
+int *offsets = NULL;
#if !defined NOPOSIX
int posix = 0;
#endif
int debug = 0;
int done = 0;
+int all_use_dfa = 0;
+int yield = 0;
unsigned char *buffer;
unsigned char *dbuffer;
@@ -522,6 +464,7 @@ while (argc > 1 && argv[op][0] == '-')
else if (strcmp(argv[op], "-t") == 0) timeit = 1;
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
+ else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
*endptr == 0))
@@ -558,8 +501,9 @@ while (argc > 1 && argv[op][0] == '-')
printf("** Unknown or malformed option %s\n", argv[op]);
printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
printf(" -C show PCRE compile-time options and exit\n");
- printf(" -d debug: show compiled code; implies -i\n"
- " -i show information about compiled pattern\n"
+ printf(" -d debug: show compiled code; implies -i\n");
+ printf(" -dfa force DFA matching for all subjects\n");
+ printf(" -i show information about compiled pattern\n"
" -m output memory used information\n"
" -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
@@ -567,7 +511,8 @@ while (argc > 1 && argv[op][0] == '-')
#endif
printf(" -s output store (memory) used information\n"
" -t time compilation and execution\n");
- return 1;
+ yield = 1;
+ goto EXIT;
}
op++;
argc--;
@@ -581,7 +526,8 @@ if (offsets == NULL)
{
printf("** Failed to get %d bytes of memory for offsets vector\n",
size_offsets_max * sizeof(int));
- return 1;
+ yield = 1;
+ goto EXIT;
}
/* Sort out the input and output files */
@@ -592,7 +538,8 @@ if (argc > 1)
if (infile == NULL)
{
printf("** Failed to open %s\n", argv[op]);
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
@@ -602,7 +549,8 @@ if (argc > 2)
if (outfile == NULL)
{
printf("** Failed to open %s\n", argv[op+1]);
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
@@ -802,6 +750,7 @@ while (!done)
{
switch (*pp++)
{
+ case 'f': options |= PCRE_FIRSTLINE; break;
case 'g': do_g = 1; break;
case 'i': options |= PCRE_CASELESS; break;
case 'm': options |= PCRE_MULTILINE; break;
@@ -831,7 +780,8 @@ while (!done)
case 'L':
ppp = pp;
- while (*ppp != '\n' && *ppp != ' ') ppp++;
+ /* The '\r' test here is so that it works on Windows */
+ while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
*ppp = 0;
if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
{
@@ -849,7 +799,10 @@ while (!done)
*pp = 0;
break;
- case '\n': case ' ': break;
+ case '\r': /* So that it works in Windows */
+ case '\n':
+ case ' ':
+ break;
default:
fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
@@ -869,6 +822,7 @@ while (!done)
if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
+ if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
rc = regcomp(&preg, (char *)p, cflags);
/* Compilation failed; go back for another re, skipping to blank line
@@ -1016,7 +970,7 @@ while (!done)
if (do_debug)
{
fprintf(outfile, "------------------------------------------------------------------\n");
- print_internals(re, outfile);
+ _pcre_printint(re, outfile);
}
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
@@ -1049,7 +1003,7 @@ while (!done)
if (size != regex_gotten_store) fprintf(outfile,
"Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
- size, regex_gotten_store);
+ (int)size, (int)regex_gotten_store);
fprintf(outfile, "Capturing subpattern count = %d\n", count);
if (backrefmax > 0)
@@ -1080,11 +1034,12 @@ while (!done)
fprintf(outfile, "Partial matching not supported\n");
if (get_options == 0) fprintf(outfile, "No options\n");
- else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
+ else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
+ ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
((get_options & PCRE_EXTRA) != 0)? " extra" : "",
@@ -1222,6 +1177,10 @@ while (!done)
}
fclose(f);
}
+
+ new_free(re);
+ if (extra != NULL) new_free(extra);
+ if (tables != NULL) new_free((void *)tables);
continue; /* With next regex */
}
} /* End of non-POSIX compile */
@@ -1244,6 +1203,7 @@ while (!done)
int gmatched = 0;
int start_offset = 0;
int g_notempty = 0;
+ int use_dfa = 0;
options = 0;
@@ -1309,7 +1269,7 @@ while (!done)
{
unsigned char buff8[8];
int ii, utn;
- utn = ord2utf8(c, buff8);
+ utn = _pcre_ord2utf8(c, buff8);
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
c = buff8[ii]; /* Last byte */
p = pt + 1;
@@ -1397,6 +1357,17 @@ while (!done)
}
continue;
+ case 'D':
+ if (posix || do_posix)
+ printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
+ else
+ use_dfa = 1;
+ continue;
+
+ case 'F':
+ options |= PCRE_DFA_SHORTEST;
+ continue;
+
case 'G':
if (isdigit(*p))
{
@@ -1439,7 +1410,8 @@ while (!done)
{
printf("** Failed to get %d bytes of memory for offsets vector\n",
size_offsets_max * sizeof(int));
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
use_size_offsets = n;
@@ -1450,6 +1422,10 @@ while (!done)
options |= PCRE_PARTIAL;
continue;
+ case 'R':
+ options |= PCRE_DFA_RESTART;
+ continue;
+
case 'S':
show_malloc = 1;
continue;
@@ -1467,6 +1443,12 @@ while (!done)
*q = 0;
len = q - dbuffer;
+ if ((all_use_dfa || use_dfa) && find_match_limit)
+ {
+ printf("**Match limit not relevant for DFA matching: ignored\n");
+ find_match_limit = 0;
+ }
+
/* Handle matching via the POSIX interface, which does not
support timing or playing with the match limit or callout data. */
@@ -1524,9 +1506,21 @@ while (!done)
register int i;
clock_t time_taken;
clock_t start_time = clock();
+
+ if (all_use_dfa || use_dfa)
+ {
+ int workspace[1000];
+ for (i = 0; i < LOOPREPEAT; i++)
+ count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
+ options | g_notempty, use_offsets, use_size_offsets, workspace,
+ sizeof(workspace)/sizeof(int));
+ }
+ else
+
for (i = 0; i < LOOPREPEAT; i++)
count = pcre_exec(re, extra, (char *)bptr, len,
start_offset, options | g_notempty, use_offsets, use_size_offsets);
+
time_taken = clock() - start_time;
fprintf(outfile, "Execute time %.3f milliseconds\n",
(((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
@@ -1597,16 +1591,28 @@ while (!done)
/* The normal case is just to do the match once, with the default
value of match_limit. */
- else
+ else if (all_use_dfa || use_dfa)
{
- count = pcre_exec(re, extra, (char *)bptr, len,
- start_offset, options | g_notempty, use_offsets, use_size_offsets);
+ int workspace[1000];
+ count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
+ options | g_notempty, use_offsets, use_size_offsets, workspace,
+ sizeof(workspace)/sizeof(int));
+ if (count == 0)
+ {
+ fprintf(outfile, "Matched, but too many subsidiary matches\n");
+ count = use_size_offsets/2;
+ }
}
- if (count == 0)
+ else
{
- fprintf(outfile, "Matched, but too many substrings\n");
- count = use_size_offsets/3;
+ count = pcre_exec(re, extra, (char *)bptr, len,
+ start_offset, options | g_notempty, use_offsets, use_size_offsets);
+ if (count == 0)
+ {
+ fprintf(outfile, "Matched, but too many substrings\n");
+ count = use_size_offsets/3;
+ }
}
/* Matched */
@@ -1692,7 +1698,11 @@ while (!done)
else if (count == PCRE_ERROR_PARTIAL)
{
- fprintf(outfile, "Partial match\n");
+ fprintf(outfile, "Partial match");
+ if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
+ fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
+ bptr + use_offsets[0]);
+ fprintf(outfile, "\n");
break; /* Out of the /g loop */
}
@@ -1770,17 +1780,28 @@ while (!done)
if (posix || do_posix) regfree(&preg);
#endif
- if (re != NULL) free(re);
- if (extra != NULL) free(extra);
+ if (re != NULL) new_free(re);
+ if (extra != NULL) new_free(extra);
if (tables != NULL)
{
- free((void *)tables);
+ new_free((void *)tables);
setlocale(LC_CTYPE, "C");
}
}
if (infile == stdin) fprintf(outfile, "\n");
-return 0;
+
+EXIT:
+
+if (infile != NULL && infile != stdin) fclose(infile);
+if (outfile != NULL && outfile != stdout) fclose(outfile);
+
+free(buffer);
+free(dbuffer);
+free(pbuffer);
+free(offsets);
+
+return yield;
}
-/* End */
+/* End of pcretest.c */