summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-04-19 17:30:38 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-04-19 17:30:38 +0000
commit: b45e17ad7d67d522e1d38780aeb74c31a83e8ac7 (patch)
tree: 0df7093935ea4c4a4e499bca3e93880157bb6edc
parent: 0f3d6a0cb51bff6c1c8ba82df3d5e8f7bb87465e (diff)
download: pcre-b45e17ad7d67d522e1d38780aeb74c31a83e8ac7.tar.gz
Fix long-standing DFA testing restart bug in pcretest, and add some
plausibility checks when restarting in pcre_dfa_exec().

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@960 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--  ChangeLog             5
-rw-r--r--  doc/pcreapi.3        17
-rw-r--r--  pcre.h.in             1
-rw-r--r--  pcre_dfa_exec.c      39
-rw-r--r--  pcretest.c          110
-rw-r--r--  testdata/testinput8   5
-rw-r--r--  testdata/testoutput8  6
7 files changed, 118 insertions, 65 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f6701b..3540768 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -82,6 +82,11 @@ Version 8.31
23. Support PCRE_NO_START_OPTIMIZE in JIT as (*MARK) support requires it.
+24. Fixed a very old bug in pcretest that caused errors with restarted DFA
+ matches in certain environments (the workspace was not being correctly
+ retained). Also added to pcre_dfa_exec() a simple plausibility check on
+ some of the workspace data at the beginning of a restart.
+
Version 8.30 04-February-2012
-----------------------------
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index d95ad58..87e7faa 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "14 April 2012" "PCRE 8.31"
+.TH PCREAPI 3 "19 April 2012" "PCRE 8.31"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@@ -2104,19 +2104,19 @@ just-in-time processing stack is not large enough. See the
.\"
documentation for more details.
.sp
- PCRE_ERROR_BADMODE (-28)
+ PCRE_ERROR_BADMODE (-28)
.sp
This error is given if a pattern that was compiled by the 8-bit library is
passed to a 16-bit library function, or vice versa.
.sp
- PCRE_ERROR_BADENDIANNESS (-29)
+ PCRE_ERROR_BADENDIANNESS (-29)
.sp
This error is given if a pattern that was compiled and saved is reloaded on a
host with different endianness. The utility function
\fBpcre_pattern_to_host_byte_order()\fP can be used to convert such a pattern
so that it runs on the new host.
.P
-Error numbers -16 to -20 and -22 are not used by \fBpcre_exec()\fP.
+Error numbers -16 to -20, -22, and -30 are not used by \fBpcre_exec()\fP.
.
.
.\" HTML <a name="badutf8reasons"></a>
@@ -2634,6 +2634,13 @@ When a recursive subpattern is processed, the matching function calls itself
recursively, using private vectors for \fIovector\fP and \fIworkspace\fP. This
error is given if the output vector is not large enough. This should be
extremely rare, as a vector of size 1000 is used.
+.sp
+ PCRE_ERROR_DFA_BADRESTART (-30)
+.sp
+When \fBpcre_dfa_exec()\fP is called with the \fBPCRE_DFA_RESTART\fP option,
+some plausibility checks are made on the contents of the workspace, which
+should contain data about the previous partial match. If any of these checks
+fail, this error is given.
.
.
.SH "SEE ALSO"
@@ -2658,6 +2665,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 14 April 2012
+Last updated: 19 April 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/pcre.h.in b/pcre.h.in
index 7306f99..3588cd3 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -179,6 +179,7 @@ compiling). */
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
#define PCRE_ERROR_BADMODE (-28)
#define PCRE_ERROR_BADENDIANNESS (-29)
+#define PCRE_ERROR_DFA_BADRESTART (-30)
/* Specific error codes for UTF-8 validity checks */
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 840a04c..efe4681 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
-FSM). This is NOT Perl- compatible, but it has advantages in certain
+FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */
@@ -282,7 +282,7 @@ typedef struct stateblock {
int data; /* Some use extra data */
} stateblock;
-#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
+#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
#ifdef PCRE_DEBUG
@@ -3162,10 +3162,27 @@ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
-/* We need to find the pointer to any study data before we test for byte
-flipping, so we scan the extra_data block first. This may set two fields in the
-match block, so we must initialize them beforehand. However, the other fields
-in the match block must not be set until after the byte flipping. */
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+
+if (re->magic_number != MAGIC_NUMBER)
+ return re->magic_number == REVERSED_MAGIC_NUMBER?
+ PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
+/* If restarting after a partial match, do some sanity checks on the contents
+of the workspace. */
+
+if ((options & PCRE_DFA_RESTART) != 0)
+ {
+ if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
+ workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
+ return PCRE_ERROR_DFA_BADRESTART;
+ }
+
+/* Set up study, callout, and table data */
md->tables = re->tables;
md->callout_data = NULL;
@@ -3184,16 +3201,6 @@ if (extra_data != NULL)
md->tables = extra_data->tables;
}
-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
-
-if (re->magic_number != MAGIC_NUMBER)
- return re->magic_number == REVERSED_MAGIC_NUMBER?
- PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-
/* Set some local values */
current_subject = (const pcre_uchar *)subject + start_offset;
diff --git a/pcretest.c b/pcretest.c
index c479763..62ae69a 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -46,7 +46,6 @@ pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
only supported library functions. */
-
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -60,8 +59,8 @@ only supported library functions. */
#include <errno.h>
/* Both libreadline and libedit are optionally supported. The user-supplied
-original patch uses readline/readline.h for libedit, but in at least one system
-it is installed as editline/readline.h, so the configuration code now looks for
+original patch uses readline/readline.h for libedit, but in at least one system
+it is installed as editline/readline.h, so the configuration code now looks for
that first, falling back to readline/readline.h. */
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
@@ -611,6 +610,10 @@ version is called. ----- */
#endif
#endif
+#if !defined NODFA
+#define DFA_WS_DIMENSION 1000
+#endif
+
/* This is the default loop count for timing. */
#define LOOPREPEAT 500000
@@ -625,7 +628,7 @@ static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
-static int jit_was_used;
+static int jit_was_used;
static int locale_set = 0;
static int show_malloc;
static int use_utf;
@@ -690,16 +693,16 @@ static int use_pcre16 = 1;
/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
static int jit_study_bits[] =
- {
- PCRE_STUDY_JIT_COMPILE,
+ {
+ PCRE_STUDY_JIT_COMPILE,
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
- PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
- PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
- PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
- PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
- PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
-};
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+ PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+ PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+ PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
+};
/* Textual explanations for runtime error codes */
@@ -732,7 +735,9 @@ static const char *errtexts[] = {
NULL, /* SHORTUTF8/16 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
- "pattern compiled in wrong mode: 8-bit/16-bit error"
+ "pattern compiled in wrong mode: 8-bit/16-bit error",
+ "pattern compiled with other endianness",
+ "invalid data in workspace for DFA restart"
};
@@ -2160,10 +2165,10 @@ printf(" -q quiet: do not output PCRE version number at start\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
" -s+ force each pattern to be studied, using JIT if available\n"
- " -s++ ditto, verifying when JIT was actually used\n"
+ " -s++ ditto, verifying when JIT was actually used\n"
" -s+n force each pattern to be studied, using JIT if available,\n"
- " where 1 <= n <= 7 selects JIT options\n"
- " -s++n ditto, verifying when JIT was actually used\n"
+ " where 1 <= n <= 7 selects JIT options\n"
+ " -s++n ditto, verifying when JIT was actually used\n"
" -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
@@ -2198,9 +2203,6 @@ int quiet = 0;
int size_offsets = 45;
int size_offsets_max;
int *offsets = NULL;
-#if !defined NOPOSIX
-int posix = 0;
-#endif
int debug = 0;
int done = 0;
int all_use_dfa = 0;
@@ -2208,6 +2210,13 @@ int verify_jit = 0;
int yield = 0;
int stack_size;
+#if !defined NOPOSIX
+int posix = 0;
+#endif
+#if !defined NODFA
+int *dfa_workspace = NULL;
+#endif
+
pcre_jit_stack *jit_stack = NULL;
/* These vectors store, end-to-end, a list of zero-terminated captured
@@ -2266,20 +2275,20 @@ version = pcre16_version();
while (argc > 1 && argv[op][0] == '-')
{
pcre_uint8 *endptr;
- char *arg = argv[op];
+ char *arg = argv[op];
if (strcmp(arg, "-m") == 0) showstore = 1;
else if (strcmp(arg, "-s") == 0) force_study = 0;
-
+
else if (strncmp(arg, "-s+", 3) == 0)
{
arg += 3;
if (*arg == '+') { arg++; verify_jit = TRUE; }
force_study = 1;
if (*arg == 0)
- force_study_options = jit_study_bits[6];
+ force_study_options = jit_study_bits[6];
else if (*arg >= '1' && *arg <= '7')
- force_study_options = jit_study_bits[*arg - '1'];
+ force_study_options = jit_study_bits[*arg - '1'];
else goto BAD_ARG;
}
else if (strcmp(arg, "-16") == 0)
@@ -2493,7 +2502,7 @@ are set, either both UTFs are supported or both are not supported. */
}
else
{
- BAD_ARG:
+ BAD_ARG:
printf("** Unknown or malformed option %s\n", arg);
usage();
yield = 1;
@@ -2591,6 +2600,10 @@ while (!done)
int do_showcaprest = 0;
int do_flip = 0;
int erroroffset, len, delimiter, poffset;
+
+#if !defined NODFA
+ int dfa_matched = 0;
+#endif
use_utf = 0;
debug_lengths = 1;
@@ -2813,12 +2826,12 @@ while (!done)
if (*(++pp) == '+')
{
verify_jit = TRUE;
- pp++;
- }
+ pp++;
+ }
if (*pp >= '1' && *pp <= '7')
study_options |= jit_study_bits[*pp++ - '1'];
- else
- study_options |= jit_study_bits[6];
+ else
+ study_options |= jit_study_bits[6];
}
}
else
@@ -3126,7 +3139,7 @@ while (!done)
new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
- new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
+ new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
!= 0)
goto SKIP_DATA;
@@ -3265,9 +3278,9 @@ while (!done)
fprintf(outfile, "%s\n", caseless);
}
}
-
- if (maxlookbehind > 0)
- fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+
+ if (maxlookbehind > 0)
+ fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
/* Don't output study size; at present it is in any case a fixed
value, but it varies, depending on the computer architecture, and
@@ -3908,7 +3921,7 @@ while (!done)
}
#endif
- /* Ensure that there is a JIT callback if we want to verify that JIT was
+ /* Ensure that there is a JIT callback if we want to verify that JIT was
actually used. If jit_stack == NULL, no stack has yet been assigned. */
if (verify_jit && jit_stack == NULL && extra != NULL)
@@ -3917,7 +3930,7 @@ while (!done)
for (;; gmatched++) /* Loop for /g or /G */
{
markptr = NULL;
- jit_was_used = FALSE;
+ jit_was_used = FALSE;
if (timeitm > 0)
{
@@ -3928,12 +3941,18 @@ while (!done)
#if !defined NODFA
if (all_use_dfa || use_dfa)
{
- int workspace[1000];
+ if ((options & PCRE_DFA_RESTART) != 0)
+ {
+ fprintf(outfile, "Timing DFA restarts is not supported\n");
+ break;
+ }
+ if (dfa_workspace == NULL)
+ dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
for (i = 0; i < timeitm; i++)
{
PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
- (options | g_notempty), use_offsets, use_size_offsets, workspace,
- (sizeof(workspace)/sizeof(int)));
+ (options | g_notempty), use_offsets, use_size_offsets,
+ dfa_workspace, DFA_WS_DIMENSION);
}
}
else
@@ -3999,10 +4018,13 @@ while (!done)
#if !defined NODFA
else if (all_use_dfa || use_dfa)
{
- int workspace[1000];
+ if (dfa_workspace == NULL)
+ dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
+ if (dfa_matched++ == 0)
+ dfa_workspace[0] = -1; /* To catch bad restart */
PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
- (options | g_notempty), use_offsets, use_size_offsets, workspace,
- (sizeof(workspace)/sizeof(int)));
+ (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
+ DFA_WS_DIMENSION);
if (count == 0)
{
fprintf(outfile, "Matched, but too many subsidiary matches\n");
@@ -4021,7 +4043,7 @@ while (!done)
count = use_size_offsets/3;
}
}
-
+
/* Matched */
if (count >= 0)
@@ -4079,7 +4101,7 @@ while (!done)
fprintf(outfile, "%2d: ", i/2);
PCHARSV(bptr, use_offsets[i],
use_offsets[i+1] - use_offsets[i], outfile);
- if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
+ if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
if (do_showcaprest || (i == 0 && do_showrest))
{
@@ -4246,7 +4268,7 @@ while (!done)
PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
outfile);
}
- if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
+ if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
break; /* Out of the /g loop */
}
@@ -4333,7 +4355,7 @@ while (!done)
fprintf(outfile, "No match, mark = ");
PCHARSV(markptr, 0, -1, outfile);
}
- if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
+ if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
putc('\n', outfile);
}
break;
diff --git a/testdata/testinput8 b/testdata/testinput8
index 5bef7a2..2b628a3 100644
--- a/testdata/testinput8
+++ b/testdata/testinput8
@@ -4782,4 +4782,9 @@
\r\r\r\P
\r\r\r\P\P
+/-- Test simple validity check for restarts --/
+
+/abcdef/
+ abc\R
+
/-- End of testinput8 --/
diff --git a/testdata/testoutput8 b/testdata/testoutput8
index fc64c56..c5f01e1 100644
--- a/testdata/testoutput8
+++ b/testdata/testoutput8
@@ -7990,4 +7990,10 @@ Partial match: \x0d\x0d
\r\r\r\P\P
Partial match: \x0d\x0d\x0d
+/-- Test simple validity check for restarts --/
+
+/abcdef/
+ abc\R
+Error -30 (invalid data in workspace for DFA restart)
+
/-- End of testinput8 --/