summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-09-06 15:02:07 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-09-06 15:02:07 +0000
commitdd829a0cc64612821a1a41e5077aab091b72c87b (patch)
tree61d2f2304af31e5ef548f099a2e459be109d86de
parent82bb1634d1b0763e2209729fd268548ecb81e83b (diff)
downloadpcre-dd829a0cc64612821a1a41e5077aab091b72c87b.tar.gz
Update pcregrep to use JIT by default with options to disable.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@685 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--CMakeLists.txt11
-rw-r--r--README5
-rwxr-xr-xRunGrepTest4
-rw-r--r--config-cmake.h.in1
-rw-r--r--configure.ac14
-rw-r--r--doc/pcrebuild.39
-rw-r--r--doc/pcregrep.110
-rw-r--r--doc/pcrejit.31
-rw-r--r--pcregrep.c43
-rw-r--r--testdata/grepoutput6
10 files changed, 88 insertions, 16 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51ef0cd..b618b93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,6 +40,7 @@
# 2011-08-01 PH added PCREGREP_BUFSIZE
# 2011-08-22 PH added PCRE_SUPPORT_JIT
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
+# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
PROJECT(PCRE C CXX)
@@ -117,6 +118,9 @@ SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING
SET(PCRE_SUPPORT_JIT OFF CACHE BOOL
"Enable support for Just-in-time compiling.")
+SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL
+ "Enable use of Just-in-time compiling in pcregrep.")
+
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
"Enable support for Unicode properties. (If set, UTF-8 support will be enabled as well)")
@@ -213,8 +217,14 @@ ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES)
IF(PCRE_SUPPORT_JIT)
SET(SUPPORT_JIT 1)
+ELSE(PCRE_SUPPORT_JIT)
+ SET(PCRE_SUPPORT_PCREGREP_JIT 0)
ENDIF(PCRE_SUPPORT_JIT)
+IF(PCRE_SUPPORT_PCREGREP_JIT)
+ SET(SUPPORT_PCREGREP_JIT 1)
+ENDIF(PCRE_SUPPORT_PCREGREP_JIT)
+
# This next one used to contain
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
# but I was advised to add the NCURSES test as well, along with
@@ -586,6 +596,7 @@ IF(PCRE_SHOW_REPORT)
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
+ MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
MESSAGE(STATUS " Build tests (implies pcretest) .. : ${PCRE_BUILD_TESTS}")
IF(ZLIB_FOUND)
diff --git a/README b/README
index d154c96..63c14e2 100644
--- a/README
+++ b/README
@@ -179,6 +179,9 @@ library. They are also documented in the pcrebuild man page.
"configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error.
+
+. When JIT support is enabled, pcregrep automatically makes use of it, unless
+ you add --disable-pcregrep-jit to the "configure" command.
. If you want to make use of the support for UTF-8 Unicode character strings in
PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
@@ -839,4 +842,4 @@ The distribution should contain the following files:
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 27 August 2011
+Last updated: 06 September 2011
diff --git a/RunGrepTest b/RunGrepTest
index e8f57e4..b70689e 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -305,11 +305,11 @@ echo "---------------------------- Test 61 -----------------------------" >>test
echo "RC=$?" >>testtry
echo "---------------------------- Test 62 -----------------------------" >>testtry
-(cd $srcdir; $valgrind $pcregrep --match-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
+(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 63 -----------------------------" >>testtry
-(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
+(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 64 ------------------------------" >>testtry
diff --git a/config-cmake.h.in b/config-cmake.h.in
index e6d94dc..d790ea3 100644
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@@ -19,6 +19,7 @@
#cmakedefine PCRE_STATIC 1
#cmakedefine SUPPORT_JIT 1
+#cmakedefine SUPPORT_PCREGREP_JIT 1
#cmakedefine SUPPORT_UTF8 1
#cmakedefine SUPPORT_UCP 1
#cmakedefine EBCDIC 1
diff --git a/configure.ac b/configure.ac
index 192adc9..de027bb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -118,6 +118,12 @@ AC_ARG_ENABLE(jit,
[enable Just-In-Time compiling support]),
, enable_jit=no)
+# Handle --disable-pcregrep-jit (enabled by default)
+AC_ARG_ENABLE(pcregrep-jit,
+ AS_HELP_STRING([--disable-pcregrep-jit],
+ [disable JIT support in pcregrep]),
+ , enable_pcregrep_jit=yes)
+
# Handle --enable-rebuild-chartables
AC_ARG_ENABLE(rebuild-chartables,
AS_HELP_STRING([--enable-rebuild-chartables],
@@ -478,6 +484,13 @@ AC_SUBST(PCRE_STATIC_CFLAG)
if test "$enable_jit" = "yes"; then
AC_DEFINE([SUPPORT_JIT], [], [
Define to enable support for Just-In-Time compiling.])
+else
+ enable_pcregrep_jit="no"
+fi
+
+if test "$enable_pcregrep_jit" = "yes"; then
+ AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [
+ Define to enable JIT support in pcregrep.])
fi
if test "$enable_utf8" = "yes"; then
@@ -757,6 +770,7 @@ $PACKAGE-$VERSION configuration summary:
Match limit recursion ........... : ${with_match_limit_recursion}
Build shared libs ............... : ${enable_shared}
Build static libs ............... : ${enable_static}
+ Use JIT in pcregrep ............. : ${enable_pcregrep_jit}
Buffer size for pcregrep ........ : ${with_pcregrep_bufsize}
Link pcregrep with libz ......... : ${enable_pcregrep_libz}
Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
diff --git a/doc/pcrebuild.3 b/doc/pcrebuild.3
index b8c12b7..e63b41a 100644
--- a/doc/pcrebuild.3
+++ b/doc/pcrebuild.3
@@ -111,7 +111,12 @@ See the
.\" HREF
\fBpcrejit\fP
.\"
-documentation for a discussion of JIT usage.
+documentation for a discussion of JIT usage. When JIT support is enabled,
+pcregrep automatically makes use of it, unless you add
+.sp
+ --disable-pcregrep-jit
+.sp
+to the "configure" command.
.
.
.SH "CODE VALUE OF NEWLINE"
@@ -383,6 +388,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 27 August 2011
+Last updated: 06 September 2011
Copyright (c) 1997-2011 University of Cambridge.
.fi
diff --git a/doc/pcregrep.1 b/doc/pcregrep.1
index 514e94f..c446a8a 100644
--- a/doc/pcregrep.1
+++ b/doc/pcregrep.1
@@ -83,6 +83,7 @@ If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set,
\fBpcregrep\fP uses the value to set a locale when calling the PCRE library.
The \fB--locale\fP option can be used to override this.
.
+.
.SH "SUPPORT FOR COMPRESSED FILES"
.rs
.sp
@@ -387,6 +388,13 @@ for matching lines or a hyphen for context lines. If the filename is also being
output, it precedes the line number. This option is forced if
\fB--line-offsets\fP is used.
.TP
+\fB--no-jit\fP
+If the PCRE library is built with support for just-in-time compiling (which
+speeds up matching), \fBpcregrep\fP automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+.TP
\fB-o\fP, \fB--only-matching\fP
Show only the part of the line that matched a pattern instead of the whole
line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
@@ -569,6 +577,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 30 July 2011
+Last updated: 06 September 2011
Copyright (c) 1997-2011 University of Cambridge.
.fi
diff --git a/doc/pcrejit.3 b/doc/pcrejit.3
index 78d0513..522fbef 100644
--- a/doc/pcrejit.3
+++ b/doc/pcrejit.3
@@ -232,6 +232,7 @@ callback.
/* Check results */
pcre_free(re);
pcre_free_study(extra);
+ pcre_jit_stack_free(jit_stack);
.sp
.
.
diff --git a/pcregrep.c b/pcregrep.c
index 64f1a00..69e00c7 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -168,7 +168,12 @@ static int error_count = 0;
static int filenames = FN_DEFAULT;
static int only_matching = -1;
static int process_options = 0;
+
+#ifdef SUPPORT_PCREGREP_JIT
+static int study_options = PCRE_STUDY_JIT_COMPILE;
+#else
static int study_options = 0;
+#endif
static unsigned long int match_limit = 0;
static unsigned long int match_limit_recursion = 0;
@@ -219,6 +224,7 @@ used to identify them. */
#define N_M_LIMIT (-13)
#define N_M_LIMIT_REC (-14)
#define N_BUFSIZE (-15)
+#define N_NOJIT (-16)
static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", " terminate options" },
@@ -239,7 +245,11 @@ static option_item optionlist[] = {
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
- { OP_NODATA, 'j', NULL, "jit", "use JIT compiler if available" },
+#ifdef SUPPORT_PCREGREP_JIT
+ { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
+#else
+ { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
+#endif
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
@@ -317,8 +327,9 @@ pcregrep_exit(int rc)
{
if (resource_error)
{
- fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
- "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
+ fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
+ "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
+ PCRE_ERROR_JIT_STACKLIMIT);
fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
}
@@ -977,7 +988,8 @@ for (i = 0; i < pattern_count; i++)
fprintf(stderr, "%s", msg);
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
fprintf(stderr, "\n\n");
- if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
+ if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
+ *mrc == PCRE_ERROR_JIT_STACKLIMIT)
resource_error = TRUE;
if (error_count++ > 20)
{
@@ -1857,14 +1869,14 @@ switch(letter)
{
case N_FOFFSETS: file_offsets = TRUE; break;
case N_HELP: help(); pcregrep_exit(0);
- case N_LOFFSETS: line_offsets = number = TRUE; break;
case N_LBUFFER: line_buffered = TRUE; break;
+ case N_LOFFSETS: line_offsets = number = TRUE; break;
+ case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
case 'c': count_only = TRUE; break;
case 'F': process_options |= PO_FIXED_STRINGS; break;
case 'H': filenames = FN_FORCE; break;
case 'h': filenames = FN_NONE; break;
case 'i': options |= PCRE_CASELESS; break;
- case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break;
case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
case 'L': filenames = FN_NOMATCH_ONLY; break;
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
@@ -2048,6 +2060,10 @@ char *patterns[MAX_PATTERN_COUNT];
const char *locale_from = "--locale";
const char *error;
+#ifdef SUPPORT_PCREGREP_JIT
+pcre_jit_stack *jit_stack = NULL;
+#endif
+
/* Set the default line ending value from the default in the PCRE library;
"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
Note that the return values from pcre_config(), though derived from the ASCII
@@ -2570,8 +2586,14 @@ if (pattern_filename != NULL)
if (f != stdin) fclose(f);
}
-/* Study the regular expressions, as we will be running them many times */
+/* Study the regular expressions, as we will be running them many times. Unless
+JIT has been explicitly disabled, arrange a stack for it to use. */
+#ifdef SUPPORT_PCREGREP_JIT
+if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
+ jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
+#endif
+
for (j = 0; j < pattern_count; j++)
{
hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
@@ -2583,6 +2605,10 @@ for (j = 0; j < pattern_count; j++)
goto EXIT2;
}
hint_count++;
+#ifdef SUPPORT_PCREGREP_JIT
+ if (jit_stack != NULL && hints_list[j] != NULL)
+ pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
+#endif
}
/* If --match-limit or --recursion-limit was set, put the value(s) into the
@@ -2689,6 +2715,9 @@ for (; i < argc; i++)
}
EXIT:
+#ifdef SUPPORT_PCREGREP_JIT
+if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
+#endif
if (main_buffer != NULL) free(main_buffer);
if (pattern_list != NULL)
{
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 07e8586..9b3fb29 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -391,7 +391,7 @@ pcregrep: pcre_exec() gave error -8 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
---------------------------- Test 38 ------------------------------
This line contains a binary zero here >
@@ -515,7 +515,7 @@ This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 63 -----------------------------
@@ -525,7 +525,7 @@ This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 64 ------------------------------