diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-09-06 15:02:07 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-09-06 15:02:07 +0000 |
commit | dd829a0cc64612821a1a41e5077aab091b72c87b (patch) | |
tree | 61d2f2304af31e5ef548f099a2e459be109d86de | |
parent | 82bb1634d1b0763e2209729fd268548ecb81e83b (diff) | |
download | pcre-dd829a0cc64612821a1a41e5077aab091b72c87b.tar.gz |
Update pcregrep to use JIT by default with options to disable.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@685 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | CMakeLists.txt | 11 | ||||
-rw-r--r-- | README | 5 | ||||
-rwxr-xr-x | RunGrepTest | 4 | ||||
-rw-r--r-- | config-cmake.h.in | 1 | ||||
-rw-r--r-- | configure.ac | 14 | ||||
-rw-r--r-- | doc/pcrebuild.3 | 9 | ||||
-rw-r--r-- | doc/pcregrep.1 | 10 | ||||
-rw-r--r-- | doc/pcrejit.3 | 1 | ||||
-rw-r--r-- | pcregrep.c | 43 | ||||
-rw-r--r-- | testdata/grepoutput | 6 |
10 files changed, 88 insertions, 16 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 51ef0cd..b618b93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ # 2011-08-01 PH added PCREGREP_BUFSIZE # 2011-08-22 PH added PCRE_SUPPORT_JIT # 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov +# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT PROJECT(PCRE C CXX) @@ -117,6 +118,9 @@ SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING SET(PCRE_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.") +SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL + "Enable use of Just-in-time compiling in pcregrep.") + SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL "Enable support for Unicode properties. (If set, UTF-8 support will be enabled as well)") @@ -213,8 +217,14 @@ ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES) IF(PCRE_SUPPORT_JIT) SET(SUPPORT_JIT 1) +ELSE(PCRE_SUPPORT_JIT) + SET(PCRE_SUPPORT_PCREGREP_JIT 0) ENDIF(PCRE_SUPPORT_JIT) +IF(PCRE_SUPPORT_PCREGREP_JIT) + SET(SUPPORT_PCREGREP_JIT 1) +ENDIF(PCRE_SUPPORT_PCREGREP_JIT) + # This next one used to contain # SET(PCRETEST_LIBS ${READLINE_LIBRARY}) # but I was advised to add the NCURSES test as well, along with @@ -586,6 +596,7 @@ IF(PCRE_SHOW_REPORT) MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}") + MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}") MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}") MESSAGE(STATUS " Build tests (implies pcretest) .. : ${PCRE_BUILD_TESTS}") IF(ZLIB_FOUND) @@ -179,6 +179,9 @@ library. They are also documented in the pcrebuild man page. "configure" command. This support is available only for certain hardware architectures. If you try to enable it on an unsupported architecture, there will be a compile time error. + +. 
When JIT support is enabled, pcregrep automatically makes use of it, unless + you add --disable-pcregrep-jit to the "configure" command. . If you want to make use of the support for UTF-8 Unicode character strings in PCRE, you must add --enable-utf8 to the "configure" command. Without it, the @@ -839,4 +842,4 @@ The distribution should contain the following files: Philip Hazel Email local part: ph10 Email domain: cam.ac.uk -Last updated: 27 August 2011 +Last updated: 06 September 2011 diff --git a/RunGrepTest b/RunGrepTest index e8f57e4..b70689e 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -305,11 +305,11 @@ echo "---------------------------- Test 61 -----------------------------" >>test echo "RC=$?" >>testtry echo "---------------------------- Test 62 -----------------------------" >>testtry -(cd $srcdir; $valgrind $pcregrep --match-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1 +(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1 echo "RC=$?" >>testtry echo "---------------------------- Test 63 -----------------------------" >>testtry -(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1 +(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1 echo "RC=$?" 
>>testtry echo "---------------------------- Test 64 ------------------------------" >>testtry diff --git a/config-cmake.h.in b/config-cmake.h.in index e6d94dc..d790ea3 100644 --- a/config-cmake.h.in +++ b/config-cmake.h.in @@ -19,6 +19,7 @@ #cmakedefine PCRE_STATIC 1 #cmakedefine SUPPORT_JIT 1 +#cmakedefine SUPPORT_PCREGREP_JIT 1 #cmakedefine SUPPORT_UTF8 1 #cmakedefine SUPPORT_UCP 1 #cmakedefine EBCDIC 1 diff --git a/configure.ac b/configure.ac index 192adc9..de027bb 100644 --- a/configure.ac +++ b/configure.ac @@ -118,6 +118,12 @@ AC_ARG_ENABLE(jit, [enable Just-In-Time compiling support]), , enable_jit=no) +# Handle --disable-pcregrep-jit (enabled by default) +AC_ARG_ENABLE(pcregrep-jit, + AS_HELP_STRING([--disable-pcregrep-jit], + [disable JIT support in pcregrep]), + , enable_pcregrep_jit=yes) + # Handle --enable-rebuild-chartables AC_ARG_ENABLE(rebuild-chartables, AS_HELP_STRING([--enable-rebuild-chartables], @@ -478,6 +484,13 @@ AC_SUBST(PCRE_STATIC_CFLAG) if test "$enable_jit" = "yes"; then AC_DEFINE([SUPPORT_JIT], [], [ Define to enable support for Just-In-Time compiling.]) +else + enable_pcregrep_jit="no" +fi + +if test "$enable_pcregrep_jit" = "yes"; then + AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [ + Define to enable JIT support in pcregrep.]) fi if test "$enable_utf8" = "yes"; then @@ -757,6 +770,7 @@ $PACKAGE-$VERSION configuration summary: Match limit recursion ........... : ${with_match_limit_recursion} Build shared libs ............... : ${enable_shared} Build static libs ............... : ${enable_static} + Use JIT in pcregrep ............. : ${enable_pcregrep_jit} Buffer size for pcregrep ........ : ${with_pcregrep_bufsize} Link pcregrep with libz ......... : ${enable_pcregrep_libz} Link pcregrep with libbz2 ....... 
: ${enable_pcregrep_libbz2} diff --git a/doc/pcrebuild.3 b/doc/pcrebuild.3 index b8c12b7..e63b41a 100644 --- a/doc/pcrebuild.3 +++ b/doc/pcrebuild.3 @@ -111,7 +111,12 @@ See the .\" HREF \fBpcrejit\fP .\" -documentation for a discussion of JIT usage. +documentation for a discussion of JIT usage. When JIT support is enabled, +pcregrep automatically makes use of it, unless you add +.sp + --disable-pcregrep-jit +.sp +to the "configure" command. . . .SH "CODE VALUE OF NEWLINE" @@ -383,6 +388,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 27 August 2011 +Last updated: 06 September 2011 Copyright (c) 1997-2011 University of Cambridge. .fi diff --git a/doc/pcregrep.1 b/doc/pcregrep.1 index 514e94f..c446a8a 100644 --- a/doc/pcregrep.1 +++ b/doc/pcregrep.1 @@ -83,6 +83,7 @@ If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set, \fBpcregrep\fP uses the value to set a locale when calling the PCRE library. The \fB--locale\fP option can be used to override this. . +. .SH "SUPPORT FOR COMPRESSED FILES" .rs .sp @@ -387,6 +388,13 @@ for matching lines or a hyphen for context lines. If the filename is also being output, it precedes the line number. This option is forced if \fB--line-offsets\fP is used. .TP +\fB--no-jit\fP +If the PCRE library is built with support for just-in-time compiling (which +speeds up matching), \fBpcregrep\fP automatically makes use of this, unless it +was explicitly disabled at build time. This option can be used to disable the +use of JIT at run time. It is provided for testing and working round problems. +It should never be needed in normal use. +.TP \fB-o\fP, \fB--only-matching\fP Show only the part of the line that matched a pattern instead of the whole line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and @@ -569,6 +577,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 30 July 2011 +Last updated: 06 September 2011 Copyright (c) 1997-2011 University of Cambridge. 
.fi diff --git a/doc/pcrejit.3 b/doc/pcrejit.3 index 78d0513..522fbef 100644 --- a/doc/pcrejit.3 +++ b/doc/pcrejit.3 @@ -232,6 +232,7 @@ callback. /* Check results */ pcre_free(re); pcre_free_study(extra); + pcre_jit_stack_free(jit_stack); .sp . . @@ -168,7 +168,12 @@ static int error_count = 0; static int filenames = FN_DEFAULT; static int only_matching = -1; static int process_options = 0; + +#ifdef SUPPORT_PCREGREP_JIT +static int study_options = PCRE_STUDY_JIT_COMPILE; +#else static int study_options = 0; +#endif static unsigned long int match_limit = 0; static unsigned long int match_limit_recursion = 0; @@ -219,6 +224,7 @@ used to identify them. */ #define N_M_LIMIT (-13) #define N_M_LIMIT_REC (-14) #define N_BUFSIZE (-15) +#define N_NOJIT (-16) static option_item optionlist[] = { { OP_NODATA, N_NULL, NULL, "", " terminate options" }, @@ -239,7 +245,11 @@ static option_item optionlist[] = { { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, - { OP_NODATA, 'j', NULL, "jit", "use JIT compiler if available" }, +#ifdef SUPPORT_PCREGREP_JIT + { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" }, +#else + { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" }, +#endif { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, @@ -317,8 +327,9 @@ pcregrep_exit(int rc) { if (resource_error) { - fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit " - "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT); + fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit 
" + "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT, + PCRE_ERROR_JIT_STACKLIMIT); fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n"); } @@ -977,7 +988,8 @@ for (i = 0; i < pattern_count; i++) fprintf(stderr, "%s", msg); FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */ fprintf(stderr, "\n\n"); - if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT) + if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT || + *mrc == PCRE_ERROR_JIT_STACKLIMIT) resource_error = TRUE; if (error_count++ > 20) { @@ -1857,14 +1869,14 @@ switch(letter) { case N_FOFFSETS: file_offsets = TRUE; break; case N_HELP: help(); pcregrep_exit(0); - case N_LOFFSETS: line_offsets = number = TRUE; break; case N_LBUFFER: line_buffered = TRUE; break; + case N_LOFFSETS: line_offsets = number = TRUE; break; + case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break; case 'c': count_only = TRUE; break; case 'F': process_options |= PO_FIXED_STRINGS; break; case 'H': filenames = FN_FORCE; break; case 'h': filenames = FN_NONE; break; case 'i': options |= PCRE_CASELESS; break; - case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break; case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; case 'L': filenames = FN_NOMATCH_ONLY; break; case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; @@ -2048,6 +2060,10 @@ char *patterns[MAX_PATTERN_COUNT]; const char *locale_from = "--locale"; const char *error; +#ifdef SUPPORT_PCREGREP_JIT +pcre_jit_stack *jit_stack = NULL; +#endif + /* Set the default line ending value from the default in the PCRE library; "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". 
Note that the return values from pcre_config(), though derived from the ASCII @@ -2570,8 +2586,14 @@ if (pattern_filename != NULL) if (f != stdin) fclose(f); } -/* Study the regular expressions, as we will be running them many times */ +/* Study the regular expressions, as we will be running them many times. Unless +JIT has been explicitly disabled, arrange a stack for it to use. */ +#ifdef SUPPORT_PCREGREP_JIT +if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0) + jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024); +#endif + for (j = 0; j < pattern_count; j++) { hints_list[j] = pcre_study(pattern_list[j], study_options, &error); @@ -2583,6 +2605,10 @@ for (j = 0; j < pattern_count; j++) goto EXIT2; } hint_count++; +#ifdef SUPPORT_PCREGREP_JIT + if (jit_stack != NULL && hints_list[j] != NULL) + pcre_assign_jit_stack(hints_list[j], NULL, jit_stack); +#endif } /* If --match-limit or --recursion-limit was set, put the value(s) into the @@ -2689,6 +2715,9 @@ for (; i < argc; i++) } EXIT: +#ifdef SUPPORT_PCREGREP_JIT +if (jit_stack != NULL) pcre_jit_stack_free(jit_stack); +#endif if (main_buffer != NULL) free(main_buffer); if (pattern_list != NULL) { diff --git a/testdata/grepoutput b/testdata/grepoutput index 07e8586..9b3fb29 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -391,7 +391,7 @@ pcregrep: pcre_exec() gave error -8 while matching this text: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -pcregrep: Error -8 or -21 means that a resource limit was exceeded. +pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded. pcregrep: Check your regex for nested unlimited loops. ---------------------------- Test 38 ------------------------------ This line contains a binary zero here > @@ -515,7 +515,7 @@ This is a file of miscellaneous text that is used as test data for checking that the pcregrep command is working correctly. 
The file must be more than 24K long so that it needs more than a single read -pcregrep: Error -8 or -21 means that a resource limit was exceeded. +pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded. pcregrep: Check your regex for nested unlimited loops. RC=1 ---------------------------- Test 63 ----------------------------- @@ -525,7 +525,7 @@ This is a file of miscellaneous text that is used as test data for checking that the pcregrep command is working correctly. The file must be more than 24K long so that it needs more than a single read -pcregrep: Error -8 or -21 means that a resource limit was exceeded. +pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded. pcregrep: Check your regex for nested unlimited loops. RC=1 ---------------------------- Test 64 ------------------------------ |