diff options
author | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:38:49 +0000 |
---|---|---|
committer | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:38:49 +0000 |
commit | 0c8732c8583c7e31476c0ec1c0ac92cc7e5f8bc0 (patch) | |
tree | 97b11a15f98ccfb7a200cc428c3bed36fd023f0f | |
parent | c87b6bbacc291c0a1e1d8a396de1b621151a7822 (diff) | |
download | pcre-0c8732c8583c7e31476c0ec1c0ac92cc7e5f8bc0.tar.gz |
Load pcre-2.02 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@27 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 21 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | README | 8 | ||||
-rw-r--r-- | Tech.Notes | 2 | ||||
-rw-r--r-- | dftables.c (renamed from deftables.c) | 6 | ||||
-rw-r--r-- | internal.h | 4 | ||||
-rw-r--r-- | maketables.c | 12 | ||||
-rw-r--r-- | pcre.3 | 43 | ||||
-rw-r--r-- | pcre.c | 13 | ||||
-rw-r--r-- | pcre.h | 2 | ||||
-rw-r--r-- | pcreposix.3 | 2 | ||||
-rw-r--r-- | pcreposix.c | 2 | ||||
-rw-r--r-- | pcreposix.h | 2 | ||||
-rw-r--r-- | pcretest.c | 19 | ||||
-rw-r--r-- | pgrep.1 | 2 | ||||
-rw-r--r-- | study.c | 11 | ||||
-rw-r--r-- | testinput3 | 25 | ||||
-rw-r--r-- | testoutput | 2 | ||||
-rw-r--r-- | testoutput2 | 2 | ||||
-rw-r--r-- | testoutput3 | 59 | ||||
-rw-r--r-- | testoutput4 | 2 |
21 files changed, 208 insertions, 49 deletions
@@ -2,6 +2,27 @@ ChangeLog for PCRE ------------------ +Version 2.02 14-Jan-99 +---------------------- + +1. Initialized the working variables associated with each extraction so that +their saving and restoring doesn't refer to uninitialized store. + +2. Put dummy code into study.c in order to trick the optimizer of the IBM C +compiler for OS/2 into generating correct code. Apparently IBM isn't going to +fix the problem. + +3. Pcretest: the timing code wasn't using LOOPREPEAT for timing execution +calls, and wasn't printing the correct value for compiling calls. Increased the +default value of LOOPREPEAT, and the number of significant figures in the +times. + +4. Changed "/bin/rm" in the Makefile to "-rm" so it works on Windows NT. + +5. Renamed "deftables" as "dftables" to get it down to 8 characters, to avoid +a building problem on Windows NT with a FAT file system. + + Version 2.01 21-Oct-98 ---------------------- @@ -27,15 +27,15 @@ pgrep: libpcre.a pgrep.o $(CC) $(CFLAGS) -o pgrep pgrep.o libpcre.a pcretest: libpcre.a libpcreposix.a pcretest.o - $(CC) $(CFLAGS) -o pcretest pcretest.o libpcre.a libpcreposix.a + $(PURIFY) $(CC) $(CFLAGS) -o pcretest pcretest.o libpcre.a libpcreposix.a libpcre.a: $(OBJ) - /bin/rm -f libpcre.a + -rm -f libpcre.a $(AR) libpcre.a $(OBJ) $(RANLIB) libpcre.a libpcreposix.a: pcreposix.o - /bin/rm -f libpcreposix.a + -rm -f libpcreposix.a $(AR) libpcreposix.a pcreposix.o $(RANLIB) libpcreposix.a @@ -59,17 +59,17 @@ pgrep.o: pgrep.c pcre.h Makefile # An auxiliary program makes the default character table source -chartables.c: deftables - ./deftables >chartables.c +chartables.c: dftables + ./dftables >chartables.c -deftables: deftables.c maketables.c pcre.h internal.h Makefile - $(CC) -o deftables $(CFLAGS) deftables.c +dftables: dftables.c maketables.c pcre.h internal.h Makefile + $(CC) -o dftables $(CFLAGS) dftables.c -# We deliberately omit deftables and chartables.c from 'make clean'; once made +# We deliberately omit dftables and chartables.c from 'make clean'; once made # chartables.c shouldn't change, and if people have edited the tables by hand, # you don't want to throw them away. -clean:; /bin/rm -f *.o *.a pcretest pgrep +clean:; -rm -f *.o *.a pcretest pgrep runtest: all ./RunTest @@ -27,7 +27,7 @@ The distribution should contain the following files: Tech.Notes notes on the encoding pcre.3 man page for the functions pcreposix.3 man page for the POSIX wrapper API - deftables.c auxiliary program for building chartables.c + dftables.c auxiliary program for building chartables.c maketables.c ) study.c ) source of pcre.c ) the functions @@ -115,8 +115,8 @@ is passed as NULL, a set of default tables that is built into the binary is used. The source file called chartables.c contains the default set of tables. This is -not supplied in the distribution, but is built by the program deftables -(compiled from deftables.c), which uses the ANSI C character handling functions +not supplied in the distribution, but is built by the program dftables +(compiled from dftables.c), which uses the ANSI C character handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to build the table sources. This means that the default C locale set your system will control the contents of the tables. You can change the default tables by editing @@ -291,4 +291,4 @@ contains malformed regular expressions, in order to check that PCRE diagnoses them correctly. Philip Hazel <ph10@cam.ac.uk> -October 1998 +January 1999 @@ -234,4 +234,4 @@ the compiled data. Philip Hazel -October 1998 +January 1999 @@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1998 University of Cambridge + Copyright (c) 1997-1999 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -42,7 +42,7 @@ order to be consistent. */ #include "internal.h" -#define DEFTABLES +#define DFTABLES /* maketables.c notices this */ #include "maketables.c" @@ -139,4 +139,4 @@ printf(" */\n\n/* End of chartables.c */\n"); return 0; } -/* End of deftables.c */ +/* End of dftables.c */ @@ -3,7 +3,7 @@ *************************************************/ -#define PCRE_VERSION "2.01 21-Oct-1998" +#define PCRE_VERSION "2.02 14-Jan-1999" /* This is a library of functions to support regular expressions whose syntax @@ -12,7 +12,7 @@ the file Tech.Notes for some information on the internals. Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1998 University of Cambridge + Copyright (c) 1997-1999 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any diff --git a/maketables.c b/maketables.c index 370a0e9..01943d3 100644 --- a/maketables.c +++ b/maketables.c @@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1998 University of Cambridge + Copyright (c) 1997-1999 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -31,10 +31,10 @@ See the file Tech.Notes for some information on the internals. /* This file is compiled on its own as part of the PCRE library. However, -it is also included in the compilation of deftables.c, in which case the macro -DEFTABLES is defined. */ +it is also included in the compilation of dftables.c, in which case the macro +DFTABLES is defined. */ -#ifndef DEFTABLES +#ifndef DFTABLES #include "internal.h" #endif @@ -48,7 +48,7 @@ DEFTABLES is defined. */ a pointer to them. They are build using the ctype functions, and consequently their contents will depend upon the current locale setting. When compiled as part of the library, the store is obtained via pcre_malloc(), but when compiled -inside deftables, use malloc(). +inside dftables, use malloc(). Arguments: none Returns: pointer to the contiguous block of data @@ -60,7 +60,7 @@ pcre_maketables(void) unsigned char *yield, *p; int i; -#ifndef DEFTABLES +#ifndef DFTABLES yield = (pcre_malloc)(tables_length); #else yield = malloc(tables_length); @@ -1090,17 +1090,24 @@ same length of string. An assertion such as (?<=ab(c|de)) -is not permitted, because its single branch can match two different lengths, -but it is acceptable if rewritten to use two branches: +is not permitted, because its single top-level branch can match two different +lengths, but it is acceptable if rewritten to use two top-level branches: (?<=abc|abde) The implementation of lookbehind assertions is, for each alternative, to temporarily move the current position back by the fixed width and then try to match. If there are insufficient characters before the current position, the -match is deemed to fail. +match is deemed to fail. Lookbehinds in conjunction with once-only subpatterns +can be particularly useful for matching at the ends of strings; an example is +given at the end of the section on once-only subpatterns. -Assertions can be nested in any combination. For example, +Several assertions (of any sort) may occur in succession. For example, + + (?<=\\d{3})(?<!999)foo + +matches "foo" preceded by three digits that are not "999". Furthermore, +assertions can be nested in any combination. For example, (?<=(?<!foo)bar)baz @@ -1157,6 +1164,32 @@ number of digits they match in order to make the rest of the pattern match, This construction can of course contain arbitrarily complicated subpatterns, and it can be nested. +Once-only subpatterns can be used in conjunction with lookbehind assertions to +specify efficient matching at the end of the subject string. Consider a simple +pattern such as + + abcd$ + +when applied to a long string which does not match it. Because matching +proceeds from left to right, PCRE will look for each "a" in the subject and +then see if what follows matches the rest of the pattern. If the pattern is +specified as + + .*abcd$ + +then the initial .* matches the entire string at first, but when this fails, it +backtracks to match all but the last character, then all but the last two +characters, and so on. Once again the search for "a" covers the entire string, +from right to left, so we are no better off. However, if the pattern is written +as + + (?>.*)(?<=abcd) + +then there can be no backtracking for the .* item; it can match only the entire +string. The subsequent lookbehind assertion does a single test on the last four +characters. If it fails, the match fails immediately. For long strings, this +approach makes a significant difference to the processing time. + .SH CONDITIONAL SUBPATTERNS It is possible to cause the matching process to obey a subpattern @@ -1236,4 +1269,4 @@ Cambridge CB2 3QG, England. .br Phone: +44 1223 334714 -Copyright (c) 1998 University of Cambridge. +Copyright (c) 1997-1999 University of Cambridge. @@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals. Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1998 University of Cambridge + Copyright (c) 1997-1999 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -4033,6 +4033,17 @@ in the pattern. */ resetcount = 2 + re->top_bracket * 2; if (resetcount > offsetcount) resetcount = ocount; +/* Reset the working variable associated with each extraction. These should +never be used unless previously set, but they get saved and restored, and so we +initialize them to avoid reading uninitialized locations. */ + +if (match_block.offset_vector != NULL) + { + register int *iptr = match_block.offset_vector + ocount; + register int *iend = iptr - resetcount/2 + 1; + while (--iptr >= iend) *iptr = -1; + } + /* Set up the first character to match, if available. The first_char value is never set for an anchored regular expression, but the anchoring may be forced at run time, so we have to test for anchoring. The first char may be unset for @@ -2,7 +2,7 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* Copyright (c) 1998 University of Cambridge */ +/* Copyright (c) 1997-1999 University of Cambridge */ #ifndef _PCRE_H #define _PCRE_H diff --git a/pcreposix.3 b/pcreposix.3 index 017b977..40601c4 100644 --- a/pcreposix.3 +++ b/pcreposix.3 @@ -132,4 +132,4 @@ Cambridge CB2 3QG, England. .br Phone: +44 1223 334714 -Copyright (c) 1998 University of Cambridge. +Copyright (c) 1997-1999 University of Cambridge. diff --git a/pcreposix.c b/pcreposix.c index 63b99ef..4470676 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -12,7 +12,7 @@ functions. Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1998 University of Cambridge + Copyright (c) 1997-1999 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any diff --git a/pcreposix.h b/pcreposix.h index 91636aa..208db35 100644 --- a/pcreposix.h +++ b/pcreposix.h @@ -2,7 +2,7 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* Copyright (c) 1998 University of Cambridge */ +/* Copyright (c) 1997-1999 University of Cambridge */ #ifndef _PCREPOSIX_H #define _PCREPOSIX_H @@ -22,7 +22,7 @@ #endif #endif -#define LOOPREPEAT 10000 +#define LOOPREPEAT 20000 static FILE *outfile; @@ -499,8 +499,9 @@ while (!done) if (re != NULL) free(re); } time_taken = clock() - start_time; - fprintf(outfile, "Compile time %.2f milliseconds\n", - ((double)time_taken)/(4 * CLOCKS_PER_SEC)); + fprintf(outfile, "Compile time %.3f milliseconds\n", + ((double)time_taken * 1000.0) / + ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); } re = pcre_compile((char *)p, options, &error, &erroroffset, tables); @@ -586,8 +587,9 @@ while (!done) extra = pcre_study(re, study_options, &error); time_taken = clock() - start_time; if (extra != NULL) free(extra); - fprintf(outfile, " Study time %.2f milliseconds\n", - ((double)time_taken)/(4 * CLOCKS_PER_SEC)); + fprintf(outfile, " Study time %.3f milliseconds\n", + ((double)time_taken * 1000.0)/ + ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); } extra = pcre_study(re, study_options, &error); @@ -765,12 +767,13 @@ while (!done) register int i; clock_t time_taken; clock_t start_time = clock(); - for (i = 0; i < 4000; i++) + for (i = 0; i < LOOPREPEAT; i++) count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets, size_offsets); time_taken = clock() - start_time; - fprintf(outfile, "Execute time %.2f milliseconds\n", - ((double)time_taken)/(4 * CLOCKS_PER_SEC)); + fprintf(outfile, "Execute time %.3f milliseconds\n", + ((double)time_taken * 1000.0)/ + ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC)); } count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets, @@ -69,4 +69,4 @@ for syntax errors or inacessible files (even if matches were found). .SH AUTHOR Philip Hazel <ph10@cam.ac.uk> .br -Copyright (c) 1998 University of Cambridge. +Copyright (c) 1997-1999 University of Cambridge. @@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals. Written by: Philip Hazel <ph10@cam.ac.uk> - Copyright (c) 1998 University of Cambridge + Copyright (c) 1997-1999 University of Cambridge ----------------------------------------------------------------------------- Permission is granted to anyone to use this software for any purpose on any @@ -85,6 +85,14 @@ set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, { register int c; +/* This next statement and the later reference to dummy are here in order to +trick the optimizer of the IBM C compiler for OS/2 into generating correct +code. Apparently IBM isn't going to fix the problem, and we would rather not +disable optimization (in this module it actually makes a big difference, and +the pcre module can use all the optimization it can get). */ + +volatile int dummy; + do { const uschar *tcode = code + 3; @@ -132,6 +140,7 @@ do case OP_BRAMINZERO: if (!set_start_bits(++tcode, start_bits, caseless, cd)) return FALSE; + dummy = 1; do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT); tcode += 3; try_next = TRUE; @@ -1599,5 +1599,30 @@ /b\z/ a\nb *** Failers + +/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/ + a + abc + a-b + 0-9 + a.b + 5.6.7 + the.quick.brown.fox + a100.b200.300c + 12-ab.1245 + ***Failers + \ + .a + -a + a- + a. + a_b + a.- + a.. + ab..bc + the.quick.brown.fox- + the.quick.brown.fox. + the.quick.brown.fox_ + the.quick.brown.fox+ / End of test input / @@ -1,4 +1,4 @@ -PCRE version 2.01 21-Oct-1998 +PCRE version 2.02 14-Jan-1999 /the quick brown fox/ the quick brown fox diff --git a/testoutput2 b/testoutput2 index c2e0148..34dad57 100644 --- a/testoutput2 +++ b/testoutput2 @@ -1,4 +1,4 @@ -PCRE version 2.01 21-Oct-1998 +PCRE version 2.02 14-Jan-1999 /(a)b|/ Identifying subpattern count = 1 diff --git a/testoutput3 b/testoutput3 index a5d960c..18a07ef 100644 --- a/testoutput3 +++ b/testoutput3 @@ -1,4 +1,4 @@ -PCRE version 2.01 21-Oct-1998 +PCRE version 2.02 14-Jan-1999 /(?<!bar)foo/ foo @@ -2748,6 +2748,63 @@ No match 0: b *** Failers No match + +/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/ + a + 0: a + 1: + abc + 0: abc + 1: + a-b + 0: a-b + 1: + 0-9 + 0: 0-9 + 1: + a.b + 0: a.b + 1: + 5.6.7 + 0: 5.6.7 + 1: + the.quick.brown.fox + 0: the.quick.brown.fox + 1: + a100.b200.300c + 0: a100.b200.300c + 1: + 12-ab.1245 + 0: 12-ab.1245 + 1: + ***Failers +No match + \ +No match + .a +No match + -a +No match + a- +No match + a. +No match + a_b +No match + a.- +No match + a.. +No match + ab..bc +No match + the.quick.brown.fox- +No match + the.quick.brown.fox. +No match + the.quick.brown.fox_ +No match + the.quick.brown.fox+ +No match / End of test input / diff --git a/testoutput4 b/testoutput4 index 9848f5a..c72a1f3 100644 --- a/testoutput4 +++ b/testoutput4 @@ -1,4 +1,4 @@ -PCRE version 2.01 21-Oct-1998 +PCRE version 2.02 14-Jan-1999 /^[\w]+/ *** Failers |