summary refs log tree commit diff
diff options
context:
space:
mode:
author nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:38:49 +0000
committer nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:38:49 +0000
commit 0c8732c8583c7e31476c0ec1c0ac92cc7e5f8bc0 (patch)
tree 97b11a15f98ccfb7a200cc428c3bed36fd023f0f
parent c87b6bbacc291c0a1e1d8a396de1b621151a7822 (diff)
download pcre-0c8732c8583c7e31476c0ec1c0ac92cc7e5f8bc0.tar.gz
Load pcre-2.02 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@27 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 21
-rw-r--r-- Makefile 18
-rw-r--r-- README 8
-rw-r--r-- Tech.Notes 2
-rw-r--r-- dftables.c (renamed from deftables.c) 6
-rw-r--r-- internal.h 4
-rw-r--r-- maketables.c 12
-rw-r--r-- pcre.3 43
-rw-r--r-- pcre.c 13
-rw-r--r-- pcre.h 2
-rw-r--r-- pcreposix.3 2
-rw-r--r-- pcreposix.c 2
-rw-r--r-- pcreposix.h 2
-rw-r--r-- pcretest.c 19
-rw-r--r-- pgrep.1 2
-rw-r--r-- study.c 11
-rw-r--r-- testinput3 25
-rw-r--r-- testoutput 2
-rw-r--r-- testoutput2 2
-rw-r--r-- testoutput3 59
-rw-r--r-- testoutput4 2
21 files changed, 208 insertions, 49 deletions
diff --git a/ChangeLog b/ChangeLog
index 438d942..435b90a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,27 @@ ChangeLog for PCRE
------------------
+Version 2.02 14-Jan-99
+----------------------
+
+1. Initialized the working variables associated with each extraction so that
+their saving and restoring doesn't refer to uninitialized store.
+
+2. Put dummy code into study.c in order to trick the optimizer of the IBM C
+compiler for OS/2 into generating correct code. Apparently IBM isn't going to
+fix the problem.
+
+3. Pcretest: the timing code wasn't using LOOPREPEAT for timing execution
+calls, and wasn't printing the correct value for compiling calls. Increased the
+default value of LOOPREPEAT, and the number of significant figures in the
+times.
+
+4. Changed "/bin/rm" in the Makefile to "-rm" so it works on Windows NT.
+
+5. Renamed "deftables" as "dftables" to get it down to 8 characters, to avoid
+a building problem on Windows NT with a FAT file system.
+
+
Version 2.01 21-Oct-98
----------------------
diff --git a/Makefile b/Makefile
index 4c5e42f..afa6316 100644
--- a/Makefile
+++ b/Makefile
@@ -27,15 +27,15 @@ pgrep: libpcre.a pgrep.o
$(CC) $(CFLAGS) -o pgrep pgrep.o libpcre.a
pcretest: libpcre.a libpcreposix.a pcretest.o
- $(CC) $(CFLAGS) -o pcretest pcretest.o libpcre.a libpcreposix.a
+ $(PURIFY) $(CC) $(CFLAGS) -o pcretest pcretest.o libpcre.a libpcreposix.a
libpcre.a: $(OBJ)
- /bin/rm -f libpcre.a
+ -rm -f libpcre.a
$(AR) libpcre.a $(OBJ)
$(RANLIB) libpcre.a
libpcreposix.a: pcreposix.o
- /bin/rm -f libpcreposix.a
+ -rm -f libpcreposix.a
$(AR) libpcreposix.a pcreposix.o
$(RANLIB) libpcreposix.a
@@ -59,17 +59,17 @@ pgrep.o: pgrep.c pcre.h Makefile
# An auxiliary program makes the default character table source
-chartables.c: deftables
- ./deftables >chartables.c
+chartables.c: dftables
+ ./dftables >chartables.c
-deftables: deftables.c maketables.c pcre.h internal.h Makefile
- $(CC) -o deftables $(CFLAGS) deftables.c
+dftables: dftables.c maketables.c pcre.h internal.h Makefile
+ $(CC) -o dftables $(CFLAGS) dftables.c
-# We deliberately omit deftables and chartables.c from 'make clean'; once made
+# We deliberately omit dftables and chartables.c from 'make clean'; once made
# chartables.c shouldn't change, and if people have edited the tables by hand,
# you don't want to throw them away.
-clean:; /bin/rm -f *.o *.a pcretest pgrep
+clean:; -rm -f *.o *.a pcretest pgrep
runtest: all
./RunTest
diff --git a/README b/README
index fb36b02..e169e46 100644
--- a/README
+++ b/README
@@ -27,7 +27,7 @@ The distribution should contain the following files:
Tech.Notes notes on the encoding
pcre.3 man page for the functions
pcreposix.3 man page for the POSIX wrapper API
- deftables.c auxiliary program for building chartables.c
+ dftables.c auxiliary program for building chartables.c
maketables.c )
study.c ) source of
pcre.c ) the functions
@@ -115,8 +115,8 @@ is passed as NULL, a set of default tables that is built into the binary is
used.
The source file called chartables.c contains the default set of tables. This is
-not supplied in the distribution, but is built by the program deftables
-(compiled from deftables.c), which uses the ANSI C character handling functions
+not supplied in the distribution, but is built by the program dftables
+(compiled from dftables.c), which uses the ANSI C character handling functions
such as isalnum(), isalpha(), isupper(), islower(), etc. to build the table
sources. This means that the default C locale set your system will control the
contents of the tables. You can change the default tables by editing
@@ -291,4 +291,4 @@ contains malformed regular expressions, in order to check that PCRE diagnoses
them correctly.
Philip Hazel <ph10@cam.ac.uk>
-October 1998
+January 1999
diff --git a/Tech.Notes b/Tech.Notes
index 9e5aefd..d564c7a 100644
--- a/Tech.Notes
+++ b/Tech.Notes
@@ -234,4 +234,4 @@ the compiled data.
Philip Hazel
-October 1998
+January 1999
diff --git a/deftables.c b/dftables.c
index b6c8e58..3e5d592 100644
--- a/deftables.c
+++ b/dftables.c
@@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by: Philip Hazel <ph10@cam.ac.uk>
- Copyright (c) 1998 University of Cambridge
+ Copyright (c) 1997-1999 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -42,7 +42,7 @@ order to be consistent. */
#include "internal.h"
-#define DEFTABLES
+#define DFTABLES /* maketables.c notices this */
#include "maketables.c"
@@ -139,4 +139,4 @@ printf(" */\n\n/* End of chartables.c */\n");
return 0;
}
-/* End of deftables.c */
+/* End of dftables.c */
diff --git a/internal.h b/internal.h
index 11ec37b..713e6c5 100644
--- a/internal.h
+++ b/internal.h
@@ -3,7 +3,7 @@
*************************************************/
-#define PCRE_VERSION "2.01 21-Oct-1998"
+#define PCRE_VERSION "2.02 14-Jan-1999"
/* This is a library of functions to support regular expressions whose syntax
@@ -12,7 +12,7 @@ the file Tech.Notes for some information on the internals.
Written by: Philip Hazel <ph10@cam.ac.uk>
- Copyright (c) 1998 University of Cambridge
+ Copyright (c) 1997-1999 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
diff --git a/maketables.c b/maketables.c
index 370a0e9..01943d3 100644
--- a/maketables.c
+++ b/maketables.c
@@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by: Philip Hazel <ph10@cam.ac.uk>
- Copyright (c) 1998 University of Cambridge
+ Copyright (c) 1997-1999 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -31,10 +31,10 @@ See the file Tech.Notes for some information on the internals.
/* This file is compiled on its own as part of the PCRE library. However,
-it is also included in the compilation of deftables.c, in which case the macro
-DEFTABLES is defined. */
+it is also included in the compilation of dftables.c, in which case the macro
+DFTABLES is defined. */
-#ifndef DEFTABLES
+#ifndef DFTABLES
#include "internal.h"
#endif
@@ -48,7 +48,7 @@ DEFTABLES is defined. */
a pointer to them. They are build using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via pcre_malloc(), but when compiled
-inside deftables, use malloc().
+inside dftables, use malloc().
Arguments: none
Returns: pointer to the contiguous block of data
@@ -60,7 +60,7 @@ pcre_maketables(void)
unsigned char *yield, *p;
int i;
-#ifndef DEFTABLES
+#ifndef DFTABLES
yield = (pcre_malloc)(tables_length);
#else
yield = malloc(tables_length);
diff --git a/pcre.3 b/pcre.3
index 8ff9a0d..098813e 100644
--- a/pcre.3
+++ b/pcre.3
@@ -1090,17 +1090,24 @@ same length of string. An assertion such as
(?<=ab(c|de))
-is not permitted, because its single branch can match two different lengths,
-but it is acceptable if rewritten to use two branches:
+is not permitted, because its single top-level branch can match two different
+lengths, but it is acceptable if rewritten to use two top-level branches:
(?<=abc|abde)
The implementation of lookbehind assertions is, for each alternative, to
temporarily move the current position back by the fixed width and then try to
match. If there are insufficient characters before the current position, the
-match is deemed to fail.
+match is deemed to fail. Lookbehinds in conjunction with once-only subpatterns
+can be particularly useful for matching at the ends of strings; an example is
+given at the end of the section on once-only subpatterns.
-Assertions can be nested in any combination. For example,
+Several assertions (of any sort) may occur in succession. For example,
+
+ (?<=\\d{3})(?<!999)foo
+
+matches "foo" preceded by three digits that are not "999". Furthermore,
+assertions can be nested in any combination. For example,
(?<=(?<!foo)bar)baz
@@ -1157,6 +1164,32 @@ number of digits they match in order to make the rest of the pattern match,
This construction can of course contain arbitrarily complicated subpatterns,
and it can be nested.
+Once-only subpatterns can be used in conjunction with lookbehind assertions to
+specify efficient matching at the end of the subject string. Consider a simple
+pattern such as
+
+ abcd$
+
+when applied to a long string which does not match it. Because matching
+proceeds from left to right, PCRE will look for each "a" in the subject and
+then see if what follows matches the rest of the pattern. If the pattern is
+specified as
+
+ .*abcd$
+
+then the initial .* matches the entire string at first, but when this fails, it
+backtracks to match all but the last character, then all but the last two
+characters, and so on. Once again the search for "a" covers the entire string,
+from right to left, so we are no better off. However, if the pattern is written
+as
+
+ (?>.*)(?<=abcd)
+
+then there can be no backtracking for the .* item; it can match only the entire
+string. The subsequent lookbehind assertion does a single test on the last four
+characters. If it fails, the match fails immediately. For long strings, this
+approach makes a significant difference to the processing time.
+
.SH CONDITIONAL SUBPATTERNS
It is possible to cause the matching process to obey a subpattern
@@ -1236,4 +1269,4 @@ Cambridge CB2 3QG, England.
.br
Phone: +44 1223 334714
-Copyright (c) 1998 University of Cambridge.
+Copyright (c) 1997-1999 University of Cambridge.
diff --git a/pcre.c b/pcre.c
index bc09a47..320b8e2 100644
--- a/pcre.c
+++ b/pcre.c
@@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.
Written by: Philip Hazel <ph10@cam.ac.uk>
- Copyright (c) 1998 University of Cambridge
+ Copyright (c) 1997-1999 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -4033,6 +4033,17 @@ in the pattern. */
resetcount = 2 + re->top_bracket * 2;
if (resetcount > offsetcount) resetcount = ocount;
+/* Reset the working variable associated with each extraction. These should
+never be used unless previously set, but they get saved and restored, and so we
+initialize them to avoid reading uninitialized locations. */
+
+if (match_block.offset_vector != NULL)
+ {
+ register int *iptr = match_block.offset_vector + ocount;
+ register int *iend = iptr - resetcount/2 + 1;
+ while (--iptr >= iend) *iptr = -1;
+ }
+
/* Set up the first character to match, if available. The first_char value is
never set for an anchored regular expression, but the anchoring may be forced
at run time, so we have to test for anchoring. The first char may be unset for
diff --git a/pcre.h b/pcre.h
index 3d81ad8..5224e25 100644
--- a/pcre.h
+++ b/pcre.h
@@ -2,7 +2,7 @@
* Perl-Compatible Regular Expressions *
*************************************************/
-/* Copyright (c) 1998 University of Cambridge */
+/* Copyright (c) 1997-1999 University of Cambridge */
#ifndef _PCRE_H
#define _PCRE_H
diff --git a/pcreposix.3 b/pcreposix.3
index 017b977..40601c4 100644
--- a/pcreposix.3
+++ b/pcreposix.3
@@ -132,4 +132,4 @@ Cambridge CB2 3QG, England.
.br
Phone: +44 1223 334714
-Copyright (c) 1998 University of Cambridge.
+Copyright (c) 1997-1999 University of Cambridge.
diff --git a/pcreposix.c b/pcreposix.c
index 63b99ef..4470676 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -12,7 +12,7 @@ functions.
Written by: Philip Hazel <ph10@cam.ac.uk>
- Copyright (c) 1998 University of Cambridge
+ Copyright (c) 1997-1999 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
diff --git a/pcreposix.h b/pcreposix.h
index 91636aa..208db35 100644
--- a/pcreposix.h
+++ b/pcreposix.h
@@ -2,7 +2,7 @@
* Perl-Compatible Regular Expressions *
*************************************************/
-/* Copyright (c) 1998 University of Cambridge */
+/* Copyright (c) 1997-1999 University of Cambridge */
#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H
diff --git a/pcretest.c b/pcretest.c
index 6729fe7..9aaf981 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -22,7 +22,7 @@
#endif
#endif
-#define LOOPREPEAT 10000
+#define LOOPREPEAT 20000
static FILE *outfile;
@@ -499,8 +499,9 @@ while (!done)
if (re != NULL) free(re);
}
time_taken = clock() - start_time;
- fprintf(outfile, "Compile time %.2f milliseconds\n",
- ((double)time_taken)/(4 * CLOCKS_PER_SEC));
+ fprintf(outfile, "Compile time %.3f milliseconds\n",
+ ((double)time_taken * 1000.0) /
+ ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
}
re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
@@ -586,8 +587,9 @@ while (!done)
extra = pcre_study(re, study_options, &error);
time_taken = clock() - start_time;
if (extra != NULL) free(extra);
- fprintf(outfile, " Study time %.2f milliseconds\n",
- ((double)time_taken)/(4 * CLOCKS_PER_SEC));
+ fprintf(outfile, " Study time %.3f milliseconds\n",
+ ((double)time_taken * 1000.0)/
+ ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
}
extra = pcre_study(re, study_options, &error);
@@ -765,12 +767,13 @@ while (!done)
register int i;
clock_t time_taken;
clock_t start_time = clock();
- for (i = 0; i < 4000; i++)
+ for (i = 0; i < LOOPREPEAT; i++)
count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
size_offsets);
time_taken = clock() - start_time;
- fprintf(outfile, "Execute time %.2f milliseconds\n",
- ((double)time_taken)/(4 * CLOCKS_PER_SEC));
+ fprintf(outfile, "Execute time %.3f milliseconds\n",
+ ((double)time_taken * 1000.0)/
+ ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
}
count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,
diff --git a/pgrep.1 b/pgrep.1
index da1744d..49f81d3 100644
--- a/pgrep.1
+++ b/pgrep.1
@@ -69,4 +69,4 @@ for syntax errors or inacessible files (even if matches were found).
.SH AUTHOR
Philip Hazel <ph10@cam.ac.uk>
.br
-Copyright (c) 1998 University of Cambridge.
+Copyright (c) 1997-1999 University of Cambridge.
diff --git a/study.c b/study.c
index 8b18699..40f489b 100644
--- a/study.c
+++ b/study.c
@@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.
Written by: Philip Hazel <ph10@cam.ac.uk>
- Copyright (c) 1998 University of Cambridge
+ Copyright (c) 1997-1999 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
@@ -85,6 +85,14 @@ set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
{
register int c;
+/* This next statement and the later reference to dummy are here in order to
+trick the optimizer of the IBM C compiler for OS/2 into generating correct
+code. Apparently IBM isn't going to fix the problem, and we would rather not
+disable optimization (in this module it actually makes a big difference, and
+the pcre module can use all the optimization it can get). */
+
+volatile int dummy;
+
do
{
const uschar *tcode = code + 3;
@@ -132,6 +140,7 @@ do
case OP_BRAMINZERO:
if (!set_start_bits(++tcode, start_bits, caseless, cd))
return FALSE;
+ dummy = 1;
do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
tcode += 3;
try_next = TRUE;
diff --git a/testinput3 b/testinput3
index 2816cf2..c27c780 100644
--- a/testinput3
+++ b/testinput3
@@ -1599,5 +1599,30 @@
/b\z/
a\nb
*** Failers
+
+/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/
+ a
+ abc
+ a-b
+ 0-9
+ a.b
+ 5.6.7
+ the.quick.brown.fox
+ a100.b200.300c
+ 12-ab.1245
+ ***Failers
+ \
+ .a
+ -a
+ a-
+ a.
+ a_b
+ a.-
+ a..
+ ab..bc
+ the.quick.brown.fox-
+ the.quick.brown.fox.
+ the.quick.brown.fox_
+ the.quick.brown.fox+
/ End of test input /
diff --git a/testoutput b/testoutput
index 5f4bf92..37bf728 100644
--- a/testoutput
+++ b/testoutput
@@ -1,4 +1,4 @@
-PCRE version 2.01 21-Oct-1998
+PCRE version 2.02 14-Jan-1999
/the quick brown fox/
the quick brown fox
diff --git a/testoutput2 b/testoutput2
index c2e0148..34dad57 100644
--- a/testoutput2
+++ b/testoutput2
@@ -1,4 +1,4 @@
-PCRE version 2.01 21-Oct-1998
+PCRE version 2.02 14-Jan-1999
/(a)b|/
Identifying subpattern count = 1
diff --git a/testoutput3 b/testoutput3
index a5d960c..18a07ef 100644
--- a/testoutput3
+++ b/testoutput3
@@ -1,4 +1,4 @@
-PCRE version 2.01 21-Oct-1998
+PCRE version 2.02 14-Jan-1999
/(?<!bar)foo/
foo
@@ -2748,6 +2748,63 @@ No match
0: b
*** Failers
No match
+
+/^(?>(?(1)\.|())[^\W_](?>[a-z0-9-]*[^\W_])?)+$/
+ a
+ 0: a
+ 1:
+ abc
+ 0: abc
+ 1:
+ a-b
+ 0: a-b
+ 1:
+ 0-9
+ 0: 0-9
+ 1:
+ a.b
+ 0: a.b
+ 1:
+ 5.6.7
+ 0: 5.6.7
+ 1:
+ the.quick.brown.fox
+ 0: the.quick.brown.fox
+ 1:
+ a100.b200.300c
+ 0: a100.b200.300c
+ 1:
+ 12-ab.1245
+ 0: 12-ab.1245
+ 1:
+ ***Failers
+No match
+ \
+No match
+ .a
+No match
+ -a
+No match
+ a-
+No match
+ a.
+No match
+ a_b
+No match
+ a.-
+No match
+ a..
+No match
+ ab..bc
+No match
+ the.quick.brown.fox-
+No match
+ the.quick.brown.fox.
+No match
+ the.quick.brown.fox_
+No match
+ the.quick.brown.fox+
+No match
/ End of test input /
diff --git a/testoutput4 b/testoutput4
index 9848f5a..c72a1f3 100644
--- a/testoutput4
+++ b/testoutput4
@@ -1,4 +1,4 @@
-PCRE version 2.01 21-Oct-1998
+PCRE version 2.02 14-Jan-1999
/^[\w]+/
*** Failers