diff options
author | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:39:54 +0000 |
---|---|---|
committer | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:39:54 +0000 |
commit | 77c897711a9b8aeb05bbbc34cbf8fac9d4bddebf (patch) | |
tree | 9d799e9b16c71f75e24bda424003937bb05d4123 | |
parent | a9e8c332d367f8ddb17b80729591617196829bb0 (diff) | |
download | pcre-77c897711a9b8aeb05bbbc34cbf8fac9d4bddebf.tar.gz |
Load pcre-3.8 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@59 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 12 | ||||
-rwxr-xr-x | configure | 10 | ||||
-rw-r--r-- | configure.in | 4 | ||||
-rw-r--r-- | pcre.c | 26 | ||||
-rw-r--r-- | pcretest.c | 23 | ||||
-rw-r--r-- | testdata/testinput6 | 26 | ||||
-rw-r--r-- | testdata/testoutput1 | 2 | ||||
-rw-r--r-- | testdata/testoutput2 | 2 | ||||
-rw-r--r-- | testdata/testoutput3 | 2 | ||||
-rw-r--r-- | testdata/testoutput4 | 2 | ||||
-rw-r--r-- | testdata/testoutput5 | 2 | ||||
-rw-r--r-- | testdata/testoutput6 | 180 |
12 files changed, 229 insertions, 62 deletions
@@ -1,6 +1,13 @@ ChangeLog for PCRE ------------------ +Version 3.8 18-Dec-01 +--------------------- + +1. The experimental UTF-8 code was completely screwed up. It was packing the +bytes in the wrong order. How dumb can you get? + + Version 3.7 29-Oct-01 --------------------- @@ -8,6 +15,11 @@ Version 3.7 29-Oct-01 This caused pcretest, when used on the test data, to segfault. Unfortunately, this didn't happen under Solaris 8, where I normally test things. +2. The Makefile had to be changed to make it work on BSD systems, where 'make' +doesn't seem to recognize that ./xxx and xxx are the same file. (This entry +isn't in ChangeLog distributed with 3.7 because I forgot when I hastily made +this fix an hour or so after the initial 3.7 release.) + Version 3.6 23-Oct-01 --------------------- @@ -738,12 +738,6 @@ if test "$ac_init_help" = "long"; then # The list generated by autoconf has been trimmed to remove many # options that are totally irrelevant to PCRE (e.g. relating to X), # or are not supported by its Makefile. - # The list generated by autoconf has been trimmed to remove many - # options that are totally irrelevant to PCRE (e.g. relating to X), - # or are not supported by its Makefile. - # The list generated by autoconf has been trimmed to remove many - # options that are totally irrelevant to PCRE (e.g. relating to X), - # or are not supported by its Makefile. # This message is too long to be a string in the A/UX 3.1 sh. cat <<EOF \`configure' configures this package to adapt to many kinds of systems. @@ -1097,8 +1091,8 @@ rm -f conftest.sh ac_config_headers="$ac_config_headers config.h:config.in" PCRE_MAJOR=3 -PCRE_MINOR=7 -PCRE_DATE=29-Oct-2001 +PCRE_MINOR=8 +PCRE_DATE=18-Dec-2001 PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR} PCRE_LIB_VERSION=0:1:0 diff --git a/configure.in b/configure.in index 3d2deee..630401b 100644 --- a/configure.in +++ b/configure.in @@ -17,8 +17,8 @@ dnl digits for minor numbers less than 10. There are unlikely to be dnl that many releases anyway. PCRE_MAJOR=3 -PCRE_MINOR=7 -PCRE_DATE=29-Oct-2001 +PCRE_MINOR=8 +PCRE_DATE=18-Dec-2001 PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR} dnl Provide versioning information for libtool shared libraries that @@ -211,12 +211,12 @@ byte-mode, and more complicated ones for UTF-8 characters. */ if (md->utf8 && (c & 0xc0) == 0xc0) \ { \ int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int s = 6 - a; /* Amount to shift next byte */ \ - c &= utf8_table3[a]; /* Low order bits from first byte */ \ + int s = 6*a; \ + c = (c & utf8_table3[a]) << s; \ while (a-- > 0) \ { \ + s -= 6; \ c |= (*eptr++ & 0x3f) << s; \ - s += 6; \ } \ } @@ -229,12 +229,12 @@ byte-mode, and more complicated ones for UTF-8 characters. */ { \ int i; \ int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int s = 6 - a; /* Amount to shift next byte */ \ - c &= utf8_table3[a]; /* Low order bits from first byte */ \ + int s = 6*a; \ + c = (c & utf8_table3[a]) << s; \ for (i = 1; i <= a; i++) \ { \ + s -= 6; \ c |= (eptr[i] & 0x3f) << s; \ - s += 6; \ } \ len += a; \ } @@ -309,13 +309,13 @@ ord2utf8(int cvalue, uschar *buffer) register int i, j; for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) if (cvalue <= utf8_table1[i]) break; -*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]); -cvalue >>= 6 - i; -for (j = 0; j < i; j++) - { - *buffer++ = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } +buffer += i; +for (j = i; j > 0; j--) + { + *buffer-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } +*buffer = utf8_table2[i] | cvalue; return i + 1; } #endif @@ -73,13 +73,14 @@ for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) if (cvalue <= utf8_table1[i]) break; if (i >= sizeof(utf8_table1)/sizeof(int)) return 0; if (cvalue < 0) return -1; -*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]); -cvalue >>= 6 - i; -for (j = 0; j < i; j++) - { - *buffer++ = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } + +buffer += i; +for (j = i; j > 0; j--) + { + *buffer-- = 0x80 | (cvalue & 0x3f); + cvalue >>= 6; + } +*buffer = utf8_table2[i] | cvalue; return i + 1; } @@ -117,15 +118,15 @@ if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ /* i now has a value in the range 1-5 */ -d = c & utf8_table3[i]; -s = 6 - i; +s = 6*i; +d = (c & utf8_table3[i]) << s; for (j = 0; j < i; j++) { c = *buffer++; if ((c & 0xc0) != 0x80) return -(j+1); + s -= 6; d |= (c & 0x3f) << s; - s += 6; } /* Check that encoding was the correct unique one */ @@ -460,7 +461,7 @@ while (argc > 1 && argv[op][0] == '-') else if (strcmp(argv[op], "-i") == 0) showinfo = 1; else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; else if (strcmp(argv[op], "-o") == 0 && argc > 2 && - ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) + ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) { op++; argc--; diff --git a/testdata/testinput6 b/testdata/testinput6 index 1ccaa0d..0074851 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -27,6 +27,32 @@ /\xff/8D +/\x{0041}\x{2262}\x{0391}\x{002e}/D8 + \x{0041}\x{2262}\x{0391}\x{002e} + +/\x{D55c}\x{ad6d}\x{C5B4}/D8 + \x{D55c}\x{ad6d}\x{C5B4} + +/\x{65e5}\x{672c}\x{8a9e}/D8 + \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/D8 + +/\x{084}/D8 + +/\x{104}/D8 + +/\x{861}/D8 + +/\x{212ab}/D8 + +/.{3,5}X/D8 + \x{212ab}\x{212ab}\x{212ab}\x{861}X + + +/.{3,5}?/D8 + \x{212ab}\x{212ab}\x{212ab}\x{861} + /-- These tests are here rather than in testinput5 because Perl 5.6 has --/ /-- some problems with UTF-8 support, in the area of \x{..} where the --/ /-- value is < 255. It grumbles about invalid UTF-8 strings. --/ diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 41a5346..5fa1e6e 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -1,4 +1,4 @@ -PCRE version 3.7 29-Oct-2001 +PCRE version 3.8 18-Dec-2001 /the quick brown fox/ the quick brown fox diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 91236df..e1e60ce 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -1,4 +1,4 @@ -PCRE version 3.7 29-Oct-2001 +PCRE version 3.8 18-Dec-2001 /(a)b|/ Capturing subpattern count = 1 diff --git a/testdata/testoutput3 b/testdata/testoutput3 index 54b146b..9e297d5 100644 --- a/testdata/testoutput3 +++ b/testdata/testoutput3 @@ -1,4 +1,4 @@ -PCRE version 3.7 29-Oct-2001 +PCRE version 3.8 18-Dec-2001 /(?<!bar)foo/ foo diff --git a/testdata/testoutput4 b/testdata/testoutput4 index 6ee711d..61d0505 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -1,4 +1,4 @@ -PCRE version 3.7 29-Oct-2001 +PCRE version 3.8 18-Dec-2001 /^[\w]+/ *** Failers diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 39bc1f1..9213199 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -1,4 +1,4 @@ -PCRE version 3.7 29-Oct-2001 +PCRE version 3.8 18-Dec-2001 /-- Because of problems with Perl 5.6 in handling UTF-8 vs non UTF-8 --/ /-- strings automatically, do not use the \x{} construct except with --/ diff --git a/testdata/testoutput6 b/testdata/testoutput6 index e00d2b7..8fa6682 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -1,82 +1,82 @@ -PCRE version 3.7 29-Oct-2001 +PCRE version 3.8 18-Dec-2001 /\x{100}/8DM Memory allocation (code space): 11 ------------------------------------------------------------------ 0 7 Bra 0 - 3 2 \xc0\x88 + 3 2 \xc4\x80 7 7 Ket 10 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 192 -Need char = 136 +First char = 196 +Need char = 128 /\x{1000}/8DM Memory allocation (code space): 12 ------------------------------------------------------------------ 0 8 Bra 0 - 3 3 \xe0\x80\x84 + 3 3 \xe1\x80\x80 8 8 Ket 11 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 224 -Need char = 132 +First char = 225 +Need char = 128 /\x{10000}/8DM Memory allocation (code space): 13 ------------------------------------------------------------------ 0 9 Bra 0 - 3 4 \xf0\x80\x80\x82 + 3 4 \xf0\x90\x80\x80 9 9 Ket 12 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 First char = 240 -Need char = 130 +Need char = 128 /\x{100000}/8DM Memory allocation (code space): 13 ------------------------------------------------------------------ 0 9 Bra 0 - 3 4 \xf0\x80\x80\xa0 + 3 4 \xf4\x80\x80\x80 9 9 Ket 12 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 240 -Need char = 160 +First char = 244 +Need char = 128 /\x{1000000}/8DM Memory allocation (code space): 14 ------------------------------------------------------------------ 0 10 Bra 0 - 3 5 \xf8\x80\x80\x80\x90 + 3 5 \xf9\x80\x80\x80\x80 10 10 Ket 13 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 248 -Need char = 144 +First char = 249 +Need char = 128 /\x{4000000}/8DM Memory allocation (code space): 15 ------------------------------------------------------------------ 0 11 Bra 0 - 3 6 \xfc\x80\x80\x80\x80\x82 + 3 6 \xfc\x84\x80\x80\x80\x80 11 11 Ket 14 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 First char = 252 -Need char = 130 +Need char = 128 /\x{7fffFFFF}/8DM Memory allocation (code space): 15 @@ -121,26 +121,160 @@ Failed: character value in \x{...} sequence is too large at offset 12 /\x80/8D ------------------------------------------------------------------ 0 7 Bra 0 - 3 2 \xc0\x84 + 3 2 \xc2\x80 7 7 Ket 10 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 192 -Need char = 132 +First char = 194 +Need char = 128 /\xff/8D ------------------------------------------------------------------ 0 7 Bra 0 - 3 2 \xdf\x87 + 3 2 \xc3\xbf + 7 7 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 195 +Need char = 191 + +/\x{0041}\x{2262}\x{0391}\x{002e}/D8 +------------------------------------------------------------------ + 0 12 Bra 0 + 3 7 A\xe2\x89\xa2\xce\x91. + 12 12 Ket + 15 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 'A' +Need char = '.' + \x{0041}\x{2262}\x{0391}\x{002e} + 0: A\x{2262}\x{391}. + +/\x{D55c}\x{ad6d}\x{C5B4}/D8 +------------------------------------------------------------------ + 0 14 Bra 0 + 3 9 \xed\x95\x9c\xea\xb5\xad\xec\x96\xb4 + 14 14 Ket + 17 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 237 +Need char = 180 + \x{D55c}\x{ad6d}\x{C5B4} + 0: \x{d55c}\x{ad6d}\x{c5b4} + +/\x{65e5}\x{672c}\x{8a9e}/D8 +------------------------------------------------------------------ + 0 14 Bra 0 + 3 9 \xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e + 14 14 Ket + 17 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 230 +Need char = 158 + \x{65e5}\x{672c}\x{8a9e} + 0: \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/D8 +------------------------------------------------------------------ + 0 7 Bra 0 + 3 2 \xc2\x80 + 7 7 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 194 +Need char = 128 + +/\x{084}/D8 +------------------------------------------------------------------ + 0 7 Bra 0 + 3 2 \xc2\x84 7 7 Ket 10 End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -First char = 223 -Need char = 135 +First char = 194 +Need char = 132 + +/\x{104}/D8 +------------------------------------------------------------------ + 0 7 Bra 0 + 3 2 \xc4\x84 + 7 7 Ket + 10 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 196 +Need char = 132 + +/\x{861}/D8 +------------------------------------------------------------------ + 0 8 Bra 0 + 3 3 \xe0\xa1\xa1 + 8 8 Ket + 11 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 224 +Need char = 161 + +/\x{212ab}/D8 +------------------------------------------------------------------ + 0 9 Bra 0 + 3 4 \xf0\xa1\x8a\xab + 9 9 Ket + 12 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 240 +Need char = 171 + +/.{3,5}X/D8 +------------------------------------------------------------------ + 0 14 Bra 0 + 3 Any{3} + 7 Any{0,2} + 11 1 X + 14 14 Ket + 17 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +No first char +Need char = 'X' + \x{212ab}\x{212ab}\x{212ab}\x{861}X + 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X + + +/.{3,5}?/D8 +------------------------------------------------------------------ + 0 11 Bra 0 + 3 Any{3} + 7 Any{0,2}? + 11 11 Ket + 14 End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +No first char +No need char + \x{212ab}\x{212ab}\x{212ab}\x{861} + 0: \x{212ab}\x{212ab}\x{212ab} /-- These tests are here rather than in testinput5 because Perl 5.6 has --/ /-- some problems with UTF-8 support, in the area of \x{..} where the --/ |