summary refs log tree commit diff
diff options
context:
space:
mode:
author nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:39:54 +0000
committer nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:39:54 +0000
commit 77c897711a9b8aeb05bbbc34cbf8fac9d4bddebf (patch)
tree 9d799e9b16c71f75e24bda424003937bb05d4123
parent a9e8c332d367f8ddb17b80729591617196829bb0 (diff)
download pcre-77c897711a9b8aeb05bbbc34cbf8fac9d4bddebf.tar.gz
Load pcre-3.8 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@59 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 12
-rwxr-xr-x configure 10
-rw-r--r-- configure.in 4
-rw-r--r-- pcre.c 26
-rw-r--r-- pcretest.c 23
-rw-r--r-- testdata/testinput6 26
-rw-r--r-- testdata/testoutput1 2
-rw-r--r-- testdata/testoutput2 2
-rw-r--r-- testdata/testoutput3 2
-rw-r--r-- testdata/testoutput4 2
-rw-r--r-- testdata/testoutput5 2
-rw-r--r-- testdata/testoutput6 180
12 files changed, 229 insertions, 62 deletions
diff --git a/ChangeLog b/ChangeLog
index e7eaf9f..b53be1c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,13 @@
ChangeLog for PCRE
------------------
+Version 3.8 18-Dec-01
+---------------------
+
+1. The experimental UTF-8 code was completely screwed up. It was packing the
+bytes in the wrong order. How dumb can you get?
+
+
Version 3.7 29-Oct-01
---------------------
@@ -8,6 +15,11 @@ Version 3.7 29-Oct-01
This caused pcretest, when used on the test data, to segfault. Unfortunately,
this didn't happen under Solaris 8, where I normally test things.
+2. The Makefile had to be changed to make it work on BSD systems, where 'make'
+doesn't seem to recognize that ./xxx and xxx are the same file. (This entry
+isn't in ChangeLog distributed with 3.7 because I forgot when I hastily made
+this fix an hour or so after the initial 3.7 release.)
+
Version 3.6 23-Oct-01
---------------------
diff --git a/configure b/configure
index be47c55..0cd1623 100755
--- a/configure
+++ b/configure
@@ -738,12 +738,6 @@ if test "$ac_init_help" = "long"; then
# The list generated by autoconf has been trimmed to remove many
# options that are totally irrelevant to PCRE (e.g. relating to X),
# or are not supported by its Makefile.
- # The list generated by autoconf has been trimmed to remove many
- # options that are totally irrelevant to PCRE (e.g. relating to X),
- # or are not supported by its Makefile.
- # The list generated by autoconf has been trimmed to remove many
- # options that are totally irrelevant to PCRE (e.g. relating to X),
- # or are not supported by its Makefile.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<EOF
\`configure' configures this package to adapt to many kinds of systems.
@@ -1097,8 +1091,8 @@ rm -f conftest.sh
ac_config_headers="$ac_config_headers config.h:config.in"
PCRE_MAJOR=3
-PCRE_MINOR=7
-PCRE_DATE=29-Oct-2001
+PCRE_MINOR=8
+PCRE_DATE=18-Dec-2001
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
PCRE_LIB_VERSION=0:1:0
diff --git a/configure.in b/configure.in
index 3d2deee..630401b 100644
--- a/configure.in
+++ b/configure.in
@@ -17,8 +17,8 @@ dnl digits for minor numbers less than 10. There are unlikely to be
dnl that many releases anyway.
PCRE_MAJOR=3
-PCRE_MINOR=7
-PCRE_DATE=29-Oct-2001
+PCRE_MINOR=8
+PCRE_DATE=18-Dec-2001
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
dnl Provide versioning information for libtool shared libraries that
diff --git a/pcre.c b/pcre.c
index 2f379d9..ad3ddc7 100644
--- a/pcre.c
+++ b/pcre.c
@@ -211,12 +211,12 @@ byte-mode, and more complicated ones for UTF-8 characters. */
if (md->utf8 && (c & 0xc0) == 0xc0) \
{ \
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
- int s = 6 - a; /* Amount to shift next byte */ \
- c &= utf8_table3[a]; /* Low order bits from first byte */ \
+ int s = 6*a; \
+ c = (c & utf8_table3[a]) << s; \
while (a-- > 0) \
{ \
+ s -= 6; \
c |= (*eptr++ & 0x3f) << s; \
- s += 6; \
} \
}
@@ -229,12 +229,12 @@ byte-mode, and more complicated ones for UTF-8 characters. */
{ \
int i; \
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
- int s = 6 - a; /* Amount to shift next byte */ \
- c &= utf8_table3[a]; /* Low order bits from first byte */ \
+ int s = 6*a; \
+ c = (c & utf8_table3[a]) << s; \
for (i = 1; i <= a; i++) \
{ \
+ s -= 6; \
c |= (eptr[i] & 0x3f) << s; \
- s += 6; \
} \
len += a; \
}
@@ -309,13 +309,13 @@ ord2utf8(int cvalue, uschar *buffer)
register int i, j;
for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
if (cvalue <= utf8_table1[i]) break;
-*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);
-cvalue >>= 6 - i;
-for (j = 0; j < i; j++)
- {
- *buffer++ = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
+buffer += i;
+for (j = i; j > 0; j--)
+ {
+ *buffer-- = 0x80 | (cvalue & 0x3f);
+ cvalue >>= 6;
+ }
+*buffer = utf8_table2[i] | cvalue;
return i + 1;
}
#endif
diff --git a/pcretest.c b/pcretest.c
index 0fed5da..f04443a 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -73,13 +73,14 @@ for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
if (cvalue <= utf8_table1[i]) break;
if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
if (cvalue < 0) return -1;
-*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);
-cvalue >>= 6 - i;
-for (j = 0; j < i; j++)
- {
- *buffer++ = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
+
+buffer += i;
+for (j = i; j > 0; j--)
+ {
+ *buffer-- = 0x80 | (cvalue & 0x3f);
+ cvalue >>= 6;
+ }
+*buffer = utf8_table2[i] | cvalue;
return i + 1;
}
@@ -117,15 +118,15 @@ if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
/* i now has a value in the range 1-5 */
-d = c & utf8_table3[i];
-s = 6 - i;
+s = 6*i;
+d = (c & utf8_table3[i]) << s;
for (j = 0; j < i; j++)
{
c = *buffer++;
if ((c & 0xc0) != 0x80) return -(j+1);
+ s -= 6;
d |= (c & 0x3f) << s;
- s += 6;
}
/* Check that encoding was the correct unique one */
@@ -460,7 +461,7 @@ while (argc > 1 && argv[op][0] == '-')
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
- ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
+ ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
{
op++;
argc--;
diff --git a/testdata/testinput6 b/testdata/testinput6
index 1ccaa0d..0074851 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -27,6 +27,32 @@
/\xff/8D
+/\x{0041}\x{2262}\x{0391}\x{002e}/D8
+ \x{0041}\x{2262}\x{0391}\x{002e}
+
+/\x{D55c}\x{ad6d}\x{C5B4}/D8
+ \x{D55c}\x{ad6d}\x{C5B4}
+
+/\x{65e5}\x{672c}\x{8a9e}/D8
+ \x{65e5}\x{672c}\x{8a9e}
+
+/\x{80}/D8
+
+/\x{084}/D8
+
+/\x{104}/D8
+
+/\x{861}/D8
+
+/\x{212ab}/D8
+
+/.{3,5}X/D8
+ \x{212ab}\x{212ab}\x{212ab}\x{861}X
+
+
+/.{3,5}?/D8
+ \x{212ab}\x{212ab}\x{212ab}\x{861}
+
/-- These tests are here rather than in testinput5 because Perl 5.6 has --/
/-- some problems with UTF-8 support, in the area of \x{..} where the --/
/-- value is < 255. It grumbles about invalid UTF-8 strings. --/
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 41a5346..5fa1e6e 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -1,4 +1,4 @@
-PCRE version 3.7 29-Oct-2001
+PCRE version 3.8 18-Dec-2001
/the quick brown fox/
the quick brown fox
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 91236df..e1e60ce 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -1,4 +1,4 @@
-PCRE version 3.7 29-Oct-2001
+PCRE version 3.8 18-Dec-2001
/(a)b|/
Capturing subpattern count = 1
diff --git a/testdata/testoutput3 b/testdata/testoutput3
index 54b146b..9e297d5 100644
--- a/testdata/testoutput3
+++ b/testdata/testoutput3
@@ -1,4 +1,4 @@
-PCRE version 3.7 29-Oct-2001
+PCRE version 3.8 18-Dec-2001
/(?<!bar)foo/
foo
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 6ee711d..61d0505 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -1,4 +1,4 @@
-PCRE version 3.7 29-Oct-2001
+PCRE version 3.8 18-Dec-2001
/^[\w]+/
*** Failers
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 39bc1f1..9213199 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -1,4 +1,4 @@
-PCRE version 3.7 29-Oct-2001
+PCRE version 3.8 18-Dec-2001
/-- Because of problems with Perl 5.6 in handling UTF-8 vs non UTF-8 --/
/-- strings automatically, do not use the \x{} construct except with --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index e00d2b7..8fa6682 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1,82 +1,82 @@
-PCRE version 3.7 29-Oct-2001
+PCRE version 3.8 18-Dec-2001
/\x{100}/8DM
Memory allocation (code space): 11
------------------------------------------------------------------
0 7 Bra 0
- 3 2 \xc0\x88
+ 3 2 \xc4\x80
7 7 Ket
10 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-First char = 192
-Need char = 136
+First char = 196
+Need char = 128
/\x{1000}/8DM
Memory allocation (code space): 12
------------------------------------------------------------------
0 8 Bra 0
- 3 3 \xe0\x80\x84
+ 3 3 \xe1\x80\x80
8 8 Ket
11 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-First char = 224
-Need char = 132
+First char = 225
+Need char = 128
/\x{10000}/8DM
Memory allocation (code space): 13
------------------------------------------------------------------
0 9 Bra 0
- 3 4 \xf0\x80\x80\x82
+ 3 4 \xf0\x90\x80\x80
9 9 Ket
12 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
First char = 240
-Need char = 130
+Need char = 128
/\x{100000}/8DM
Memory allocation (code space): 13
------------------------------------------------------------------
0 9 Bra 0
- 3 4 \xf0\x80\x80\xa0
+ 3 4 \xf4\x80\x80\x80
9 9 Ket
12 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-First char = 240
-Need char = 160
+First char = 244
+Need char = 128
/\x{1000000}/8DM
Memory allocation (code space): 14
------------------------------------------------------------------
0 10 Bra 0
- 3 5 \xf8\x80\x80\x80\x90
+ 3 5 \xf9\x80\x80\x80\x80
10 10 Ket
13 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-First char = 248
-Need char = 144
+First char = 249
+Need char = 128
/\x{4000000}/8DM
Memory allocation (code space): 15
------------------------------------------------------------------
0 11 Bra 0
- 3 6 \xfc\x80\x80\x80\x80\x82
+ 3 6 \xfc\x84\x80\x80\x80\x80
11 11 Ket
14 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
First char = 252
-Need char = 130
+Need char = 128
/\x{7fffFFFF}/8DM
Memory allocation (code space): 15
@@ -121,26 +121,160 @@ Failed: character value in \x{...} sequence is too large at offset 12
/\x80/8D
------------------------------------------------------------------
0 7 Bra 0
- 3 2 \xc0\x84
+ 3 2 \xc2\x80
7 7 Ket
10 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-First char = 192
-Need char = 132
+First char = 194
+Need char = 128
/\xff/8D
------------------------------------------------------------------
0 7 Bra 0
- 3 2 \xdf\x87
+ 3 2 \xc3\xbf
+ 7 7 Ket
+ 10 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 195
+Need char = 191
+
+/\x{0041}\x{2262}\x{0391}\x{002e}/D8
+------------------------------------------------------------------
+ 0 12 Bra 0
+ 3 7 A\xe2\x89\xa2\xce\x91.
+ 12 12 Ket
+ 15 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 'A'
+Need char = '.'
+ \x{0041}\x{2262}\x{0391}\x{002e}
+ 0: A\x{2262}\x{391}.
+
+/\x{D55c}\x{ad6d}\x{C5B4}/D8
+------------------------------------------------------------------
+ 0 14 Bra 0
+ 3 9 \xed\x95\x9c\xea\xb5\xad\xec\x96\xb4
+ 14 14 Ket
+ 17 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 237
+Need char = 180
+ \x{D55c}\x{ad6d}\x{C5B4}
+ 0: \x{d55c}\x{ad6d}\x{c5b4}
+
+/\x{65e5}\x{672c}\x{8a9e}/D8
+------------------------------------------------------------------
+ 0 14 Bra 0
+ 3 9 \xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e
+ 14 14 Ket
+ 17 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 230
+Need char = 158
+ \x{65e5}\x{672c}\x{8a9e}
+ 0: \x{65e5}\x{672c}\x{8a9e}
+
+/\x{80}/D8
+------------------------------------------------------------------
+ 0 7 Bra 0
+ 3 2 \xc2\x80
+ 7 7 Ket
+ 10 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 194
+Need char = 128
+
+/\x{084}/D8
+------------------------------------------------------------------
+ 0 7 Bra 0
+ 3 2 \xc2\x84
7 7 Ket
10 End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-First char = 223
-Need char = 135
+First char = 194
+Need char = 132
+
+/\x{104}/D8
+------------------------------------------------------------------
+ 0 7 Bra 0
+ 3 2 \xc4\x84
+ 7 7 Ket
+ 10 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 196
+Need char = 132
+
+/\x{861}/D8
+------------------------------------------------------------------
+ 0 8 Bra 0
+ 3 3 \xe0\xa1\xa1
+ 8 8 Ket
+ 11 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 224
+Need char = 161
+
+/\x{212ab}/D8
+------------------------------------------------------------------
+ 0 9 Bra 0
+ 3 4 \xf0\xa1\x8a\xab
+ 9 9 Ket
+ 12 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+First char = 240
+Need char = 171
+
+/.{3,5}X/D8
+------------------------------------------------------------------
+ 0 14 Bra 0
+ 3 Any{3}
+ 7 Any{0,2}
+ 11 1 X
+ 14 14 Ket
+ 17 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+No first char
+Need char = 'X'
+ \x{212ab}\x{212ab}\x{212ab}\x{861}X
+ 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X
+
+
+/.{3,5}?/D8
+------------------------------------------------------------------
+ 0 11 Bra 0
+ 3 Any{3}
+ 7 Any{0,2}?
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf8
+No first char
+No need char
+ \x{212ab}\x{212ab}\x{212ab}\x{861}
+ 0: \x{212ab}\x{212ab}\x{212ab}
/-- These tests are here rather than in testinput5 because Perl 5.6 has --/
/-- some problems with UTF-8 support, in the area of \x{..} where the --/