summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-07-07 17:45:23 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-07-07 17:45:23 +0000
commit be88e9fc647d2d583bc3420bbe013230196fc443 (patch)
tree 5fc7e858be9012349312388cd3aabf915ef0cfdd
parent 798472a8be18bffaa324fa1c6cc052895a063e47 (diff)
download pcre-be88e9fc647d2d583bc3420bbe013230196fc443.tar.gz
Make pcretest generate a single byte for \x{} escapes in non-UTF-8 mode.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@355 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 5
-rw-r--r-- pcretest.c 16
-rw-r--r-- testdata/testinput2 6
-rw-r--r-- testdata/testinput5 4
-rw-r--r-- testdata/testinput7 4
-rw-r--r-- testdata/testoutput2 6
-rw-r--r-- testdata/testoutput5 9
-rw-r--r-- testdata/testoutput7 4
8 files changed, 41 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 3e5ccdf..a2dfe8f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -21,6 +21,11 @@ Version 8.0 02 Jul-08
4. Caseless matching was not working for non-ASCII characters in back
references. For example, /(\x{de})\1/8i was not matching \x{de}\x{fe}.
It now works when Unicode Property Support is available.
+
+5. In pcretest, an escape such as \x{de} in the data was always generating
+ a UTF-8 string, even in non-UTF-8 mode. Now it generates a single byte in
+ non-UTF-8 mode. If the value is greater than 255, it gives a warning about
+ truncation.
Version 7.7 07-May-08
diff --git a/pcretest.c b/pcretest.c
index ac8290d..31d0357 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -1806,9 +1806,19 @@ while (!done)
{
unsigned char buff8[8];
int ii, utn;
- utn = ord2utf8(c, buff8);
- for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
- c = buff8[ii]; /* Last byte */
+ if (use_utf8)
+ {
+ utn = ord2utf8(c, buff8);
+ for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
+ c = buff8[ii]; /* Last byte */
+ }
+ else
+ {
+ if (c > 255)
+ fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
+ "UTF-8 mode is not enabled.\n"
+ "** Truncation will probably give the wrong result.\n", c);
+ }
p = pt + 1;
break;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index 8536d66..2b64546 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -1988,10 +1988,10 @@ a random value. /Ix
a\rb\<anycrlf>
/^abc./mgx<any>
- abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK
/abc.$/mgx<any>
- abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+ abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7 abc9
/a/<cr><any>
@@ -2147,7 +2147,7 @@ a random value. /Ix
abc\r\n\r\n
/abc.$/mgx<anycrlf>
- abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+ abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9
/^X/m
XABC
diff --git a/testdata/testinput5 b/testdata/testinput5
index 8a8e499..62e9d1e 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -473,4 +473,8 @@ can't tell the difference.) --/
** Failers
ab
+/(\x{de})\1/
+ \x{de}\x{de}
+ \x{123}
+
/ End of testinput5 /
diff --git a/testdata/testinput7 b/testdata/testinput7
index 5d593ee..5ec4827 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -4151,10 +4151,10 @@
a\rb\<any>
/^abc./mgx<any>
- abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK
/abc.$/mgx<any>
- abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+ abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9
/^a\Rb/<bsr_unicode>
a\nb
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 5dd2b4c..2ac018b 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -7851,7 +7851,7 @@ No match
No match
/^abc./mgx<any>
- abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK
0: abc1
0: abc2
0: abc3
@@ -7861,7 +7861,7 @@ No match
0: abc7
/abc.$/mgx<any>
- abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+ abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7 abc9
0: abc1
0: abc2
0: abc3
@@ -8163,7 +8163,7 @@ No match
0+
/abc.$/mgx<anycrlf>
- abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+ abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9
0: abc1
0: abc4
0: abc5
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 1c745f4..d36a246 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -1628,4 +1628,13 @@ No match
ab
No match
+/(\x{de})\1/
+ \x{de}\x{de}
+ 0: \xde\xde
+ 1: \xde
+ \x{123}
+** Character \x{123} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
/ End of testinput5 /
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 9ded29d..aef4b6c 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -6805,7 +6805,7 @@ No match
No match
/^abc./mgx<any>
- abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK
0: abc1
0: abc2
0: abc3
@@ -6815,7 +6815,7 @@ No match
0: abc7
/abc.$/mgx<any>
- abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+ abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9
0: abc1
0: abc2
0: abc3