Unicode upper/lower casing is now used when UCP is set, even if UTF is not set.

This is not yet documented, and it is not yet implemented in JIT.

git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1224 6239d852-aaf2-0410-a92c-79f79f948069
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2020-02-23 16:40:05 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2020-02-23 16:40:05 +0000
commit: eeeb059e46a07f10023f2313894159161504b664 (patch)
tree: 98719aa173603943d4a2a403724045bf8a40f19c /testdata/testoutput10
parent: 98c6677bd3ff37d50249b32297abdb6008b42d54 (diff)
download: pcre2-eeeb059e46a07f10023f2313894159161504b664.tar.gz
1 files changed, 63 insertions, 1 deletions
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 775c2ab..9fe5ef6 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1780,11 +1780,15 @@ Capture group count = 0
 Options: utf
 Starting code units: \xc3 
 Subject length lower bound = 1
+    abc\x{ff}def
+ 0: \x{ff}
 
 /[\xff\x{ff}]/I
 Capture group count = 0
-Starting code units: \xff 
+First code unit = \xff
 Subject length lower bound = 1
+    abc\x{ff}def
+ 0: \xff
 
 /[Ss]/I
 Capture group count = 0
@@ -1813,4 +1817,62 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
     abc\x80\=startchar,offset=3
 Error -36 (bad UTF-8 offset)
 
+#subject no_jit
+
+/\x{c1}+\x{e1}/iIB,ucp
+------------------------------------------------------------------
+        Bra
+     /i \x{c1}+
+     /i \x{e1}
+        Ket
+        End
+------------------------------------------------------------------
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Last code unit = \xe1 (caseless)
+Subject length lower bound = 2
+    \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+    \x{e1}\x{e1}\x{e1} 
+ 0: \xe1\xe1\xe1
+
+/a|\x{c1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+Starting code units: A a \xc1 \xe1 
+Subject length lower bound = 1
+    \x{e1}xxx
+ 0: \xe1
+
+/a|\x{c1}/iI,utf
+Capture group count = 0
+Options: caseless utf
+Starting code units: A a \xc3 
+Subject length lower bound = 1
+    \x{e1}xxx
+ 0: \x{e1}
+
+/\x{c1}|\x{e1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Subject length lower bound = 1
+
+/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
+    X\x{e1}Y
+ 1: >\xc1<
+
+/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
+    X\x{c1}Y
+ 1: >\xe1<
+
+# Without UTF or UCP characters > 127 have only one case in the default locale.
+
+/X(\x{e1})Y/replace=>\U$1<,substitute_extended
+    X\x{e1}Y
+ 1: >\xe1<
+
+#subject     
+
 # End of testinput10
author	ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>	2020-02-23 16:40:05 +0000
committer	ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>	2020-02-23 16:40:05 +0000
commit	eeeb059e46a07f10023f2313894159161504b664 (patch)
tree	98719aa173603943d4a2a403724045bf8a40f19c /testdata/testoutput10
parent	98c6677bd3ff37d50249b32297abdb6008b42d54 (diff)
download	pcre2-eeeb059e46a07f10023f2313894159161504b664.tar.gz