From b96a92fb2dbf3acb43641479fc731469e1de9f6c Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Wed, 5 Sep 2012 20:32:29 -0600
Subject: utf8.h: Remove some EBCDIC dependencies

regen/regcharclass.pl has been enhanced in previous commits so that it
generates as good code as these hand-defined macro definitions for
various UTF-8 constructs. And, it should be able to generate EBCDIC ones
as well. By using its definitions, we can remove the EBCDIC dependencies
for them. It is quite possible that the EBCDIC versions were wrong,
since they have never been tested. Even if regcharclass.pl has bugs
under EBCDIC, it is easier to find and fix those in one place than in
all the sundry definitions.
---
 regen/regcharclass.pl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'regen/regcharclass.pl')

diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 70f46b03b4..81ac13ce45 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1112,6 +1112,18 @@ VERTWS: Vertical Whitespace: \v \V
 => generic UTF8 LATIN1 cp :fast safe
 \p{VertSpace}
 
+REPLACEMENT: Unicode REPLACEMENT CHARACTER
+=> UTF8 :safe
+0xFFFD
+
+NONCHAR: Non character code points
+=> UTF8 :fast
+\p{Nchar}
+
+SURROGATE: Surrogate characters
+=> UTF8 :fast
+\p{Gc=Cs}
+
 GCB_L: Grapheme_Cluster_Break=L
 => UTF8 :fast
 \p{_X_GCB_L}
--
cgit v1.2.1