diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-05-05 10:44:20 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-05-05 10:44:20 +0000 |
commit | 85b995f30cc9bf0bb04f5b3b3707a216a56b6bdf (patch) | |
tree | 6410c80d7502e73c04d5eb0fa4f8ae885e6e3449 /maint | |
parent | 2bcdcbf324bea8939d73f9b32e3625539a4d209e (diff) | |
download | pcre-85b995f30cc9bf0bb04f5b3b3707a216a56b6bdf.tar.gz |
Add new special properties Xan, Xps, Xsp, Xwd to help with \w etc.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@517 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'maint')
-rwxr-xr-x | maint/GenerateUtt.py | 17 |
1 files changed, 15 insertions, 2 deletions
```diff
diff --git a/maint/GenerateUtt.py b/maint/GenerateUtt.py
index 8b2dbf8..caad4ac 100755
--- a/maint/GenerateUtt.py
+++ b/maint/GenerateUtt.py
@@ -11,6 +11,7 @@
 # Modified by PH 17-March-2009 to generate the more verbose form that works
 # for UTF-support in EBCDIC as well as ASCII environments.
 # Modified by PH 01-March-2010 to add new scripts from Unicode 5.2.0.
+# Modified by PH 04-May-2010 to add new "X.." special categories.
 
 script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
  'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
@@ -36,11 +37,22 @@ category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
 
 general_category_names = ['C', 'L', 'M', 'N', 'P', 'S', 'Z']
 
+# First add the Unicode script and category names.
+
 utt_table = zip(script_names, ['PT_SC'] * len(script_names))
 utt_table += zip(category_names, ['PT_PC'] * len(category_names))
 utt_table += zip(general_category_names, ['PT_GC'] * len(general_category_names))
-utt_table.append(('L&', 'PT_LAMP'))
+
+# Now add our own specials.
+
 utt_table.append(('Any', 'PT_ANY'))
+utt_table.append(('L&', 'PT_LAMP'))
+utt_table.append(('Xan', 'PT_ALNUM'))
+utt_table.append(('Xps', 'PT_PXSPACE'))
+utt_table.append(('Xsp', 'PT_SPACE'))
+utt_table.append(('Xwd', 'PT_WORD'))
+
+# Sort the table.
 
 utt_table.sort()
 
@@ -74,7 +86,8 @@ print '\nconst ucp_type_table _pcre_utt[] = { '
 offset = 0
 last = ','
 for utt in utt_table:
-        if utt[1] in ('PT_ANY', 'PT_LAMP'):
+        if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
+            'PT_SPACE', 'PT_WORD'):
                 value = '0'
         else:
                 value = 'ucp_' + utt[0]
```