summary | refs | log | tree | commit | diff
path: root/pcre_tables.c
diff options
context:
space:
mode:
author nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:41:21 +0000
committer nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:41:21 +0000
commit ced1f145fdf26ec7df4b9048a9da0ef17e9618f2 (patch)
tree 371f88a16cfb5ac0a176622bcd424aa6c28c4cc8 /pcre_tables.c
parent 2550303b1f255c525d802f94d9c4411a0ccc630f (diff)
download pcre-ced1f145fdf26ec7df4b9048a9da0ef17e9618f2.tar.gz
Load pcre-6.5 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@87 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_tables.c')
-rw-r--r-- pcre_tables.c 145
1 files changed, 104 insertions, 41 deletions
diff --git a/pcre_tables.c b/pcre_tables.c
index f91e881..480257c 100644
--- a/pcre_tables.c
+++ b/pcre_tables.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2005 University of Cambridge
+ Copyright (c) 1997-2006 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -48,7 +48,7 @@ clashes with the library. */
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
-the definition is next to the definition of the opcodes in internal.h. */
+the definition is next to the definition of the opcodes in pcre_internal.h. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
@@ -82,47 +82,110 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
-/* This table translates Unicode property names into code values for the
-ucp_findchar() function. */
+/* This table translates Unicode property names into type and code values. It
+is searched by binary chop, so must be in collating sequence of name. */
const ucp_type_table _pcre_utt[] = {
- { "C", 128 + ucp_C },
- { "Cc", ucp_Cc },
- { "Cf", ucp_Cf },
- { "Cn", ucp_Cn },
- { "Co", ucp_Co },
- { "Cs", ucp_Cs },
- { "L", 128 + ucp_L },
- { "Ll", ucp_Ll },
- { "Lm", ucp_Lm },
- { "Lo", ucp_Lo },
- { "Lt", ucp_Lt },
- { "Lu", ucp_Lu },
- { "M", 128 + ucp_M },
- { "Mc", ucp_Mc },
- { "Me", ucp_Me },
- { "Mn", ucp_Mn },
- { "N", 128 + ucp_N },
- { "Nd", ucp_Nd },
- { "Nl", ucp_Nl },
- { "No", ucp_No },
- { "P", 128 + ucp_P },
- { "Pc", ucp_Pc },
- { "Pd", ucp_Pd },
- { "Pe", ucp_Pe },
- { "Pf", ucp_Pf },
- { "Pi", ucp_Pi },
- { "Po", ucp_Po },
- { "Ps", ucp_Ps },
- { "S", 128 + ucp_S },
- { "Sc", ucp_Sc },
- { "Sk", ucp_Sk },
- { "Sm", ucp_Sm },
- { "So", ucp_So },
- { "Z", 128 + ucp_Z },
- { "Zl", ucp_Zl },
- { "Zp", ucp_Zp },
- { "Zs", ucp_Zs }
+ { "Any", PT_ANY, 0 },
+ { "Arabic", PT_SC, ucp_Arabic },
+ { "Armenian", PT_SC, ucp_Armenian },
+ { "Bengali", PT_SC, ucp_Bengali },
+ { "Bopomofo", PT_SC, ucp_Bopomofo },
+ { "Braille", PT_SC, ucp_Braille },
+ { "Buginese", PT_SC, ucp_Buginese },
+ { "Buhid", PT_SC, ucp_Buhid },
+ { "C", PT_GC, ucp_C },
+ { "Canadian_Aboriginal", PT_SC, ucp_Canadian_Aboriginal },
+ { "Cc", PT_PC, ucp_Cc },
+ { "Cf", PT_PC, ucp_Cf },
+ { "Cherokee", PT_SC, ucp_Cherokee },
+ { "Cn", PT_PC, ucp_Cn },
+ { "Co", PT_PC, ucp_Co },
+ { "Common", PT_SC, ucp_Common },
+ { "Coptic", PT_SC, ucp_Coptic },
+ { "Cs", PT_PC, ucp_Cs },
+ { "Cypriot", PT_SC, ucp_Cypriot },
+ { "Cyrillic", PT_SC, ucp_Cyrillic },
+ { "Deseret", PT_SC, ucp_Deseret },
+ { "Devanagari", PT_SC, ucp_Devanagari },
+ { "Ethiopic", PT_SC, ucp_Ethiopic },
+ { "Georgian", PT_SC, ucp_Georgian },
+ { "Glagolitic", PT_SC, ucp_Glagolitic },
+ { "Gothic", PT_SC, ucp_Gothic },
+ { "Greek", PT_SC, ucp_Greek },
+ { "Gujarati", PT_SC, ucp_Gujarati },
+ { "Gurmukhi", PT_SC, ucp_Gurmukhi },
+ { "Han", PT_SC, ucp_Han },
+ { "Hangul", PT_SC, ucp_Hangul },
+ { "Hanunoo", PT_SC, ucp_Hanunoo },
+ { "Hebrew", PT_SC, ucp_Hebrew },
+ { "Hiragana", PT_SC, ucp_Hiragana },
+ { "Inherited", PT_SC, ucp_Inherited },
+ { "Kannada", PT_SC, ucp_Kannada },
+ { "Katakana", PT_SC, ucp_Katakana },
+ { "Kharoshthi", PT_SC, ucp_Kharoshthi },
+ { "Khmer", PT_SC, ucp_Khmer },
+ { "L", PT_GC, ucp_L },
+ { "L&", PT_LAMP, 0 },
+ { "Lao", PT_SC, ucp_Lao },
+ { "Latin", PT_SC, ucp_Latin },
+ { "Limbu", PT_SC, ucp_Limbu },
+ { "Linear_B", PT_SC, ucp_Linear_B },
+ { "Ll", PT_PC, ucp_Ll },
+ { "Lm", PT_PC, ucp_Lm },
+ { "Lo", PT_PC, ucp_Lo },
+ { "Lt", PT_PC, ucp_Lt },
+ { "Lu", PT_PC, ucp_Lu },
+ { "M", PT_GC, ucp_M },
+ { "Malayalam", PT_SC, ucp_Malayalam },
+ { "Mc", PT_PC, ucp_Mc },
+ { "Me", PT_PC, ucp_Me },
+ { "Mn", PT_PC, ucp_Mn },
+ { "Mongolian", PT_SC, ucp_Mongolian },
+ { "Myanmar", PT_SC, ucp_Myanmar },
+ { "N", PT_GC, ucp_N },
+ { "Nd", PT_PC, ucp_Nd },
+ { "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue },
+ { "Nl", PT_PC, ucp_Nl },
+ { "No", PT_PC, ucp_No },
+ { "Ogham", PT_SC, ucp_Ogham },
+ { "Old_Italic", PT_SC, ucp_Old_Italic },
+ { "Old_Persian", PT_SC, ucp_Old_Persian },
+ { "Oriya", PT_SC, ucp_Oriya },
+ { "Osmanya", PT_SC, ucp_Osmanya },
+ { "P", PT_GC, ucp_P },
+ { "Pc", PT_PC, ucp_Pc },
+ { "Pd", PT_PC, ucp_Pd },
+ { "Pe", PT_PC, ucp_Pe },
+ { "Pf", PT_PC, ucp_Pf },
+ { "Pi", PT_PC, ucp_Pi },
+ { "Po", PT_PC, ucp_Po },
+ { "Ps", PT_PC, ucp_Ps },
+ { "Runic", PT_SC, ucp_Runic },
+ { "S", PT_GC, ucp_S },
+ { "Sc", PT_PC, ucp_Sc },
+ { "Shavian", PT_SC, ucp_Shavian },
+ { "Sinhala", PT_SC, ucp_Sinhala },
+ { "Sk", PT_PC, ucp_Sk },
+ { "Sm", PT_PC, ucp_Sm },
+ { "So", PT_PC, ucp_So },
+ { "Syloti_Nagri", PT_SC, ucp_Syloti_Nagri },
+ { "Syriac", PT_SC, ucp_Syriac },
+ { "Tagalog", PT_SC, ucp_Tagalog },
+ { "Tagbanwa", PT_SC, ucp_Tagbanwa },
+ { "Tai_Le", PT_SC, ucp_Tai_Le },
+ { "Tamil", PT_SC, ucp_Tamil },
+ { "Telugu", PT_SC, ucp_Telugu },
+ { "Thaana", PT_SC, ucp_Thaana },
+ { "Thai", PT_SC, ucp_Thai },
+ { "Tibetan", PT_SC, ucp_Tibetan },
+ { "Tifinagh", PT_SC, ucp_Tifinagh },
+ { "Ugaritic", PT_SC, ucp_Ugaritic },
+ { "Yi", PT_SC, ucp_Yi },
+ { "Z", PT_GC, ucp_Z },
+ { "Zl", PT_PC, ucp_Zl },
+ { "Zp", PT_PC, ucp_Zp },
+ { "Zs", PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);