diff options
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | pcre_compile.c | 216 | ||||
-rw-r--r-- | pcre_internal.h | 7 | ||||
-rw-r--r-- | pcre_printint.src | 2 | ||||
-rw-r--r-- | pcre_tables.c | 327 | ||||
-rw-r--r-- | pcretest.c | 1 |
6 files changed, 370 insertions, 197 deletions
@@ -40,6 +40,20 @@ Version 7.4 10-Sep-07 9. When pcregrep was used with the --colour option, it missed the line ending sequence off the lines that it output. + +10. It was pointed out to me that arrays of string pointers cause lots of + relocations when a shared library is dynamically loaded. A technique of + using a single long string with a table of offsets can drastically reduce + these. I have refactored PCRE in four places to do this. The result is + dramatic: + + Originally: 290 + After changing UCP table: 187 + After changing error message table: 43 + After changing table of "verbs": 36 + After changing table of POSIX names: 22 + + Thanks to the folks working on GRegex for GLib for this insight. Version 7.3 28-Aug-07 diff --git a/pcre_compile.c b/pcre_compile.c index 15e06bc..fe74e59 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -140,35 +140,47 @@ static const short int escapes[] = { #endif -/* Table of special "verbs" like (*PRUNE) */ +/* Table of special "verbs" like (*PRUNE). This is a short table, so it is +searched linearly. Put all the names into a single string, in order to reduce +the number of relocations when a shared library is dynamically linked. */ typedef struct verbitem { - const char *name; int len; int op; } verbitem; +static const char verbnames[] = + "ACCEPT\0" + "COMMIT\0" + "F\0" + "FAIL\0" + "PRUNE\0" + "SKIP\0" + "THEN"; + static verbitem verbs[] = { - { "ACCEPT", 6, OP_ACCEPT }, - { "COMMIT", 6, OP_COMMIT }, - { "F", 1, OP_FAIL }, - { "FAIL", 4, OP_FAIL }, - { "PRUNE", 5, OP_PRUNE }, - { "SKIP", 4, OP_SKIP }, - { "THEN", 4, OP_THEN } + { 6, OP_ACCEPT }, + { 6, OP_COMMIT }, + { 1, OP_FAIL }, + { 4, OP_FAIL }, + { 5, OP_PRUNE }, + { 4, OP_SKIP }, + { 4, OP_THEN } }; static int verbcount = sizeof(verbs)/sizeof(verbitem); -/* Tables of names of POSIX character classes and their lengths. The list is -terminated by a zero length entry. The first three must be alpha, lower, upper, -as this is assumed for handling case independence. 
*/ +/* Tables of names of POSIX character classes and their lengths. The names are +now all in a single string, to reduce the number of relocations when a shared +library is dynamically loaded. The list of lengths is terminated by a zero +length entry. The first three must be alpha, lower, upper, as this is assumed +for handling case independence. */ -static const char *const posix_names[] = { - "alpha", "lower", "upper", - "alnum", "ascii", "blank", "cntrl", "digit", "graph", - "print", "punct", "space", "word", "xdigit" }; +static const char posix_names[] = + "alpha\0" "lower\0" "upper\0" "alnum\0" "ascii\0" "blank\0" + "cntrl\0" "digit\0" "graph\0" "print\0" "punct\0" "space\0" + "word\0" "xdigit"; static const uschar posix_name_lengths[] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; @@ -207,84 +219,88 @@ static const int posix_class_maps[] = { /* The texts of compile-time error messages. These are "char *" because they are passed to the outside world. Do not ever re-use any error number, because they are documented. Always add a new error instead. Messages marked DEAD below -are no longer used. */ - -static const char *error_texts[] = { - "no error", - "\\ at end of pattern", - "\\c at end of pattern", - "unrecognized character follows \\", - "numbers out of order in {} quantifier", +are no longer used. This used to be a table of strings, but in order to reduce +the number of relocations needed when a shared library is loaded dynamically, +it is now one long string. We cannot use a table of offsets, because the +lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we +simply count through to the one we want - this isn't a performance issue +because these strings are used only when there is a compilation error. 
*/ + +static const char error_texts[] = + "no error\0" + "\\ at end of pattern\0" + "\\c at end of pattern\0" + "unrecognized character follows \\\0" + "numbers out of order in {} quantifier\0" /* 5 */ - "number too big in {} quantifier", - "missing terminating ] for character class", - "invalid escape sequence in character class", - "range out of order in character class", - "nothing to repeat", + "number too big in {} quantifier\0" + "missing terminating ] for character class\0" + "invalid escape sequence in character class\0" + "range out of order in character class\0" + "nothing to repeat\0" /* 10 */ - "operand of unlimited repeat could match the empty string", /** DEAD **/ - "internal error: unexpected repeat", - "unrecognized character after (?", - "POSIX named classes are supported only within a class", - "missing )", + "operand of unlimited repeat could match the empty string\0" /** DEAD **/ + "internal error: unexpected repeat\0" + "unrecognized character after (?\0" + "POSIX named classes are supported only within a class\0" + "missing )\0" /* 15 */ - "reference to non-existent subpattern", - "erroffset passed as NULL", - "unknown option bit(s) set", - "missing ) after comment", - "parentheses nested too deeply", /** DEAD **/ + "reference to non-existent subpattern\0" + "erroffset passed as NULL\0" + "unknown option bit(s) set\0" + "missing ) after comment\0" + "parentheses nested too deeply\0" /** DEAD **/ /* 20 */ - "regular expression is too large", - "failed to get memory", - "unmatched parentheses", - "internal error: code overflow", - "unrecognized character after (?<", + "regular expression is too large\0" + "failed to get memory\0" + "unmatched parentheses\0" + "internal error: code overflow\0" + "unrecognized character after (?<\0" /* 25 */ - "lookbehind assertion is not fixed length", - "malformed number or name after (?(", - "conditional group contains more than two branches", - "assertion expected after (?(", - "(?R or (?[+-]digits must be 
followed by )", + "lookbehind assertion is not fixed length\0" + "malformed number or name after (?(\0" + "conditional group contains more than two branches\0" + "assertion expected after (?(\0" + "(?R or (?[+-]digits must be followed by )\0" /* 30 */ - "unknown POSIX class name", - "POSIX collating elements are not supported", - "this version of PCRE is not compiled with PCRE_UTF8 support", - "spare error", /** DEAD **/ - "character value in \\x{...} sequence is too large", + "unknown POSIX class name\0" + "POSIX collating elements are not supported\0" + "this version of PCRE is not compiled with PCRE_UTF8 support\0" + "spare error\0" /** DEAD **/ + "character value in \\x{...} sequence is too large\0" /* 35 */ - "invalid condition (?(0)", - "\\C not allowed in lookbehind assertion", - "PCRE does not support \\L, \\l, \\N, \\U, or \\u", - "number after (?C is > 255", - "closing ) for (?C expected", + "invalid condition (?(0)\0" + "\\C not allowed in lookbehind assertion\0" + "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0" + "number after (?C is > 255\0" + "closing ) for (?C expected\0" /* 40 */ - "recursive call could loop indefinitely", - "unrecognized character after (?P", - "syntax error in subpattern name (missing terminator)", - "two named subpatterns have the same name", - "invalid UTF-8 string", + "recursive call could loop indefinitely\0" + "unrecognized character after (?P\0" + "syntax error in subpattern name (missing terminator)\0" + "two named subpatterns have the same name\0" + "invalid UTF-8 string\0" /* 45 */ - "support for \\P, \\p, and \\X has not been compiled", - "malformed \\P or \\p sequence", - "unknown property name after \\P or \\p", - "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)", - "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")", + "support for \\P, \\p, and \\X has not been compiled\0" + "malformed \\P or \\p sequence\0" + "unknown property name after \\P or \\p\0" + "subpattern 
name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0" + "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" /* 50 */ - "repeated subpattern is too long", /** DEAD **/ - "octal value is greater than \\377 (not in UTF-8 mode)", - "internal error: overran compiling workspace", - "internal error: previously-checked referenced subpattern not found", - "DEFINE group contains more than one branch", + "repeated subpattern is too long\0" /** DEAD **/ + "octal value is greater than \\377 (not in UTF-8 mode)\0" + "internal error: overran compiling workspace\0" + "internal error: previously-checked referenced subpattern not found\0" + "DEFINE group contains more than one branch\0" /* 55 */ - "repeating a DEFINE group is not allowed", - "inconsistent NEWLINE options", - "\\g is not followed by a braced name or an optionally braced non-zero number", - "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number", - "(*VERB) with an argument is not supported", + "repeating a DEFINE group is not allowed\0" + "inconsistent NEWLINE options\0" + "\\g is not followed by a braced name or an optionally braced non-zero number\0" + "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0" + "(*VERB) with an argument is not supported\0" /* 60 */ - "(*VERB) not recognized", - "number is too big" -}; + "(*VERB) not recognized\0" + "number is too big"; /* Table to identify digits and hex digits. This is used when compiling @@ -420,6 +436,28 @@ static BOOL /************************************************* +* Find an error text * +*************************************************/ + +/* The error texts are now all in one long string, to save on relocations. As +some of the text is of unknown length, we can't use a table of offsets. +Instead, just count through the strings. This is not a performance issue +because it happens only when there has been a compilation error. 
+ +Argument: the error number +Returns: pointer to the error string +*/ + +static const char * +find_error_text(int n) +{ +const char *s = error_texts; +for (; n > 0; n--) while (*s++ != 0); +return s; +} + + +/************************************************* * Handle escapes * *************************************************/ @@ -776,7 +814,7 @@ top = _pcre_utt_size; while (bot < top) { i = (bot + top) >> 1; - c = strcmp(name, _pcre_utt[i].name); + c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset); if (c == 0) { *dptr = _pcre_utt[i].value; @@ -1733,11 +1771,13 @@ Returns: a value representing the name, or -1 if unknown static int check_posix_name(const uschar *ptr, int len) { +const char *pn = posix_names; register int yield = 0; while (posix_name_lengths[yield] != 0) { if (len == posix_name_lengths[yield] && - strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield; + strncmp((const char *)ptr, pn, len) == 0) return yield; + pn += posix_name_lengths[yield] + 1; yield++; } return -1; @@ -4024,6 +4064,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) { int i, namelen; + const char *vn = verbnames; const uschar *name = ++ptr; previous = NULL; while ((cd->ctypes[*++ptr] & ctype_letter) != 0); @@ -4041,12 +4082,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. 
*/ for (i = 0; i < verbcount; i++) { if (namelen == verbs[i].len && - strncmp((char *)name, verbs[i].name, namelen) == 0) + strncmp((char *)name, vn, namelen) == 0) { *code = verbs[i].op; if (*code++ == OP_ACCEPT) cd->had_accept = TRUE; break; } + vn += verbs[i].len + 1; } if (i < verbcount) continue; *errorcodeptr = ERR60; @@ -6005,7 +6047,7 @@ if (errorcode != 0) PCRE_EARLY_ERROR_RETURN: *erroroffset = ptr - (const uschar *)pattern; PCRE_EARLY_ERROR_RETURN2: - *errorptr = error_texts[errorcode]; + *errorptr = find_error_text(errorcode); if (errorcodeptr != NULL) *errorcodeptr = errorcode; return NULL; } @@ -6090,7 +6132,7 @@ was compiled can be seen. */ if (code - codestart > length) { (pcre_free)(re); - *errorptr = error_texts[ERR23]; + *errorptr = find_error_text(ERR23); *erroroffset = ptr - (uschar *)pattern; if (errorcodeptr != NULL) *errorcodeptr = ERR23; return NULL; diff --git a/pcre_internal.h b/pcre_internal.h index 775e03f..b039900 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1064,10 +1064,12 @@ total length. */ #define tables_length (ctypes_offset + 256) /* Layout of the UCP type table that translates property names into types and -codes. */ +codes. Each entry used to point directly to a name, but to reduce the number of +relocations in shared libraries, it now has an offset into a single string +instead. */ typedef struct { - const char *name; + pcre_uint16 name_offset; pcre_uint16 type; pcre_uint16 value; } ucp_type_table; @@ -1085,6 +1087,7 @@ extern const uschar _pcre_utf8_table4[]; extern const int _pcre_utf8_table1_size; +extern const char _pcre_utt_names[]; extern const ucp_type_table _pcre_utt[]; extern const int _pcre_utt_size; diff --git a/pcre_printint.src b/pcre_printint.src index 90381ed..d51cbe0 100644 --- a/pcre_printint.src +++ b/pcre_printint.src @@ -126,7 +126,7 @@ for (i = _pcre_utt_size - 1; i >= 0; i--) { if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break; } -return (i >= 0)? 
_pcre_utt[i].name : "??"; +return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??"; #else /* It gets harder and harder to shut off unwanted compiler warnings. */ ptype = ptype * pvalue; diff --git a/pcre_tables.c b/pcre_tables.c index 0d060c2..6c4a60d 100644 --- a/pcre_tables.c +++ b/pcre_tables.c @@ -87,115 +87,228 @@ const uschar _pcre_utf8_table4[] = { 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; -/* This table translates Unicode property names into type and code values. It -is searched by binary chop, so must be in collating sequence of name. */ +/* The pcre_utt[] table below translates Unicode property names into type and +code values. It is searched by binary chop, so must be in collating sequence of +name. Originally, the table contained pointers to the name strings in the first +field of each entry. However, that leads to a large number of relocations when +a shared library is dynamically loaded. A significant reduction is made by +putting all the names into a single, large string and then using offsets in the +table itself. Maintenance is more error-prone, but frequent changes to this +data are unlikely. 
*/ + +const char _pcre_utt_names[] = + "Any\0" + "Arabic\0" + "Armenian\0" + "Balinese\0" + "Bengali\0" + "Bopomofo\0" + "Braille\0" + "Buginese\0" + "Buhid\0" + "C\0" + "Canadian_Aboriginal\0" + "Cc\0" + "Cf\0" + "Cherokee\0" + "Cn\0" + "Co\0" + "Common\0" + "Coptic\0" + "Cs\0" + "Cuneiform\0" + "Cypriot\0" + "Cyrillic\0" + "Deseret\0" + "Devanagari\0" + "Ethiopic\0" + "Georgian\0" + "Glagolitic\0" + "Gothic\0" + "Greek\0" + "Gujarati\0" + "Gurmukhi\0" + "Han\0" + "Hangul\0" + "Hanunoo\0" + "Hebrew\0" + "Hiragana\0" + "Inherited\0" + "Kannada\0" + "Katakana\0" + "Kharoshthi\0" + "Khmer\0" + "L\0" + "L&\0" + "Lao\0" + "Latin\0" + "Limbu\0" + "Linear_B\0" + "Ll\0" + "Lm\0" + "Lo\0" + "Lt\0" + "Lu\0" + "M\0" + "Malayalam\0" + "Mc\0" + "Me\0" + "Mn\0" + "Mongolian\0" + "Myanmar\0" + "N\0" + "Nd\0" + "New_Tai_Lue\0" + "Nko\0" + "Nl\0" + "No\0" + "Ogham\0" + "Old_Italic\0" + "Old_Persian\0" + "Oriya\0" + "Osmanya\0" + "P\0" + "Pc\0" + "Pd\0" + "Pe\0" + "Pf\0" + "Phags_Pa\0" + "Phoenician\0" + "Pi\0" + "Po\0" + "Ps\0" + "Runic\0" + "S\0" + "Sc\0" + "Shavian\0" + "Sinhala\0" + "Sk\0" + "Sm\0" + "So\0" + "Syloti_Nagri\0" + "Syriac\0" + "Tagalog\0" + "Tagbanwa\0" + "Tai_Le\0" + "Tamil\0" + "Telugu\0" + "Thaana\0" + "Thai\0" + "Tibetan\0" + "Tifinagh\0" + "Ugaritic\0" + "Yi\0" + "Z\0" + "Zl\0" + "Zp\0" + "Zs\0"; const ucp_type_table _pcre_utt[] = { - { "Any", PT_ANY, 0 }, - { "Arabic", PT_SC, ucp_Arabic }, - { "Armenian", PT_SC, ucp_Armenian }, - { "Balinese", PT_SC, ucp_Balinese }, - { "Bengali", PT_SC, ucp_Bengali }, - { "Bopomofo", PT_SC, ucp_Bopomofo }, - { "Braille", PT_SC, ucp_Braille }, - { "Buginese", PT_SC, ucp_Buginese }, - { "Buhid", PT_SC, ucp_Buhid }, - { "C", PT_GC, ucp_C }, - { "Canadian_Aboriginal", PT_SC, ucp_Canadian_Aboriginal }, - { "Cc", PT_PC, ucp_Cc }, - { "Cf", PT_PC, ucp_Cf }, - { "Cherokee", PT_SC, ucp_Cherokee }, - { "Cn", PT_PC, ucp_Cn }, - { "Co", PT_PC, ucp_Co }, - { "Common", PT_SC, ucp_Common }, - { "Coptic", PT_SC, ucp_Coptic }, - { "Cs", 
PT_PC, ucp_Cs }, - { "Cuneiform", PT_SC, ucp_Cuneiform }, - { "Cypriot", PT_SC, ucp_Cypriot }, - { "Cyrillic", PT_SC, ucp_Cyrillic }, - { "Deseret", PT_SC, ucp_Deseret }, - { "Devanagari", PT_SC, ucp_Devanagari }, - { "Ethiopic", PT_SC, ucp_Ethiopic }, - { "Georgian", PT_SC, ucp_Georgian }, - { "Glagolitic", PT_SC, ucp_Glagolitic }, - { "Gothic", PT_SC, ucp_Gothic }, - { "Greek", PT_SC, ucp_Greek }, - { "Gujarati", PT_SC, ucp_Gujarati }, - { "Gurmukhi", PT_SC, ucp_Gurmukhi }, - { "Han", PT_SC, ucp_Han }, - { "Hangul", PT_SC, ucp_Hangul }, - { "Hanunoo", PT_SC, ucp_Hanunoo }, - { "Hebrew", PT_SC, ucp_Hebrew }, - { "Hiragana", PT_SC, ucp_Hiragana }, - { "Inherited", PT_SC, ucp_Inherited }, - { "Kannada", PT_SC, ucp_Kannada }, - { "Katakana", PT_SC, ucp_Katakana }, - { "Kharoshthi", PT_SC, ucp_Kharoshthi }, - { "Khmer", PT_SC, ucp_Khmer }, - { "L", PT_GC, ucp_L }, - { "L&", PT_LAMP, 0 }, - { "Lao", PT_SC, ucp_Lao }, - { "Latin", PT_SC, ucp_Latin }, - { "Limbu", PT_SC, ucp_Limbu }, - { "Linear_B", PT_SC, ucp_Linear_B }, - { "Ll", PT_PC, ucp_Ll }, - { "Lm", PT_PC, ucp_Lm }, - { "Lo", PT_PC, ucp_Lo }, - { "Lt", PT_PC, ucp_Lt }, - { "Lu", PT_PC, ucp_Lu }, - { "M", PT_GC, ucp_M }, - { "Malayalam", PT_SC, ucp_Malayalam }, - { "Mc", PT_PC, ucp_Mc }, - { "Me", PT_PC, ucp_Me }, - { "Mn", PT_PC, ucp_Mn }, - { "Mongolian", PT_SC, ucp_Mongolian }, - { "Myanmar", PT_SC, ucp_Myanmar }, - { "N", PT_GC, ucp_N }, - { "Nd", PT_PC, ucp_Nd }, - { "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue }, - { "Nko", PT_SC, ucp_Nko }, - { "Nl", PT_PC, ucp_Nl }, - { "No", PT_PC, ucp_No }, - { "Ogham", PT_SC, ucp_Ogham }, - { "Old_Italic", PT_SC, ucp_Old_Italic }, - { "Old_Persian", PT_SC, ucp_Old_Persian }, - { "Oriya", PT_SC, ucp_Oriya }, - { "Osmanya", PT_SC, ucp_Osmanya }, - { "P", PT_GC, ucp_P }, - { "Pc", PT_PC, ucp_Pc }, - { "Pd", PT_PC, ucp_Pd }, - { "Pe", PT_PC, ucp_Pe }, - { "Pf", PT_PC, ucp_Pf }, - { "Phags_Pa", PT_SC, ucp_Phags_Pa }, - { "Phoenician", PT_SC, ucp_Phoenician }, - { "Pi", PT_PC, 
ucp_Pi }, - { "Po", PT_PC, ucp_Po }, - { "Ps", PT_PC, ucp_Ps }, - { "Runic", PT_SC, ucp_Runic }, - { "S", PT_GC, ucp_S }, - { "Sc", PT_PC, ucp_Sc }, - { "Shavian", PT_SC, ucp_Shavian }, - { "Sinhala", PT_SC, ucp_Sinhala }, - { "Sk", PT_PC, ucp_Sk }, - { "Sm", PT_PC, ucp_Sm }, - { "So", PT_PC, ucp_So }, - { "Syloti_Nagri", PT_SC, ucp_Syloti_Nagri }, - { "Syriac", PT_SC, ucp_Syriac }, - { "Tagalog", PT_SC, ucp_Tagalog }, - { "Tagbanwa", PT_SC, ucp_Tagbanwa }, - { "Tai_Le", PT_SC, ucp_Tai_Le }, - { "Tamil", PT_SC, ucp_Tamil }, - { "Telugu", PT_SC, ucp_Telugu }, - { "Thaana", PT_SC, ucp_Thaana }, - { "Thai", PT_SC, ucp_Thai }, - { "Tibetan", PT_SC, ucp_Tibetan }, - { "Tifinagh", PT_SC, ucp_Tifinagh }, - { "Ugaritic", PT_SC, ucp_Ugaritic }, - { "Yi", PT_SC, ucp_Yi }, - { "Z", PT_GC, ucp_Z }, - { "Zl", PT_PC, ucp_Zl }, - { "Zp", PT_PC, ucp_Zp }, - { "Zs", PT_PC, ucp_Zs } + { 0, PT_ANY, 0 }, + { 4, PT_SC, ucp_Arabic }, + { 11, PT_SC, ucp_Armenian }, + { 20, PT_SC, ucp_Balinese }, + { 29, PT_SC, ucp_Bengali }, + { 37, PT_SC, ucp_Bopomofo }, + { 46, PT_SC, ucp_Braille }, + { 54, PT_SC, ucp_Buginese }, + { 63, PT_SC, ucp_Buhid }, + { 69, PT_GC, ucp_C }, + { 71, PT_SC, ucp_Canadian_Aboriginal }, + { 91, PT_PC, ucp_Cc }, + { 94, PT_PC, ucp_Cf }, + { 97, PT_SC, ucp_Cherokee }, + { 106, PT_PC, ucp_Cn }, + { 109, PT_PC, ucp_Co }, + { 112, PT_SC, ucp_Common }, + { 119, PT_SC, ucp_Coptic }, + { 126, PT_PC, ucp_Cs }, + { 129, PT_SC, ucp_Cuneiform }, + { 139, PT_SC, ucp_Cypriot }, + { 147, PT_SC, ucp_Cyrillic }, + { 156, PT_SC, ucp_Deseret }, + { 164, PT_SC, ucp_Devanagari }, + { 175, PT_SC, ucp_Ethiopic }, + { 184, PT_SC, ucp_Georgian }, + { 193, PT_SC, ucp_Glagolitic }, + { 204, PT_SC, ucp_Gothic }, + { 211, PT_SC, ucp_Greek }, + { 217, PT_SC, ucp_Gujarati }, + { 226, PT_SC, ucp_Gurmukhi }, + { 235, PT_SC, ucp_Han }, + { 239, PT_SC, ucp_Hangul }, + { 246, PT_SC, ucp_Hanunoo }, + { 254, PT_SC, ucp_Hebrew }, + { 261, PT_SC, ucp_Hiragana }, + { 270, PT_SC, ucp_Inherited }, + { 280, 
PT_SC, ucp_Kannada }, + { 288, PT_SC, ucp_Katakana }, + { 297, PT_SC, ucp_Kharoshthi }, + { 308, PT_SC, ucp_Khmer }, + { 314, PT_GC, ucp_L }, + { 316, PT_LAMP, 0 }, + { 319, PT_SC, ucp_Lao }, + { 323, PT_SC, ucp_Latin }, + { 329, PT_SC, ucp_Limbu }, + { 335, PT_SC, ucp_Linear_B }, + { 344, PT_PC, ucp_Ll }, + { 347, PT_PC, ucp_Lm }, + { 350, PT_PC, ucp_Lo }, + { 353, PT_PC, ucp_Lt }, + { 356, PT_PC, ucp_Lu }, + { 359, PT_GC, ucp_M }, + { 361, PT_SC, ucp_Malayalam }, + { 371, PT_PC, ucp_Mc }, + { 374, PT_PC, ucp_Me }, + { 377, PT_PC, ucp_Mn }, + { 380, PT_SC, ucp_Mongolian }, + { 390, PT_SC, ucp_Myanmar }, + { 398, PT_GC, ucp_N }, + { 400, PT_PC, ucp_Nd }, + { 403, PT_SC, ucp_New_Tai_Lue }, + { 415, PT_SC, ucp_Nko }, + { 419, PT_PC, ucp_Nl }, + { 422, PT_PC, ucp_No }, + { 425, PT_SC, ucp_Ogham }, + { 431, PT_SC, ucp_Old_Italic }, + { 442, PT_SC, ucp_Old_Persian }, + { 454, PT_SC, ucp_Oriya }, + { 460, PT_SC, ucp_Osmanya }, + { 468, PT_GC, ucp_P }, + { 470, PT_PC, ucp_Pc }, + { 473, PT_PC, ucp_Pd }, + { 476, PT_PC, ucp_Pe }, + { 479, PT_PC, ucp_Pf }, + { 482, PT_SC, ucp_Phags_Pa }, + { 491, PT_SC, ucp_Phoenician }, + { 502, PT_PC, ucp_Pi }, + { 505, PT_PC, ucp_Po }, + { 508, PT_PC, ucp_Ps }, + { 511, PT_SC, ucp_Runic }, + { 517, PT_GC, ucp_S }, + { 519, PT_PC, ucp_Sc }, + { 522, PT_SC, ucp_Shavian }, + { 530, PT_SC, ucp_Sinhala }, + { 538, PT_PC, ucp_Sk }, + { 541, PT_PC, ucp_Sm }, + { 544, PT_PC, ucp_So }, + { 547, PT_SC, ucp_Syloti_Nagri }, + { 560, PT_SC, ucp_Syriac }, + { 567, PT_SC, ucp_Tagalog }, + { 575, PT_SC, ucp_Tagbanwa }, + { 584, PT_SC, ucp_Tai_Le }, + { 591, PT_SC, ucp_Tamil }, + { 597, PT_SC, ucp_Telugu }, + { 604, PT_SC, ucp_Thaana }, + { 611, PT_SC, ucp_Thai }, + { 616, PT_SC, ucp_Tibetan }, + { 624, PT_SC, ucp_Tifinagh }, + { 633, PT_SC, ucp_Ugaritic }, + { 642, PT_SC, ucp_Yi }, + { 645, PT_GC, ucp_Z }, + { 647, PT_PC, ucp_Zl }, + { 650, PT_PC, ucp_Zp }, + { 653, PT_PC, ucp_Zs } }; const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); 
@@ -94,6 +94,7 @@ symbols to prevent clashes. */ #define _pcre_utf8_table4 utf8_table4 #define _pcre_utt utt #define _pcre_utt_size utt_size +#define _pcre_utt_names utt_names #define _pcre_OP_lengths OP_lengths #include "pcre_tables.c" |