summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 14
-rw-r--r-- pcre_compile.c 216
-rw-r--r-- pcre_internal.h 7
-rw-r--r-- pcre_printint.src 2
-rw-r--r-- pcre_tables.c 327
-rw-r--r-- pcretest.c 1
6 files changed, 370 insertions, 197 deletions
diff --git a/ChangeLog b/ChangeLog
index 25ad42e..215c83c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,6 +40,20 @@ Version 7.4 10-Sep-07
9. When pcregrep was used with the --colour option, it missed the line ending
sequence off the lines that it output.
+
+10. It was pointed out to me that arrays of string pointers cause lots of
+ relocations when a shared library is dynamically loaded. A technique of
+ using a single long string with a table of offsets can drastically reduce
+ these. I have refactored PCRE in four places to do this. The result is
+ dramatic:
+
+ Originally: 290
+ After changing UCP table: 187
+ After changing error message table: 43
+ After changing table of "verbs": 36
+ After changing table of Posix names: 22
+
+ Thanks to the folks working on Gregex for glib for this insight.
Version 7.3 28-Aug-07
diff --git a/pcre_compile.c b/pcre_compile.c
index 15e06bc..fe74e59 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -140,35 +140,47 @@ static const short int escapes[] = {
#endif
-/* Table of special "verbs" like (*PRUNE) */
+/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
+searched linearly. Put all the names into a single string, in order to reduce
+the number of relocations when a shared library is dynamically linked. */
typedef struct verbitem {
- const char *name;
int len;
int op;
} verbitem;
+static const char verbnames[] =
+ "ACCEPT\0"
+ "COMMIT\0"
+ "F\0"
+ "FAIL\0"
+ "PRUNE\0"
+ "SKIP\0"
+ "THEN";
+
static verbitem verbs[] = {
- { "ACCEPT", 6, OP_ACCEPT },
- { "COMMIT", 6, OP_COMMIT },
- { "F", 1, OP_FAIL },
- { "FAIL", 4, OP_FAIL },
- { "PRUNE", 5, OP_PRUNE },
- { "SKIP", 4, OP_SKIP },
- { "THEN", 4, OP_THEN }
+ { 6, OP_ACCEPT },
+ { 6, OP_COMMIT },
+ { 1, OP_FAIL },
+ { 4, OP_FAIL },
+ { 5, OP_PRUNE },
+ { 4, OP_SKIP },
+ { 4, OP_THEN }
};
static int verbcount = sizeof(verbs)/sizeof(verbitem);
-/* Tables of names of POSIX character classes and their lengths. The list is
-terminated by a zero length entry. The first three must be alpha, lower, upper,
-as this is assumed for handling case independence. */
+/* Tables of names of POSIX character classes and their lengths. The names are
+now all in a single string, to reduce the number of relocations when a shared
+library is dynamically loaded. The list of lengths is terminated by a zero
+length entry. The first three must be alpha, lower, upper, as this is assumed
+for handling case independence. */
-static const char *const posix_names[] = {
- "alpha", "lower", "upper",
- "alnum", "ascii", "blank", "cntrl", "digit", "graph",
- "print", "punct", "space", "word", "xdigit" };
+static const char posix_names[] =
+ "alpha\0" "lower\0" "upper\0" "alnum\0" "ascii\0" "blank\0"
+ "cntrl\0" "digit\0" "graph\0" "print\0" "punct\0" "space\0"
+ "word\0" "xdigit";
static const uschar posix_name_lengths[] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
@@ -207,84 +219,88 @@ static const int posix_class_maps[] = {
/* The texts of compile-time error messages. These are "char *" because they
are passed to the outside world. Do not ever re-use any error number, because
they are documented. Always add a new error instead. Messages marked DEAD below
-are no longer used. */
-
-static const char *error_texts[] = {
- "no error",
- "\\ at end of pattern",
- "\\c at end of pattern",
- "unrecognized character follows \\",
- "numbers out of order in {} quantifier",
+are no longer used. This used to be a table of strings, but in order to reduce
+the number of relocations needed when a shared library is loaded dynamically,
+it is now one long string. We cannot use a table of offsets, because the
+lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
+simply count through to the one we want - this isn't a performance issue
+because these strings are used only when there is a compilation error. */
+
+static const char error_texts[] =
+ "no error\0"
+ "\\ at end of pattern\0"
+ "\\c at end of pattern\0"
+ "unrecognized character follows \\\0"
+ "numbers out of order in {} quantifier\0"
/* 5 */
- "number too big in {} quantifier",
- "missing terminating ] for character class",
- "invalid escape sequence in character class",
- "range out of order in character class",
- "nothing to repeat",
+ "number too big in {} quantifier\0"
+ "missing terminating ] for character class\0"
+ "invalid escape sequence in character class\0"
+ "range out of order in character class\0"
+ "nothing to repeat\0"
/* 10 */
- "operand of unlimited repeat could match the empty string", /** DEAD **/
- "internal error: unexpected repeat",
- "unrecognized character after (?",
- "POSIX named classes are supported only within a class",
- "missing )",
+ "operand of unlimited repeat could match the empty string\0" /** DEAD **/
+ "internal error: unexpected repeat\0"
+ "unrecognized character after (?\0"
+ "POSIX named classes are supported only within a class\0"
+ "missing )\0"
/* 15 */
- "reference to non-existent subpattern",
- "erroffset passed as NULL",
- "unknown option bit(s) set",
- "missing ) after comment",
- "parentheses nested too deeply", /** DEAD **/
+ "reference to non-existent subpattern\0"
+ "erroffset passed as NULL\0"
+ "unknown option bit(s) set\0"
+ "missing ) after comment\0"
+ "parentheses nested too deeply\0" /** DEAD **/
/* 20 */
- "regular expression is too large",
- "failed to get memory",
- "unmatched parentheses",
- "internal error: code overflow",
- "unrecognized character after (?<",
+ "regular expression is too large\0"
+ "failed to get memory\0"
+ "unmatched parentheses\0"
+ "internal error: code overflow\0"
+ "unrecognized character after (?<\0"
/* 25 */
- "lookbehind assertion is not fixed length",
- "malformed number or name after (?(",
- "conditional group contains more than two branches",
- "assertion expected after (?(",
- "(?R or (?[+-]digits must be followed by )",
+ "lookbehind assertion is not fixed length\0"
+ "malformed number or name after (?(\0"
+ "conditional group contains more than two branches\0"
+ "assertion expected after (?(\0"
+ "(?R or (?[+-]digits must be followed by )\0"
/* 30 */
- "unknown POSIX class name",
- "POSIX collating elements are not supported",
- "this version of PCRE is not compiled with PCRE_UTF8 support",
- "spare error", /** DEAD **/
- "character value in \\x{...} sequence is too large",
+ "unknown POSIX class name\0"
+ "POSIX collating elements are not supported\0"
+ "this version of PCRE is not compiled with PCRE_UTF8 support\0"
+ "spare error\0" /** DEAD **/
+ "character value in \\x{...} sequence is too large\0"
/* 35 */
- "invalid condition (?(0)",
- "\\C not allowed in lookbehind assertion",
- "PCRE does not support \\L, \\l, \\N, \\U, or \\u",
- "number after (?C is > 255",
- "closing ) for (?C expected",
+ "invalid condition (?(0)\0"
+ "\\C not allowed in lookbehind assertion\0"
+ "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
+ "number after (?C is > 255\0"
+ "closing ) for (?C expected\0"
/* 40 */
- "recursive call could loop indefinitely",
- "unrecognized character after (?P",
- "syntax error in subpattern name (missing terminator)",
- "two named subpatterns have the same name",
- "invalid UTF-8 string",
+ "recursive call could loop indefinitely\0"
+ "unrecognized character after (?P\0"
+ "syntax error in subpattern name (missing terminator)\0"
+ "two named subpatterns have the same name\0"
+ "invalid UTF-8 string\0"
/* 45 */
- "support for \\P, \\p, and \\X has not been compiled",
- "malformed \\P or \\p sequence",
- "unknown property name after \\P or \\p",
- "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)",
- "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")",
+ "support for \\P, \\p, and \\X has not been compiled\0"
+ "malformed \\P or \\p sequence\0"
+ "unknown property name after \\P or \\p\0"
+ "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
+ "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
/* 50 */
- "repeated subpattern is too long", /** DEAD **/
- "octal value is greater than \\377 (not in UTF-8 mode)",
- "internal error: overran compiling workspace",
- "internal error: previously-checked referenced subpattern not found",
- "DEFINE group contains more than one branch",
+ "repeated subpattern is too long\0" /** DEAD **/
+ "octal value is greater than \\377 (not in UTF-8 mode)\0"
+ "internal error: overran compiling workspace\0"
+ "internal error: previously-checked referenced subpattern not found\0"
+ "DEFINE group contains more than one branch\0"
/* 55 */
- "repeating a DEFINE group is not allowed",
- "inconsistent NEWLINE options",
- "\\g is not followed by a braced name or an optionally braced non-zero number",
- "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number",
- "(*VERB) with an argument is not supported",
+ "repeating a DEFINE group is not allowed\0"
+ "inconsistent NEWLINE options\0"
+ "\\g is not followed by a braced name or an optionally braced non-zero number\0"
+ "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"
+ "(*VERB) with an argument is not supported\0"
/* 60 */
- "(*VERB) not recognized",
- "number is too big"
-};
+ "(*VERB) not recognized\0"
+ "number is too big";
/* Table to identify digits and hex digits. This is used when compiling
@@ -420,6 +436,28 @@ static BOOL
/*************************************************
+* Find an error text *
+*************************************************/
+
+/* The error texts are now all in one long string, to save on relocations. As
+some of the text is of unknown length, we can't use a table of offsets.
+Instead, just count through the strings. This is not a performance issue
+because it happens only when there has been a compilation error.
+
+Argument: the error number
+Returns: pointer to the error string
+*/
+
+static const char *
+find_error_text(int n)
+{
+const char *s = error_texts;
+for (; n > 0; n--) while (*s++ != 0);
+return s;
+}
+
+
+/*************************************************
* Handle escapes *
*************************************************/
@@ -776,7 +814,7 @@ top = _pcre_utt_size;
while (bot < top)
{
i = (bot + top) >> 1;
- c = strcmp(name, _pcre_utt[i].name);
+ c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
if (c == 0)
{
*dptr = _pcre_utt[i].value;
@@ -1733,11 +1771,13 @@ Returns: a value representing the name, or -1 if unknown
static int
check_posix_name(const uschar *ptr, int len)
{
+const char *pn = posix_names;
register int yield = 0;
while (posix_name_lengths[yield] != 0)
{
if (len == posix_name_lengths[yield] &&
- strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;
+ strncmp((const char *)ptr, pn, len) == 0) return yield;
+ pn += posix_name_lengths[yield] + 1;
yield++;
}
return -1;
@@ -4024,6 +4064,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
{
int i, namelen;
+ const char *vn = verbnames;
const uschar *name = ++ptr;
previous = NULL;
while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
@@ -4041,12 +4082,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
for (i = 0; i < verbcount; i++)
{
if (namelen == verbs[i].len &&
- strncmp((char *)name, verbs[i].name, namelen) == 0)
+ strncmp((char *)name, vn, namelen) == 0)
{
*code = verbs[i].op;
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
break;
}
+ vn += verbs[i].len + 1;
}
if (i < verbcount) continue;
*errorcodeptr = ERR60;
@@ -6005,7 +6047,7 @@ if (errorcode != 0)
PCRE_EARLY_ERROR_RETURN:
*erroroffset = ptr - (const uschar *)pattern;
PCRE_EARLY_ERROR_RETURN2:
- *errorptr = error_texts[errorcode];
+ *errorptr = find_error_text(errorcode);
if (errorcodeptr != NULL) *errorcodeptr = errorcode;
return NULL;
}
@@ -6090,7 +6132,7 @@ was compiled can be seen. */
if (code - codestart > length)
{
(pcre_free)(re);
- *errorptr = error_texts[ERR23];
+ *errorptr = find_error_text(ERR23);
*erroroffset = ptr - (uschar *)pattern;
if (errorcodeptr != NULL) *errorcodeptr = ERR23;
return NULL;
diff --git a/pcre_internal.h b/pcre_internal.h
index 775e03f..b039900 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1064,10 +1064,12 @@ total length. */
#define tables_length (ctypes_offset + 256)
/* Layout of the UCP type table that translates property names into types and
-codes. */
+codes. Each entry used to point directly to a name, but to reduce the number of
+relocations in shared libraries, it now has an offset into a single string
+instead. */
typedef struct {
- const char *name;
+ pcre_uint16 name_offset;
pcre_uint16 type;
pcre_uint16 value;
} ucp_type_table;
@@ -1085,6 +1087,7 @@ extern const uschar _pcre_utf8_table4[];
extern const int _pcre_utf8_table1_size;
+extern const char _pcre_utt_names[];
extern const ucp_type_table _pcre_utt[];
extern const int _pcre_utt_size;
diff --git a/pcre_printint.src b/pcre_printint.src
index 90381ed..d51cbe0 100644
--- a/pcre_printint.src
+++ b/pcre_printint.src
@@ -126,7 +126,7 @@ for (i = _pcre_utt_size - 1; i >= 0; i--)
{
if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
}
-return (i >= 0)? _pcre_utt[i].name : "??";
+return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* It gets harder and harder to shut off unwanted compiler warnings. */
ptype = ptype * pvalue;
diff --git a/pcre_tables.c b/pcre_tables.c
index 0d060c2..6c4a60d 100644
--- a/pcre_tables.c
+++ b/pcre_tables.c
@@ -87,115 +87,228 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
-/* This table translates Unicode property names into type and code values. It
-is searched by binary chop, so must be in collating sequence of name. */
+/* The pcre_utt[] table below translates Unicode property names into type and
+code values. It is searched by binary chop, so must be in collating sequence of
+name. Originally, the table contained pointers to the name strings in the first
+field of each entry. However, that leads to a large number of relocations when
+a shared library is dynamically loaded. A significant reduction is made by
+putting all the names into a single, large string and then using offsets in the
+table itself. Maintenance is more error-prone, but frequent changes to this
+data are unlikely. */
+
+const char _pcre_utt_names[] =
+ "Any\0"
+ "Arabic\0"
+ "Armenian\0"
+ "Balinese\0"
+ "Bengali\0"
+ "Bopomofo\0"
+ "Braille\0"
+ "Buginese\0"
+ "Buhid\0"
+ "C\0"
+ "Canadian_Aboriginal\0"
+ "Cc\0"
+ "Cf\0"
+ "Cherokee\0"
+ "Cn\0"
+ "Co\0"
+ "Common\0"
+ "Coptic\0"
+ "Cs\0"
+ "Cuneiform\0"
+ "Cypriot\0"
+ "Cyrillic\0"
+ "Deseret\0"
+ "Devanagari\0"
+ "Ethiopic\0"
+ "Georgian\0"
+ "Glagolitic\0"
+ "Gothic\0"
+ "Greek\0"
+ "Gujarati\0"
+ "Gurmukhi\0"
+ "Han\0"
+ "Hangul\0"
+ "Hanunoo\0"
+ "Hebrew\0"
+ "Hiragana\0"
+ "Inherited\0"
+ "Kannada\0"
+ "Katakana\0"
+ "Kharoshthi\0"
+ "Khmer\0"
+ "L\0"
+ "L&\0"
+ "Lao\0"
+ "Latin\0"
+ "Limbu\0"
+ "Linear_B\0"
+ "Ll\0"
+ "Lm\0"
+ "Lo\0"
+ "Lt\0"
+ "Lu\0"
+ "M\0"
+ "Malayalam\0"
+ "Mc\0"
+ "Me\0"
+ "Mn\0"
+ "Mongolian\0"
+ "Myanmar\0"
+ "N\0"
+ "Nd\0"
+ "New_Tai_Lue\0"
+ "Nko\0"
+ "Nl\0"
+ "No\0"
+ "Ogham\0"
+ "Old_Italic\0"
+ "Old_Persian\0"
+ "Oriya\0"
+ "Osmanya\0"
+ "P\0"
+ "Pc\0"
+ "Pd\0"
+ "Pe\0"
+ "Pf\0"
+ "Phags_Pa\0"
+ "Phoenician\0"
+ "Pi\0"
+ "Po\0"
+ "Ps\0"
+ "Runic\0"
+ "S\0"
+ "Sc\0"
+ "Shavian\0"
+ "Sinhala\0"
+ "Sk\0"
+ "Sm\0"
+ "So\0"
+ "Syloti_Nagri\0"
+ "Syriac\0"
+ "Tagalog\0"
+ "Tagbanwa\0"
+ "Tai_Le\0"
+ "Tamil\0"
+ "Telugu\0"
+ "Thaana\0"
+ "Thai\0"
+ "Tibetan\0"
+ "Tifinagh\0"
+ "Ugaritic\0"
+ "Yi\0"
+ "Z\0"
+ "Zl\0"
+ "Zp\0"
+ "Zs\0";
const ucp_type_table _pcre_utt[] = {
- { "Any", PT_ANY, 0 },
- { "Arabic", PT_SC, ucp_Arabic },
- { "Armenian", PT_SC, ucp_Armenian },
- { "Balinese", PT_SC, ucp_Balinese },
- { "Bengali", PT_SC, ucp_Bengali },
- { "Bopomofo", PT_SC, ucp_Bopomofo },
- { "Braille", PT_SC, ucp_Braille },
- { "Buginese", PT_SC, ucp_Buginese },
- { "Buhid", PT_SC, ucp_Buhid },
- { "C", PT_GC, ucp_C },
- { "Canadian_Aboriginal", PT_SC, ucp_Canadian_Aboriginal },
- { "Cc", PT_PC, ucp_Cc },
- { "Cf", PT_PC, ucp_Cf },
- { "Cherokee", PT_SC, ucp_Cherokee },
- { "Cn", PT_PC, ucp_Cn },
- { "Co", PT_PC, ucp_Co },
- { "Common", PT_SC, ucp_Common },
- { "Coptic", PT_SC, ucp_Coptic },
- { "Cs", PT_PC, ucp_Cs },
- { "Cuneiform", PT_SC, ucp_Cuneiform },
- { "Cypriot", PT_SC, ucp_Cypriot },
- { "Cyrillic", PT_SC, ucp_Cyrillic },
- { "Deseret", PT_SC, ucp_Deseret },
- { "Devanagari", PT_SC, ucp_Devanagari },
- { "Ethiopic", PT_SC, ucp_Ethiopic },
- { "Georgian", PT_SC, ucp_Georgian },
- { "Glagolitic", PT_SC, ucp_Glagolitic },
- { "Gothic", PT_SC, ucp_Gothic },
- { "Greek", PT_SC, ucp_Greek },
- { "Gujarati", PT_SC, ucp_Gujarati },
- { "Gurmukhi", PT_SC, ucp_Gurmukhi },
- { "Han", PT_SC, ucp_Han },
- { "Hangul", PT_SC, ucp_Hangul },
- { "Hanunoo", PT_SC, ucp_Hanunoo },
- { "Hebrew", PT_SC, ucp_Hebrew },
- { "Hiragana", PT_SC, ucp_Hiragana },
- { "Inherited", PT_SC, ucp_Inherited },
- { "Kannada", PT_SC, ucp_Kannada },
- { "Katakana", PT_SC, ucp_Katakana },
- { "Kharoshthi", PT_SC, ucp_Kharoshthi },
- { "Khmer", PT_SC, ucp_Khmer },
- { "L", PT_GC, ucp_L },
- { "L&", PT_LAMP, 0 },
- { "Lao", PT_SC, ucp_Lao },
- { "Latin", PT_SC, ucp_Latin },
- { "Limbu", PT_SC, ucp_Limbu },
- { "Linear_B", PT_SC, ucp_Linear_B },
- { "Ll", PT_PC, ucp_Ll },
- { "Lm", PT_PC, ucp_Lm },
- { "Lo", PT_PC, ucp_Lo },
- { "Lt", PT_PC, ucp_Lt },
- { "Lu", PT_PC, ucp_Lu },
- { "M", PT_GC, ucp_M },
- { "Malayalam", PT_SC, ucp_Malayalam },
- { "Mc", PT_PC, ucp_Mc },
- { "Me", PT_PC, ucp_Me },
- { "Mn", PT_PC, ucp_Mn },
- { "Mongolian", PT_SC, ucp_Mongolian },
- { "Myanmar", PT_SC, ucp_Myanmar },
- { "N", PT_GC, ucp_N },
- { "Nd", PT_PC, ucp_Nd },
- { "New_Tai_Lue", PT_SC, ucp_New_Tai_Lue },
- { "Nko", PT_SC, ucp_Nko },
- { "Nl", PT_PC, ucp_Nl },
- { "No", PT_PC, ucp_No },
- { "Ogham", PT_SC, ucp_Ogham },
- { "Old_Italic", PT_SC, ucp_Old_Italic },
- { "Old_Persian", PT_SC, ucp_Old_Persian },
- { "Oriya", PT_SC, ucp_Oriya },
- { "Osmanya", PT_SC, ucp_Osmanya },
- { "P", PT_GC, ucp_P },
- { "Pc", PT_PC, ucp_Pc },
- { "Pd", PT_PC, ucp_Pd },
- { "Pe", PT_PC, ucp_Pe },
- { "Pf", PT_PC, ucp_Pf },
- { "Phags_Pa", PT_SC, ucp_Phags_Pa },
- { "Phoenician", PT_SC, ucp_Phoenician },
- { "Pi", PT_PC, ucp_Pi },
- { "Po", PT_PC, ucp_Po },
- { "Ps", PT_PC, ucp_Ps },
- { "Runic", PT_SC, ucp_Runic },
- { "S", PT_GC, ucp_S },
- { "Sc", PT_PC, ucp_Sc },
- { "Shavian", PT_SC, ucp_Shavian },
- { "Sinhala", PT_SC, ucp_Sinhala },
- { "Sk", PT_PC, ucp_Sk },
- { "Sm", PT_PC, ucp_Sm },
- { "So", PT_PC, ucp_So },
- { "Syloti_Nagri", PT_SC, ucp_Syloti_Nagri },
- { "Syriac", PT_SC, ucp_Syriac },
- { "Tagalog", PT_SC, ucp_Tagalog },
- { "Tagbanwa", PT_SC, ucp_Tagbanwa },
- { "Tai_Le", PT_SC, ucp_Tai_Le },
- { "Tamil", PT_SC, ucp_Tamil },
- { "Telugu", PT_SC, ucp_Telugu },
- { "Thaana", PT_SC, ucp_Thaana },
- { "Thai", PT_SC, ucp_Thai },
- { "Tibetan", PT_SC, ucp_Tibetan },
- { "Tifinagh", PT_SC, ucp_Tifinagh },
- { "Ugaritic", PT_SC, ucp_Ugaritic },
- { "Yi", PT_SC, ucp_Yi },
- { "Z", PT_GC, ucp_Z },
- { "Zl", PT_PC, ucp_Zl },
- { "Zp", PT_PC, ucp_Zp },
- { "Zs", PT_PC, ucp_Zs }
+ { 0, PT_ANY, 0 },
+ { 4, PT_SC, ucp_Arabic },
+ { 11, PT_SC, ucp_Armenian },
+ { 20, PT_SC, ucp_Balinese },
+ { 29, PT_SC, ucp_Bengali },
+ { 37, PT_SC, ucp_Bopomofo },
+ { 46, PT_SC, ucp_Braille },
+ { 54, PT_SC, ucp_Buginese },
+ { 63, PT_SC, ucp_Buhid },
+ { 69, PT_GC, ucp_C },
+ { 71, PT_SC, ucp_Canadian_Aboriginal },
+ { 91, PT_PC, ucp_Cc },
+ { 94, PT_PC, ucp_Cf },
+ { 97, PT_SC, ucp_Cherokee },
+ { 106, PT_PC, ucp_Cn },
+ { 109, PT_PC, ucp_Co },
+ { 112, PT_SC, ucp_Common },
+ { 119, PT_SC, ucp_Coptic },
+ { 126, PT_PC, ucp_Cs },
+ { 129, PT_SC, ucp_Cuneiform },
+ { 139, PT_SC, ucp_Cypriot },
+ { 147, PT_SC, ucp_Cyrillic },
+ { 156, PT_SC, ucp_Deseret },
+ { 164, PT_SC, ucp_Devanagari },
+ { 175, PT_SC, ucp_Ethiopic },
+ { 184, PT_SC, ucp_Georgian },
+ { 193, PT_SC, ucp_Glagolitic },
+ { 204, PT_SC, ucp_Gothic },
+ { 211, PT_SC, ucp_Greek },
+ { 217, PT_SC, ucp_Gujarati },
+ { 226, PT_SC, ucp_Gurmukhi },
+ { 235, PT_SC, ucp_Han },
+ { 239, PT_SC, ucp_Hangul },
+ { 246, PT_SC, ucp_Hanunoo },
+ { 254, PT_SC, ucp_Hebrew },
+ { 261, PT_SC, ucp_Hiragana },
+ { 270, PT_SC, ucp_Inherited },
+ { 280, PT_SC, ucp_Kannada },
+ { 288, PT_SC, ucp_Katakana },
+ { 297, PT_SC, ucp_Kharoshthi },
+ { 308, PT_SC, ucp_Khmer },
+ { 314, PT_GC, ucp_L },
+ { 316, PT_LAMP, 0 },
+ { 319, PT_SC, ucp_Lao },
+ { 323, PT_SC, ucp_Latin },
+ { 329, PT_SC, ucp_Limbu },
+ { 335, PT_SC, ucp_Linear_B },
+ { 344, PT_PC, ucp_Ll },
+ { 347, PT_PC, ucp_Lm },
+ { 350, PT_PC, ucp_Lo },
+ { 353, PT_PC, ucp_Lt },
+ { 356, PT_PC, ucp_Lu },
+ { 359, PT_GC, ucp_M },
+ { 361, PT_SC, ucp_Malayalam },
+ { 371, PT_PC, ucp_Mc },
+ { 374, PT_PC, ucp_Me },
+ { 377, PT_PC, ucp_Mn },
+ { 380, PT_SC, ucp_Mongolian },
+ { 390, PT_SC, ucp_Myanmar },
+ { 398, PT_GC, ucp_N },
+ { 400, PT_PC, ucp_Nd },
+ { 403, PT_SC, ucp_New_Tai_Lue },
+ { 415, PT_SC, ucp_Nko },
+ { 419, PT_PC, ucp_Nl },
+ { 422, PT_PC, ucp_No },
+ { 425, PT_SC, ucp_Ogham },
+ { 431, PT_SC, ucp_Old_Italic },
+ { 442, PT_SC, ucp_Old_Persian },
+ { 454, PT_SC, ucp_Oriya },
+ { 460, PT_SC, ucp_Osmanya },
+ { 468, PT_GC, ucp_P },
+ { 470, PT_PC, ucp_Pc },
+ { 473, PT_PC, ucp_Pd },
+ { 476, PT_PC, ucp_Pe },
+ { 479, PT_PC, ucp_Pf },
+ { 482, PT_SC, ucp_Phags_Pa },
+ { 491, PT_SC, ucp_Phoenician },
+ { 502, PT_PC, ucp_Pi },
+ { 505, PT_PC, ucp_Po },
+ { 508, PT_PC, ucp_Ps },
+ { 511, PT_SC, ucp_Runic },
+ { 517, PT_GC, ucp_S },
+ { 519, PT_PC, ucp_Sc },
+ { 522, PT_SC, ucp_Shavian },
+ { 530, PT_SC, ucp_Sinhala },
+ { 538, PT_PC, ucp_Sk },
+ { 541, PT_PC, ucp_Sm },
+ { 544, PT_PC, ucp_So },
+ { 547, PT_SC, ucp_Syloti_Nagri },
+ { 560, PT_SC, ucp_Syriac },
+ { 567, PT_SC, ucp_Tagalog },
+ { 575, PT_SC, ucp_Tagbanwa },
+ { 584, PT_SC, ucp_Tai_Le },
+ { 591, PT_SC, ucp_Tamil },
+ { 597, PT_SC, ucp_Telugu },
+ { 604, PT_SC, ucp_Thaana },
+ { 611, PT_SC, ucp_Thai },
+ { 616, PT_SC, ucp_Tibetan },
+ { 624, PT_SC, ucp_Tifinagh },
+ { 633, PT_SC, ucp_Ugaritic },
+ { 642, PT_SC, ucp_Yi },
+ { 645, PT_GC, ucp_Z },
+ { 647, PT_PC, ucp_Zl },
+ { 650, PT_PC, ucp_Zp },
+ { 653, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
diff --git a/pcretest.c b/pcretest.c
index cb9e077..f7a7e21 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -94,6 +94,7 @@ symbols to prevent clashes. */
#define _pcre_utf8_table4 utf8_table4
#define _pcre_utt utt
#define _pcre_utt_size utt_size
+#define _pcre_utt_names utt_names
#define _pcre_OP_lengths OP_lengths
#include "pcre_tables.c"