summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2013-02-27 15:41:22 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2013-02-27 15:41:22 +0000
commitdba7b5b11ce2b26cc1747f58910da27dd15a72b3 (patch)
tree5cc0a551e1f9c7110a14acb09c2c8395f7fe3929
parent93810e7565ae4917ac8d23dcb2e5f8aed2999c24 (diff)
downloadpcre-dba7b5b11ce2b26cc1747f58910da27dd15a72b3.tar.gz
Add \p{Xuc} to match characters identifiable by Universal Character Names.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1260 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog4
-rw-r--r--doc/pcrepattern.316
-rw-r--r--doc/pcresyntax.38
-rwxr-xr-xmaint/GenerateUtt.py28
-rw-r--r--pcre_dfa_exec.c24
-rw-r--r--pcre_exec.c64
-rw-r--r--pcre_internal.h1
-rw-r--r--pcre_tables.c15
-rw-r--r--pcre_xclass.c16
-rw-r--r--testdata/testinput1064
-rw-r--r--testdata/testinput766
-rw-r--r--testdata/testoutput10125
-rw-r--r--testdata/testoutput7108
13 files changed, 512 insertions, 27 deletions
diff --git a/ChangeLog b/ChangeLog
index d97b3de..b100623 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -75,6 +75,10 @@ Version 8.33 xx-xxxx-201x
19. Redefined some pcre_uchar variables in pcre_exec.c as pcre_uint32; this
gives some modest performance improvement in 8-bit mode.
+
+20. Added the PCRE-specific property \p{Xuc} for matching characters that can
+ be expressed in certain programming languages using Universal Character
+ Names.
Version 8.32 30-November-2012
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 08859c6..9b3e471 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -1,4 +1,4 @@
-.TH PCREPATTERN 3 "23 February 2013" "PCRE 8.33"
+.TH PCREPATTERN 3 "27 February 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
@@ -863,7 +863,8 @@ the "mark" property always have the "extend" grapheme breaking property.
As well as the standard Unicode properties described above, PCRE supports four
more that make it possible to convert traditional escape sequences such as \ew
and \es and POSIX character classes to use Unicode properties. PCRE uses these
-non-standard, non-Perl properties internally when PCRE_UCP is set. They are:
+non-standard, non-Perl properties internally when PCRE_UCP is set. However,
+they may also be used explicitly. These properties are:
.sp
Xan Any alphanumeric character
Xps Any POSIX space character
@@ -875,6 +876,15 @@ property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
carriage return, and any other character that has the Z (separator) property.
Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
same characters as Xan, plus underscore.
+.P
+There is another non-standard property, Xuc, which matches any character that
+can be represented by a Universal Character Name in C++ and other programming
+languages. These are the characters $, @, ` (grave accent), and all characters
+with Unicode code points greater than or equal to U+00A0, except for the
+surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are
+excluded. (Universal Character Names are of the form \euHHHH or \eUHHHHHHHH
+where H is a hexadecimal digit. Note that the Xuc property does not match these
+sequences but the characters that they represent.)
.
.
.\" HTML <a name="resetmatchstart"></a>
@@ -2979,6 +2989,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 23 February 2013
+Last updated: 27 February 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/doc/pcresyntax.3 b/doc/pcresyntax.3
index 868f427..fb229d3 100644
--- a/doc/pcresyntax.3
+++ b/doc/pcresyntax.3
@@ -1,4 +1,4 @@
-.TH PCRESYNTAX 3 "11 November 2012" "PCRE 8.32"
+.TH PCRESYNTAX 3 "27 February 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -116,6 +116,8 @@ PCRE_UCP option.
Xan Alphanumeric: union of properties L and N
Xps POSIX space: property Z or tab, NL, VT, FF, CR
Xsp Perl space: property Z or tab, NL, FF, CR
+ Xuc Universally-named character: one that can be
+ represented by a Universal Character Name
Xwd Perl word: property Xan or underscore
.
.
@@ -491,6 +493,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 11 November 2012
-Copyright (c) 1997-2012 University of Cambridge.
+Last updated: 27 February 2013
+Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/maint/GenerateUtt.py b/maint/GenerateUtt.py
index 92adf47..cd69bb1 100755
--- a/maint/GenerateUtt.py
+++ b/maint/GenerateUtt.py
@@ -1,6 +1,8 @@
#! /usr/bin/python
-# Generate utt tables.
+# Generate utt tables. Note: this script is written in Python 2 and is
+# incompatible with Python 3. However, the 2to3 conversion script has been
+# successfully tested on it.
# The source file pcre_tables.c contains (amongst other things), a table that
# is indexed by script name. In order to reduce the number of relocations when
@@ -18,6 +20,7 @@
# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0
# Modified by ChPe 30-September-2012 to add this note; no other changes were
# necessary for Unicode 6.2.0 support.
+# Modified by PH 26-February-2013 to add the Xuc special category.
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
@@ -60,6 +63,7 @@ utt_table.append(('L&', 'PT_LAMP'))
utt_table.append(('Xan', 'PT_ALNUM'))
utt_table.append(('Xps', 'PT_PXSPACE'))
utt_table.append(('Xsp', 'PT_SPACE'))
+utt_table.append(('Xuc', 'PT_UCNC'))
utt_table.append(('Xwd', 'PT_WORD'))
# Sort the table.
@@ -86,8 +90,8 @@ print ''
print 'const char PRIV(utt_names)[] =';
last = ''
for utt in utt_table:
- if utt == utt_table[-1]:
- last = ';'
+ if utt == utt_table[-1]:
+ last = ';'
print ' STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)
# This was how it was done before the EBCDIC-compatible modification.
# print ' "%s\\0"%s' % (utt[0], last)
@@ -96,13 +100,13 @@ print '\nconst ucp_type_table PRIV(utt)[] = {'
offset = 0
last = ','
for utt in utt_table:
- if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
- 'PT_SPACE', 'PT_WORD'):
- value = '0'
- else:
- value = 'ucp_' + utt[0]
- if utt == utt_table[-1]:
- last = ''
- print ' { %3d, %s, %s }%s' % (offset, utt[1], value, last)
- offset += len(utt[0]) + 1
+ if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
+ 'PT_SPACE', 'PT_UCNC', 'PT_WORD'):
+ value = '0'
+ else:
+ value = 'ucp_' + utt[0]
+ if utt == utt_table[-1]:
+ last = ''
+ print ' { %3d, %s, %s }%s' % (offset, utt[1], value, last)
+ offset += len(utt[0]) + 1
print '};'
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 79b7b9f..a99a850 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -1119,6 +1119,12 @@ for (;;)
if (c == *cp++) { OK = TRUE; break; }
}
break;
+
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1364,6 +1370,12 @@ for (;;)
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1602,6 +1614,12 @@ for (;;)
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1865,6 +1883,12 @@ for (;;)
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
diff --git a/pcre_exec.c b/pcre_exec.c
index 2be0fcb..1338927 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -308,7 +308,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
+ RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -2628,6 +2628,13 @@ for (;;)
{ if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
}
break;
+
+ case PT_UCNC:
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
/* This should never occur */
@@ -4246,6 +4253,22 @@ for (;;)
}
}
break;
+
+ case PT_UCNC:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
/* This should not occur */
@@ -4992,6 +5015,25 @@ for (;;)
}
}
/* Control never gets here */
+
+ case PT_UCNC:
+ for (fi = min;; fi++)
+ {
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never gets here */
/* This should never occur */
default:
@@ -5487,6 +5529,24 @@ for (;;)
GOT_MAX:
break;
+ case PT_UCNC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ GETCHARLENTEST(c, eptr, len);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ break;
+ eptr += len;
+ }
+ break;
+
default:
RRETURN(PCRE_ERROR_INTERNAL);
}
@@ -6128,7 +6188,7 @@ switch (frame->Xwhere)
LBL(32) LBL(34) LBL(42) LBL(46)
#ifdef SUPPORT_UCP
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
- LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
+ LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
#endif /* SUPPORT_UCP */
#endif /* SUPPORT_UTF */
default:
diff --git a/pcre_internal.h b/pcre_internal.h
index 78c4030..31ecbc4 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1835,6 +1835,7 @@ only. */
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
#define PT_WORD 8 /* Word - L plus N plus underscore */
#define PT_CLIST 9 /* Pseudo-property: match character list */
+#define PT_UCNC 10 /* Universal Character nameable character */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
diff --git a/pcre_tables.c b/pcre_tables.c
index 34ee048..f38ab52 100644
--- a/pcre_tables.c
+++ b/pcre_tables.c
@@ -346,6 +346,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
+#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
@@ -493,6 +494,7 @@ const char PRIV(utt_names)[] =
STRING_Xan0
STRING_Xps0
STRING_Xsp0
+ STRING_Xuc0
STRING_Xwd0
STRING_Yi0
STRING_Z0
@@ -640,12 +642,13 @@ const ucp_type_table PRIV(utt)[] = {
{ 1011, PT_ALNUM, 0 },
{ 1015, PT_PXSPACE, 0 },
{ 1019, PT_SPACE, 0 },
- { 1023, PT_WORD, 0 },
- { 1027, PT_SC, ucp_Yi },
- { 1030, PT_GC, ucp_Z },
- { 1032, PT_PC, ucp_Zl },
- { 1035, PT_PC, ucp_Zp },
- { 1038, PT_PC, ucp_Zs }
+ { 1023, PT_UCNC, 0 },
+ { 1027, PT_WORD, 0 },
+ { 1031, PT_SC, ucp_Yi },
+ { 1034, PT_GC, ucp_Z },
+ { 1036, PT_PC, ucp_Zl },
+ { 1039, PT_PC, ucp_Zp },
+ { 1042, PT_PC, ucp_Zs }
};
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/pcre_xclass.c b/pcre_xclass.c
index fa73cd8..ddc2844 100644
--- a/pcre_xclass.c
+++ b/pcre_xclass.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -179,6 +179,20 @@ while ((t = *data++) != XCL_END)
== (t == XCL_PROP))
return !negated;
break;
+
+ case PT_UCNC:
+ if (c < 0xa0)
+ {
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT) == (t == XCL_PROP))
+ return !negated;
+ }
+ else
+ {
+ if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP))
+ return !negated;
+ }
+ break;
/* This should never occur, but compilers may mutter if there is no
default. */
diff --git a/testdata/testinput10 b/testdata/testinput10
index f20dcb3..7f522e6 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -1334,4 +1334,68 @@
/is{2}t/8i
iskt
+/^\p{Xuc}/8
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+ ** Failers
+ abc
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\P{Xuc}/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+
+/^[\P{Xuc}]/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+
/-- End of testinput10 --/
diff --git a/testdata/testinput7 b/testdata/testinput7
index b265f1f..24c00d2 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -668,5 +668,71 @@ of case for anything other than the ASCII letters. --/
/is{2}t/8i
iskt
+
+/-- This property is a PCRE special --/
+
+/^\p{Xuc}/8
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+ ** Failers
+ abc
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\P{Xuc}/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+
+/^[\P{Xuc}]/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
/-- End of testinput7 --/
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 049d446..9ee0f76 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -2723,4 +2723,129 @@ No match
iskt
No match
+/^\p{Xuc}/8
+ $abc
+ 0: $
+ @abc
+ 0: @
+ `abc
+ 0: `
+ \x{1234}abc
+ 0: \x{1234}
+ ** Failers
+No match
+ abc
+No match
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ 1: $@`\x{a0}\x{1234}
+ 2: $@`\x{a0}
+ 3: $@`
+ 4: $@
+ 5: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ 1: $@`\x{a0}\x{1234}
+ 2: $@`\x{a0}
+ 3: $@`
+ 4: $@
+ 5: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}*
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}
+ 1: $@`\x{a0}
+ 2: $@`
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}
+ 1: $@`\x{a0}
+ 2: $@`
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ 1: $@`\x{a0}\x{1234}
+ 2: $@`\x{a0}
+ 3: $@`
+ 4: $@
+ 5: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\P{Xuc}/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
+
+/^[\P{Xuc}]/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
+
/-- End of testinput10 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 4f8b7b9..ddd96fc 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -1469,5 +1469,113 @@ No match
/is{2}t/8i
iskt
No match
+
+/-- This property is a PCRE special --/
+
+/^\p{Xuc}/8
+ $abc
+ 0: $
+ @abc
+ 0: @
+ `abc
+ 0: `
+ \x{1234}abc
+ 0: \x{1234}
+ ** Failers
+No match
+ abc
+No match
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}*
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\P{Xuc}/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
+
+/^[\P{Xuc}]/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
/-- End of testinput7 --/