summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2010-11-30 21:39:16 -0700
committer Father Chrysostomos <sprout@cpan.org> 2010-12-01 18:10:21 -0800
commit 7fcd3a28e7ffbe190def12c7e16a921175e2339c (patch)
tree 2ece6b78813bf7019bf770824c8b60b174e03a0c
parent 60c7e6729abcbf29933292741d6e80291f00a7c0 (diff)
download perl-7fcd3a28e7ffbe190def12c7e16a921175e2339c.tar.gz
regcomp.sym: Add REFFU and NREFFU nodes
These will be used for matching capture buffers case-insensitively using Unicode semantics. Running `make regen` will regenerate the delivered regnodes.h.
-rw-r--r-- regcomp.sym 7
-rw-r--r-- regnodes.h 29
2 files changed, 27 insertions, 9 deletions
diff --git a/regcomp.sym b/regcomp.sym
index ab57929310..4e787a7bf3 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -194,6 +194,13 @@ NHORIZWS NHORIZWS, none 0 S ; not horizontal whitespace (Perl 6)
FOLDCHAR FOLDCHAR, codepoint 1 ; codepoint with tricky case folding properties.
EXACTFU EXACT, str ; Match this string, folded, Unicode semantics for non-utf8 (prec. by length).
+# These could have been implemented using the FLAGS field of the regnode, but
+# by having a separate node type, we can use the existing switch statement to
+# avoid some tests
+REFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8
+NREFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8
+
+
# NEW STUFF ABOVE THIS LINE
################################################################################
diff --git a/regnodes.h b/regnodes.h
index 97ac60766a..09ab661561 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
/* Regops and State definitions */
-#define REGNODE_MAX 91
-#define REGMATCH_STATE_MAX 131
+#define REGNODE_MAX 93
+#define REGMATCH_STATE_MAX 133
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
@@ -70,7 +70,7 @@
#define MINMOD 58 /* 0x3a Next operator is not greedy. */
#define LOGICAL 59 /* 0x3b Next opcode should set the flag only. */
#define RENUM 60 /* 0x3c Group with independently numbered parens. */
-#define TRIE 61 /* 0x3d Match many EXACT(FL?)? at once. flags==type */
+#define TRIE 61 /* 0x3d Match many EXACT(F[LU]?)? at once. flags==type */
#define TRIEC 62 /* 0x3e Same as TRIE, but with embedded charclass data */
#define AHOCORASICK 63 /* 0x3f Aho Corasick stclass. flags==type */
#define AHOCORASICKC 64 /* 0x40 Same as AHOCORASICK, but with embedded charclass data */
@@ -99,8 +99,10 @@
#define NHORIZWS 87 /* 0x57 not horizontal whitespace (Perl 6) */
#define FOLDCHAR 88 /* 0x58 codepoint with tricky case folding properties. */
#define EXACTFU 89 /* 0x59 Match this string, folded, Unicode semantics for non-utf8 (prec. by length). */
-#define OPTIMIZED 90 /* 0x5a Placeholder for dump. */
-#define PSEUDO 91 /* 0x5b Pseudo opcode for internal use. */
+#define REFFU 90 /* 0x5a Match already matched string, folded using unicode semantics for non-utf8 */
+#define NREFFU 91 /* 0x5b Match already matched string, folded using unicode semantics for non-utf8 */
+#define OPTIMIZED 92 /* 0x5c Placeholder for dump. */
+#define PSEUDO 93 /* 0x5d Pseudo opcode for internal use. */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
@@ -239,6 +241,8 @@ EXTCONST U8 PL_regkind[] = {
NHORIZWS, /* NHORIZWS */
FOLDCHAR, /* FOLDCHAR */
EXACT, /* EXACTFU */
+ REF, /* REFFU */
+ REF, /* NREFFU */
NOTHING, /* OPTIMIZED */
PSEUDO, /* PSEUDO */
/* ------------ States ------------- */
@@ -379,6 +383,8 @@ static const U8 regarglen[] = {
0, /* NHORIZWS */
EXTRA_SIZE(struct regnode_1), /* FOLDCHAR */
0, /* EXACTFU */
+ EXTRA_SIZE(struct regnode_1), /* REFFU */
+ EXTRA_SIZE(struct regnode_1), /* NREFFU */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
@@ -476,6 +482,8 @@ static const char reg_off_by_arg[] = {
0, /* NHORIZWS */
0, /* FOLDCHAR */
0, /* EXACTFU */
+ 0, /* REFFU */
+ 0, /* NREFFU */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
@@ -578,8 +586,10 @@ EXTCONST char * const PL_reg_name[] = {
"NHORIZWS", /* 0x57 */
"FOLDCHAR", /* 0x58 */
"EXACTFU", /* 0x59 */
- "OPTIMIZED", /* 0x5a */
- "PSEUDO", /* 0x5b */
+ "REFFU", /* 0x5a */
+ "NREFFU", /* 0x5b */
+ "OPTIMIZED", /* 0x5c */
+ "PSEUDO", /* 0x5d */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
@@ -674,7 +684,8 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__;
#else
EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
CLUMP, BRANCH, BACK, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM,
- REF, REFF, REFFL, SUSPEND, IFTHEN, BRANCHJ, NREF, NREFF, NREFFL,
+ REF, REFF, REFFL, SUSPEND, IFTHEN, BRANCHJ, NREF, NREFF, NREFFL, REFFU,
+ NREFFU,
0
};
#endif /* DOINIT */
@@ -683,7 +694,7 @@ EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
EXTCONST U8 PL_varies_bitmask[];
#else
EXTCONST U8 PL_varies_bitmask[] = {
- 0x00, 0x00, 0x00, 0xC0, 0xC1, 0x9F, 0x33, 0x01, 0x38, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0x00, 0xC0, 0xC1, 0x9F, 0x33, 0x01, 0x38, 0x00, 0x00, 0x0C
};
#endif /* DOINIT */