summary | refs | log | tree | commit | diff
path: root/regnodes.h
diff options
context:
space:
mode:
author: Larry Wall <larry@wall.org> 1998-07-24 05:44:33 +0000
committer: Larry Wall <larry@wall.org> 1998-07-24 05:44:33 +0000
commit: a0ed51b321531af4b47cce24205ab9656f043f0f (patch)
tree: 610356407b37a4041ea8bcaf44571579b2da5613 /regnodes.h
parent: 9332a1c1d80ded85a2b1f32b1c8968a35e3b0fbb (diff)
download: perl-a0ed51b321531af4b47cce24205ab9656f043f0f.tar.gz
Here are the long-expected Unicode/UTF-8 modifications.
p4raw-id: //depot/utfperl@1651
Diffstat (limited to 'regnodes.h')
-rw-r--r-- regnodes.h 170
1 files changed, 121 insertions, 49 deletions
diff --git a/regnodes.h b/regnodes.h
index c494daed12..c64c4b7944 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -3,7 +3,7 @@
Any changes made here will be lost!
*/
-#define END 0 /* 0 End of program. */
+#define END 0 /* 0x0 End of program. */
#define SUCCEED 1 /* 0x1 Return from a subroutine, basically. */
#define BOL 2 /* 0x2 Match "" at beginning of line. */
#define MBOL 3 /* 0x3 Same, assuming multiline. */
@@ -13,54 +13,72 @@
#define MEOL 7 /* 0x7 Same, assuming multiline. */
#define SEOL 8 /* 0x8 Same, assuming singleline. */
#define BOUND 9 /* 0x9 Match "" at any word boundary */
-#define BOUNDL 10 /* 0xa Match "" at any word boundary */
-#define NBOUND 11 /* 0xb Match "" at any word non-boundary */
-#define NBOUNDL 12 /* 0xc Match "" at any word non-boundary */
-#define GPOS 13 /* 0xd Matches where last m//g left off. */
-#define ANY 14 /* 0xe Match any one character (except newline). */
-#define SANY 15 /* 0xf Match any one character. */
-#define ANYOF 16 /* 0x10 Match character in (or not in) this class. */
-#define ALNUM 17 /* 0x11 Match any alphanumeric character */
-#define ALNUML 18 /* 0x12 Match any alphanumeric char in locale */
-#define NALNUM 19 /* 0x13 Match any non-alphanumeric character */
-#define NALNUML 20 /* 0x14 Match any non-alphanumeric char in locale */
-#define SPACE 21 /* 0x15 Match any whitespace character */
-#define SPACEL 22 /* 0x16 Match any whitespace char in locale */
-#define NSPACE 23 /* 0x17 Match any non-whitespace character */
-#define NSPACEL 24 /* 0x18 Match any non-whitespace char in locale */
-#define DIGIT 25 /* 0x19 Match any numeric character */
-#define NDIGIT 26 /* 0x1a Match any non-numeric character */
-#define BRANCH 27 /* 0x1b Match this alternative, or the next... */
-#define BACK 28 /* 0x1c Match "", "next" ptr points backward. */
-#define EXACT 29 /* 0x1d Match this string (preceded by length). */
-#define EXACTF 30 /* 0x1e Match this string, folded (prec. by length). */
-#define EXACTFL 31 /* 0x1f Match this string, folded in locale (w/len). */
-#define NOTHING 32 /* 0x20 Match empty string. */
-#define TAIL 33 /* 0x21 Match empty string. Can jump here from outside. */
-#define STAR 34 /* 0x22 Match this (simple) thing 0 or more times. */
-#define PLUS 35 /* 0x23 Match this (simple) thing 1 or more times. */
-#define CURLY 36 /* 0x24 Match this simple thing {n,m} times. */
-#define CURLYN 37 /* 0x25 Match next-after-this simple thing */
-#define CURLYM 38 /* 0x26 Match this medium-complex thing {n,m} times. */
-#define CURLYX 39 /* 0x27 Match this complex thing {n,m} times. */
-#define WHILEM 40 /* 0x28 Do curly processing and see if rest matches. */
-#define OPEN 41 /* 0x29 Mark this point in input as start of #n. */
-#define CLOSE 42 /* 0x2a Analogous to OPEN. */
-#define REF 43 /* 0x2b Match some already matched string */
-#define REFF 44 /* 0x2c Match already matched string, folded */
-#define REFFL 45 /* 0x2d Match already matched string, folded in loc. */
-#define IFMATCH 46 /* 0x2e Succeeds if the following matches. */
-#define UNLESSM 47 /* 0x2f Fails if the following matches. */
-#define SUSPEND 48 /* 0x30 "Independent" sub-RE. */
-#define IFTHEN 49 /* 0x31 Switch, should be preceeded by switcher . */
-#define GROUPP 50 /* 0x32 Whether the group matched. */
-#define LONGJMP 51 /* 0x33 Jump far away. */
-#define BRANCHJ 52 /* 0x34 BRANCH with long offset. */
-#define EVAL 53 /* 0x35 Execute some Perl code. */
-#define MINMOD 54 /* 0x36 Next operator is not greedy. */
-#define LOGICAL 55 /* 0x37 Next opcode should set the flag only. */
-#define RENUM 56 /* 0x38 Group with independently numbered parens. */
-#define OPTIMIZED 57 /* 0x39 Placeholder for dump. */
+#define BOUNDUTF8 10 /* 0xa Match "" at any word boundary */
+#define BOUNDL 11 /* 0xb Match "" at any word boundary */
+#define BOUNDLUTF8 12 /* 0xc Match "" at any word boundary */
+#define NBOUND 13 /* 0xd Match "" at any word non-boundary */
+#define NBOUNDUTF8 14 /* 0xe Match "" at any word non-boundary */
+#define NBOUNDL 15 /* 0xf Match "" at any word non-boundary */
+#define NBOUNDLUTF8 16 /* 0x10 Match "" at any word non-boundary */
+#define GPOS 17 /* 0x11 Matches where last m//g left off. */
+#define ANY 18 /* 0x12 Match any one character (except newline). */
+#define ANYUTF8 19 /* 0x13 Match any one Unicode character (except newline). */
+#define SANY 20 /* 0x14 Match any one character. */
+#define SANYUTF8 21 /* 0x15 Match any one Unicode character. */
+#define ANYOF 22 /* 0x16 Match character in (or not in) this class. */
+#define ANYOFUTF8 23 /* 0x17 Match character in (or not in) this class. */
+#define ALNUM 24 /* 0x18 Match any alphanumeric character */
+#define ALNUMUTF8 25 /* 0x19 Match any alphanumeric character */
+#define ALNUML 26 /* 0x1a Match any alphanumeric char in locale */
+#define ALNUMLUTF8 27 /* 0x1b Match any alphanumeric char in locale */
+#define NALNUM 28 /* 0x1c Match any non-alphanumeric character */
+#define NALNUMUTF8 29 /* 0x1d Match any non-alphanumeric character */
+#define NALNUML 30 /* 0x1e Match any non-alphanumeric char in locale */
+#define NALNUMLUTF8 31 /* 0x1f Match any non-alphanumeric char in locale */
+#define SPACE 32 /* 0x20 Match any whitespace character */
+#define SPACEUTF8 33 /* 0x21 Match any whitespace character */
+#define SPACEL 34 /* 0x22 Match any whitespace char in locale */
+#define SPACELUTF8 35 /* 0x23 Match any whitespace char in locale */
+#define NSPACE 36 /* 0x24 Match any non-whitespace character */
+#define NSPACEUTF8 37 /* 0x25 Match any non-whitespace character */
+#define NSPACEL 38 /* 0x26 Match any non-whitespace char in locale */
+#define NSPACELUTF8 39 /* 0x27 Match any non-whitespace char in locale */
+#define DIGIT 40 /* 0x28 Match any numeric character */
+#define DIGITUTF8 41 /* 0x29 Match any numeric character */
+#define NDIGIT 42 /* 0x2a Match any non-numeric character */
+#define NDIGITUTF8 43 /* 0x2b Match any non-numeric character */
+#define CLUMP 44 /* 0x2c Match any combining character sequence */
+#define BRANCH 45 /* 0x2d Match this alternative, or the next... */
+#define BACK 46 /* 0x2e Match "", "next" ptr points backward. */
+#define EXACT 47 /* 0x2f Match this string (preceded by length). */
+#define EXACTF 48 /* 0x30 Match this string, folded (prec. by length). */
+#define EXACTFL 49 /* 0x31 Match this string, folded in locale (w/len). */
+#define NOTHING 50 /* 0x32 Match empty string. */
+#define TAIL 51 /* 0x33 Match empty string. Can jump here from outside. */
+#define STAR 52 /* 0x34 Match this (simple) thing 0 or more times. */
+#define PLUS 53 /* 0x35 Match this (simple) thing 1 or more times. */
+#define CURLY 54 /* 0x36 Match this simple thing {n,m} times. */
+#define CURLYN 55 /* 0x37 Match next-after-this simple thing */
+#define CURLYM 56 /* 0x38 Match this medium-complex thing {n,m} times. */
+#define CURLYX 57 /* 0x39 Match this complex thing {n,m} times. */
+#define WHILEM 58 /* 0x3a Do curly processing and see if rest matches. */
+#define OPEN 59 /* 0x3b Mark this point in input as start of #n. */
+#define CLOSE 60 /* 0x3c Analogous to OPEN. */
+#define REF 61 /* 0x3d Match some already matched string */
+#define REFF 62 /* 0x3e Match already matched string, folded */
+#define REFFL 63 /* 0x3f Match already matched string, folded in loc. */
+#define IFMATCH 64 /* 0x40 Succeeds if the following matches. */
+#define UNLESSM 65 /* 0x41 Fails if the following matches. */
+#define SUSPEND 66 /* 0x42 "Independent" sub-RE. */
+#define IFTHEN 67 /* 0x43 Switch, should be preceeded by switcher . */
+#define GROUPP 68 /* 0x44 Whether the group matched. */
+#define LONGJMP 69 /* 0x45 Jump far away. */
+#define BRANCHJ 70 /* 0x46 BRANCH with long offset. */
+#define EVAL 71 /* 0x47 Execute some Perl code. */
+#define MINMOD 72 /* 0x48 Next operator is not greedy. */
+#define LOGICAL 73 /* 0x49 Next opcode should set the flag only. */
+#define RENUM 74 /* 0x4a Group with independently numbered parens. */
+#define OPTIMIZED 75 /* 0x4b Placeholder for dump. */
#ifndef DOINIT
EXTCONST U8 regkind[];
@@ -76,23 +94,41 @@ EXTCONST U8 regkind[] = {
EOL, /* MEOL */
EOL, /* SEOL */
BOUND, /* BOUND */
+ BOUND, /* BOUNDUTF8 */
BOUND, /* BOUNDL */
+ BOUND, /* BOUNDLUTF8 */
NBOUND, /* NBOUND */
+ NBOUND, /* NBOUNDUTF8 */
NBOUND, /* NBOUNDL */
+ NBOUND, /* NBOUNDLUTF8 */
GPOS, /* GPOS */
ANY, /* ANY */
+ ANY, /* ANYUTF8 */
ANY, /* SANY */
+ ANY, /* SANYUTF8 */
ANYOF, /* ANYOF */
+ ANYOF, /* ANYOFUTF8 */
ALNUM, /* ALNUM */
+ ALNUM, /* ALNUMUTF8 */
ALNUM, /* ALNUML */
+ ALNUM, /* ALNUMLUTF8 */
NALNUM, /* NALNUM */
+ NALNUM, /* NALNUMUTF8 */
NALNUM, /* NALNUML */
+ NALNUM, /* NALNUMLUTF8 */
SPACE, /* SPACE */
+ SPACE, /* SPACEUTF8 */
SPACE, /* SPACEL */
+ SPACE, /* SPACELUTF8 */
NSPACE, /* NSPACE */
+ NSPACE, /* NSPACEUTF8 */
NSPACE, /* NSPACEL */
+ NSPACE, /* NSPACELUTF8 */
DIGIT, /* DIGIT */
+ DIGIT, /* DIGITUTF8 */
NDIGIT, /* NDIGIT */
+ NDIGIT, /* NDIGITUTF8 */
+ CLUMP, /* CLUMP */
BRANCH, /* BRANCH */
BACK, /* BACK */
EXACT, /* EXACT */
@@ -140,23 +176,41 @@ const static U8 regarglen[] = {
0, /* MEOL */
0, /* SEOL */
0, /* BOUND */
+ 0, /* BOUNDUTF8 */
0, /* BOUNDL */
+ 0, /* BOUNDLUTF8 */
0, /* NBOUND */
+ 0, /* NBOUNDUTF8 */
0, /* NBOUNDL */
+ 0, /* NBOUNDLUTF8 */
0, /* GPOS */
0, /* ANY */
+ 0, /* ANYUTF8 */
0, /* SANY */
+ 0, /* SANYUTF8 */
0, /* ANYOF */
+ EXTRA_SIZE(struct regnode_1), /* ANYOFUTF8 */
0, /* ALNUM */
+ 0, /* ALNUMUTF8 */
0, /* ALNUML */
+ 0, /* ALNUMLUTF8 */
0, /* NALNUM */
+ 0, /* NALNUMUTF8 */
0, /* NALNUML */
+ 0, /* NALNUMLUTF8 */
0, /* SPACE */
+ 0, /* SPACEUTF8 */
0, /* SPACEL */
+ 0, /* SPACELUTF8 */
0, /* NSPACE */
+ 0, /* NSPACEUTF8 */
0, /* NSPACEL */
+ 0, /* NSPACELUTF8 */
0, /* DIGIT */
+ 0, /* DIGITUTF8 */
0, /* NDIGIT */
+ 0, /* NDIGITUTF8 */
+ 0, /* CLUMP */
0, /* BRANCH */
0, /* BACK */
0, /* EXACT */
@@ -201,23 +255,41 @@ const static char reg_off_by_arg[] = {
0, /* MEOL */
0, /* SEOL */
0, /* BOUND */
+ 0, /* BOUNDUTF8 */
0, /* BOUNDL */
+ 0, /* BOUNDLUTF8 */
0, /* NBOUND */
+ 0, /* NBOUNDUTF8 */
0, /* NBOUNDL */
+ 0, /* NBOUNDLUTF8 */
0, /* GPOS */
0, /* ANY */
+ 0, /* ANYUTF8 */
0, /* SANY */
+ 0, /* SANYUTF8 */
0, /* ANYOF */
+ 0, /* ANYOFUTF8 */
0, /* ALNUM */
+ 0, /* ALNUMUTF8 */
0, /* ALNUML */
+ 0, /* ALNUMLUTF8 */
0, /* NALNUM */
+ 0, /* NALNUMUTF8 */
0, /* NALNUML */
+ 0, /* NALNUMLUTF8 */
0, /* SPACE */
+ 0, /* SPACEUTF8 */
0, /* SPACEL */
+ 0, /* SPACELUTF8 */
0, /* NSPACE */
+ 0, /* NSPACEUTF8 */
0, /* NSPACEL */
+ 0, /* NSPACELUTF8 */
0, /* DIGIT */
+ 0, /* DIGITUTF8 */
0, /* NDIGIT */
+ 0, /* NDIGITUTF8 */
+ 0, /* CLUMP */
0, /* BRANCH */
0, /* BACK */
0, /* EXACT */