diff options
author | Larry Wall <larry@wall.org> | 1998-07-24 05:44:33 +0000 |
---|---|---|
committer | Larry Wall <larry@wall.org> | 1998-07-24 05:44:33 +0000 |
commit | a0ed51b321531af4b47cce24205ab9656f043f0f (patch) | |
tree | 610356407b37a4041ea8bcaf44571579b2da5613 /regnodes.h | |
parent | 9332a1c1d80ded85a2b1f32b1c8968a35e3b0fbb (diff) | |
download | perl-a0ed51b321531af4b47cce24205ab9656f043f0f.tar.gz |
Here are the long-expected Unicode/UTF-8 modifications.
p4raw-id: //depot/utfperl@1651
Diffstat (limited to 'regnodes.h')
-rw-r--r-- | regnodes.h | 170 |
1 files changed, 121 insertions, 49 deletions
diff --git a/regnodes.h b/regnodes.h index c494daed12..c64c4b7944 100644 --- a/regnodes.h +++ b/regnodes.h @@ -3,7 +3,7 @@ Any changes made here will be lost! */ -#define END 0 /* 0 End of program. */ +#define END 0 /* 0x0 End of program. */ #define SUCCEED 1 /* 0x1 Return from a subroutine, basically. */ #define BOL 2 /* 0x2 Match "" at beginning of line. */ #define MBOL 3 /* 0x3 Same, assuming multiline. */ @@ -13,54 +13,72 @@ #define MEOL 7 /* 0x7 Same, assuming multiline. */ #define SEOL 8 /* 0x8 Same, assuming singleline. */ #define BOUND 9 /* 0x9 Match "" at any word boundary */ -#define BOUNDL 10 /* 0xa Match "" at any word boundary */ -#define NBOUND 11 /* 0xb Match "" at any word non-boundary */ -#define NBOUNDL 12 /* 0xc Match "" at any word non-boundary */ -#define GPOS 13 /* 0xd Matches where last m//g left off. */ -#define ANY 14 /* 0xe Match any one character (except newline). */ -#define SANY 15 /* 0xf Match any one character. */ -#define ANYOF 16 /* 0x10 Match character in (or not in) this class. */ -#define ALNUM 17 /* 0x11 Match any alphanumeric character */ -#define ALNUML 18 /* 0x12 Match any alphanumeric char in locale */ -#define NALNUM 19 /* 0x13 Match any non-alphanumeric character */ -#define NALNUML 20 /* 0x14 Match any non-alphanumeric char in locale */ -#define SPACE 21 /* 0x15 Match any whitespace character */ -#define SPACEL 22 /* 0x16 Match any whitespace char in locale */ -#define NSPACE 23 /* 0x17 Match any non-whitespace character */ -#define NSPACEL 24 /* 0x18 Match any non-whitespace char in locale */ -#define DIGIT 25 /* 0x19 Match any numeric character */ -#define NDIGIT 26 /* 0x1a Match any non-numeric character */ -#define BRANCH 27 /* 0x1b Match this alternative, or the next... */ -#define BACK 28 /* 0x1c Match "", "next" ptr points backward. */ -#define EXACT 29 /* 0x1d Match this string (preceded by length). */ -#define EXACTF 30 /* 0x1e Match this string, folded (prec. by length). */ -#define EXACTFL 31 /* 0x1f Match this string, folded in locale (w/len). */ -#define NOTHING 32 /* 0x20 Match empty string. */ -#define TAIL 33 /* 0x21 Match empty string. Can jump here from outside. */ -#define STAR 34 /* 0x22 Match this (simple) thing 0 or more times. */ -#define PLUS 35 /* 0x23 Match this (simple) thing 1 or more times. */ -#define CURLY 36 /* 0x24 Match this simple thing {n,m} times. */ -#define CURLYN 37 /* 0x25 Match next-after-this simple thing */ -#define CURLYM 38 /* 0x26 Match this medium-complex thing {n,m} times. */ -#define CURLYX 39 /* 0x27 Match this complex thing {n,m} times. */ -#define WHILEM 40 /* 0x28 Do curly processing and see if rest matches. */ -#define OPEN 41 /* 0x29 Mark this point in input as start of #n. */ -#define CLOSE 42 /* 0x2a Analogous to OPEN. */ -#define REF 43 /* 0x2b Match some already matched string */ -#define REFF 44 /* 0x2c Match already matched string, folded */ -#define REFFL 45 /* 0x2d Match already matched string, folded in loc. */ -#define IFMATCH 46 /* 0x2e Succeeds if the following matches. */ -#define UNLESSM 47 /* 0x2f Fails if the following matches. */ -#define SUSPEND 48 /* 0x30 "Independent" sub-RE. */ -#define IFTHEN 49 /* 0x31 Switch, should be preceeded by switcher . */ -#define GROUPP 50 /* 0x32 Whether the group matched. */ -#define LONGJMP 51 /* 0x33 Jump far away. */ -#define BRANCHJ 52 /* 0x34 BRANCH with long offset. */ -#define EVAL 53 /* 0x35 Execute some Perl code. */ -#define MINMOD 54 /* 0x36 Next operator is not greedy. */ -#define LOGICAL 55 /* 0x37 Next opcode should set the flag only. */ -#define RENUM 56 /* 0x38 Group with independently numbered parens. */ -#define OPTIMIZED 57 /* 0x39 Placeholder for dump. */ +#define BOUNDUTF8 10 /* 0xa Match "" at any word boundary */ +#define BOUNDL 11 /* 0xb Match "" at any word boundary */ +#define BOUNDLUTF8 12 /* 0xc Match "" at any word boundary */ +#define NBOUND 13 /* 0xd Match "" at any word non-boundary */ +#define NBOUNDUTF8 14 /* 0xe Match "" at any word non-boundary */ +#define NBOUNDL 15 /* 0xf Match "" at any word non-boundary */ +#define NBOUNDLUTF8 16 /* 0x10 Match "" at any word non-boundary */ +#define GPOS 17 /* 0x11 Matches where last m//g left off. */ +#define ANY 18 /* 0x12 Match any one character (except newline). */ +#define ANYUTF8 19 /* 0x13 Match any one Unicode character (except newline). */ +#define SANY 20 /* 0x14 Match any one character. */ +#define SANYUTF8 21 /* 0x15 Match any one Unicode character. */ +#define ANYOF 22 /* 0x16 Match character in (or not in) this class. */ +#define ANYOFUTF8 23 /* 0x17 Match character in (or not in) this class. */ +#define ALNUM 24 /* 0x18 Match any alphanumeric character */ +#define ALNUMUTF8 25 /* 0x19 Match any alphanumeric character */ +#define ALNUML 26 /* 0x1a Match any alphanumeric char in locale */ +#define ALNUMLUTF8 27 /* 0x1b Match any alphanumeric char in locale */ +#define NALNUM 28 /* 0x1c Match any non-alphanumeric character */ +#define NALNUMUTF8 29 /* 0x1d Match any non-alphanumeric character */ +#define NALNUML 30 /* 0x1e Match any non-alphanumeric char in locale */ +#define NALNUMLUTF8 31 /* 0x1f Match any non-alphanumeric char in locale */ +#define SPACE 32 /* 0x20 Match any whitespace character */ +#define SPACEUTF8 33 /* 0x21 Match any whitespace character */ +#define SPACEL 34 /* 0x22 Match any whitespace char in locale */ +#define SPACELUTF8 35 /* 0x23 Match any whitespace char in locale */ +#define NSPACE 36 /* 0x24 Match any non-whitespace character */ +#define NSPACEUTF8 37 /* 0x25 Match any non-whitespace character */ +#define NSPACEL 38 /* 0x26 Match any non-whitespace char in locale */ +#define NSPACELUTF8 39 /* 0x27 Match any non-whitespace char in locale */ +#define DIGIT 40 /* 0x28 Match any numeric character */ +#define DIGITUTF8 41 /* 0x29 Match any numeric character */ +#define NDIGIT 42 /* 0x2a Match any non-numeric character */ +#define NDIGITUTF8 43 /* 0x2b Match any non-numeric character */ +#define CLUMP 44 /* 0x2c Match any combining character sequence */ +#define BRANCH 45 /* 0x2d Match this alternative, or the next... */ +#define BACK 46 /* 0x2e Match "", "next" ptr points backward. */ +#define EXACT 47 /* 0x2f Match this string (preceded by length). */ +#define EXACTF 48 /* 0x30 Match this string, folded (prec. by length). */ +#define EXACTFL 49 /* 0x31 Match this string, folded in locale (w/len). */ +#define NOTHING 50 /* 0x32 Match empty string. */ +#define TAIL 51 /* 0x33 Match empty string. Can jump here from outside. */ +#define STAR 52 /* 0x34 Match this (simple) thing 0 or more times. */ +#define PLUS 53 /* 0x35 Match this (simple) thing 1 or more times. */ +#define CURLY 54 /* 0x36 Match this simple thing {n,m} times. */ +#define CURLYN 55 /* 0x37 Match next-after-this simple thing */ +#define CURLYM 56 /* 0x38 Match this medium-complex thing {n,m} times. */ +#define CURLYX 57 /* 0x39 Match this complex thing {n,m} times. */ +#define WHILEM 58 /* 0x3a Do curly processing and see if rest matches. */ +#define OPEN 59 /* 0x3b Mark this point in input as start of #n. */ +#define CLOSE 60 /* 0x3c Analogous to OPEN. */ +#define REF 61 /* 0x3d Match some already matched string */ +#define REFF 62 /* 0x3e Match already matched string, folded */ +#define REFFL 63 /* 0x3f Match already matched string, folded in loc. */ +#define IFMATCH 64 /* 0x40 Succeeds if the following matches. */ +#define UNLESSM 65 /* 0x41 Fails if the following matches. */ +#define SUSPEND 66 /* 0x42 "Independent" sub-RE. */ +#define IFTHEN 67 /* 0x43 Switch, should be preceeded by switcher . */ +#define GROUPP 68 /* 0x44 Whether the group matched. */ +#define LONGJMP 69 /* 0x45 Jump far away. */ +#define BRANCHJ 70 /* 0x46 BRANCH with long offset. */ +#define EVAL 71 /* 0x47 Execute some Perl code. */ +#define MINMOD 72 /* 0x48 Next operator is not greedy. */ +#define LOGICAL 73 /* 0x49 Next opcode should set the flag only. */ +#define RENUM 74 /* 0x4a Group with independently numbered parens. */ +#define OPTIMIZED 75 /* 0x4b Placeholder for dump. */ #ifndef DOINIT EXTCONST U8 regkind[]; @@ -76,23 +94,41 @@ EXTCONST U8 regkind[] = { EOL, /* MEOL */ EOL, /* SEOL */ BOUND, /* BOUND */ + BOUND, /* BOUNDUTF8 */ BOUND, /* BOUNDL */ + BOUND, /* BOUNDLUTF8 */ NBOUND, /* NBOUND */ + NBOUND, /* NBOUNDUTF8 */ NBOUND, /* NBOUNDL */ + NBOUND, /* NBOUNDLUTF8 */ GPOS, /* GPOS */ ANY, /* ANY */ + ANY, /* ANYUTF8 */ ANY, /* SANY */ + ANY, /* SANYUTF8 */ ANYOF, /* ANYOF */ + ANYOF, /* ANYOFUTF8 */ ALNUM, /* ALNUM */ + ALNUM, /* ALNUMUTF8 */ ALNUM, /* ALNUML */ + ALNUM, /* ALNUMLUTF8 */ NALNUM, /* NALNUM */ + NALNUM, /* NALNUMUTF8 */ NALNUM, /* NALNUML */ + NALNUM, /* NALNUMLUTF8 */ SPACE, /* SPACE */ + SPACE, /* SPACEUTF8 */ SPACE, /* SPACEL */ + SPACE, /* SPACELUTF8 */ NSPACE, /* NSPACE */ + NSPACE, /* NSPACEUTF8 */ NSPACE, /* NSPACEL */ + NSPACE, /* NSPACELUTF8 */ DIGIT, /* DIGIT */ + DIGIT, /* DIGITUTF8 */ NDIGIT, /* NDIGIT */ + NDIGIT, /* NDIGITUTF8 */ + CLUMP, /* CLUMP */ BRANCH, /* BRANCH */ BACK, /* BACK */ EXACT, /* EXACT */ @@ -140,23 +176,41 @@ const static U8 regarglen[] = { 0, /* MEOL */ 0, /* SEOL */ 0, /* BOUND */ + 0, /* BOUNDUTF8 */ 0, /* BOUNDL */ + 0, /* BOUNDLUTF8 */ 0, /* NBOUND */ + 0, /* NBOUNDUTF8 */ 0, /* NBOUNDL */ + 0, /* NBOUNDLUTF8 */ 0, /* GPOS */ 0, /* ANY */ + 0, /* ANYUTF8 */ 0, /* SANY */ + 0, /* SANYUTF8 */ 0, /* ANYOF */ + EXTRA_SIZE(struct regnode_1), /* ANYOFUTF8 */ 0, /* ALNUM */ + 0, /* ALNUMUTF8 */ 0, /* ALNUML */ + 0, /* ALNUMLUTF8 */ 0, /* NALNUM */ + 0, /* NALNUMUTF8 */ 0, /* NALNUML */ + 0, /* NALNUMLUTF8 */ 0, /* SPACE */ + 0, /* SPACEUTF8 */ 0, /* SPACEL */ + 0, /* SPACELUTF8 */ 0, /* NSPACE */ + 0, /* NSPACEUTF8 */ 0, /* NSPACEL */ + 0, /* NSPACELUTF8 */ 0, /* DIGIT */ + 0, /* DIGITUTF8 */ 0, /* NDIGIT */ + 0, /* NDIGITUTF8 */ + 0, /* CLUMP */ 0, /* BRANCH */ 0, /* BACK */ 0, /* EXACT */ @@ -201,23 +255,41 @@ const static char reg_off_by_arg[] = { 0, /* MEOL */ 0, /* SEOL */ 0, /* BOUND */ + 0, /* BOUNDUTF8 */ 0, /* BOUNDL */ + 0, /* BOUNDLUTF8 */ 0, /* NBOUND */ + 0, /* NBOUNDUTF8 */ 0, /* NBOUNDL */ + 0, /* NBOUNDLUTF8 */ 0, /* GPOS */ 0, /* ANY */ + 0, /* ANYUTF8 */ 0, /* SANY */ + 0, /* SANYUTF8 */ 0, /* ANYOF */ + 0, /* ANYOFUTF8 */ 0, /* ALNUM */ + 0, /* ALNUMUTF8 */ 0, /* ALNUML */ + 0, /* ALNUMLUTF8 */ 0, /* NALNUM */ + 0, /* NALNUMUTF8 */ 0, /* NALNUML */ + 0, /* NALNUMLUTF8 */ 0, /* SPACE */ + 0, /* SPACEUTF8 */ 0, /* SPACEL */ + 0, /* SPACELUTF8 */ 0, /* NSPACE */ + 0, /* NSPACEUTF8 */ 0, /* NSPACEL */ + 0, /* NSPACELUTF8 */ 0, /* DIGIT */ + 0, /* DIGITUTF8 */ 0, /* NDIGIT */ + 0, /* NDIGITUTF8 */ + 0, /* CLUMP */ 0, /* BRANCH */ 0, /* BACK */ 0, /* EXACT */ |