diff options

| field | value | date |
|---|---|---|
| author | Ilya Zakharevich <ilya@math.berkeley.edu> | 1998-06-26 22:55:26 -0400 |
| committer | Gurusamy Sarathy <gsar@cpan.org> | 1998-06-28 21:09:48 +0000 |
| commit | b85d18e97b6ae9e0cc168f99b999fd3fd33104bd (patch) | |
| tree | fba77924a69fae7095030b314eb5e20f98b74006 | |
| parent | b174585de5ccc9973ba572393b2b34e1a6a5b749 (diff) | |
| download | perl-b85d18e97b6ae9e0cc168f99b999fd3fd33104bd.tar.gz | |
applied patch, tweaked doc, and regen regnodes.h
Message-Id: <199806270655.CAA29144@monk.mps.ohio-state.edu>
Subject: [PATCH 5.004_68] \z in RE
p4raw-id: //depot/perl@1250
-rw-r--r-- | pod/perlre.pod | 7 |
-rw-r--r-- | regcomp.c | 10 |
-rw-r--r-- | regcomp.sym | 1 |
-rw-r--r-- | regexec.c | 4 |
-rw-r--r-- | regnodes.h | 108 |
-rw-r--r-- | t/op/re_tests | 5 |
-rwxr-xr-x | t/op/regexp.t | 6 |
-rw-r--r-- | toke.c | 2 |
8 files changed, 85 insertions, 58 deletions
diff --git a/pod/perlre.pod b/pod/perlre.pod index ebd58582d0..30608ced75 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -177,8 +177,9 @@ Perl defines the following zero-width assertions: \b Match a word boundary \B Match a non-(word boundary) - \A Match at only beginning of string - \Z Match at only end of string (or before newline at the end) + \A Match only at beginning of string + \Z Match only at end of string, or before newline at the end + \z Match only at end of string \G Match only where previous m//g left off (works only with /g) A word boundary (C<\b>) is defined as a spot between two characters that @@ -189,7 +190,7 @@ represents backspace rather than a word boundary.) The C<\A> and C<\Z> are just like "^" and "$", except that they won't match multiple times when the C</m> modifier is used, while "^" and "$" will match at every internal line boundary. To match the actual end of the string, not ignoring newline, -you can use C<\Z(?!\n)>. The C<\G> assertion can be used to chain global +you can use C<\z>. The C<\G> assertion can be used to chain global matches (using C<m//g>), as described in L<perlop/"Regexp Quote-Like Operators">. @@ -1539,6 +1539,12 @@ tryagain: *flagp |= SIMPLE; nextchar(); break; + case 'z': + ret = reg_node(EOS); + *flagp |= SIMPLE; + seen_zerolen++; /* Do not optimize RE away */ + nextchar(); + break; case 'w': ret = reg_node((regflags & PMf_LOCALE) ? ALNUML : ALNUM); *flagp |= HASWIDTH|SIMPLE; @@ -1665,6 +1671,7 @@ tryagain: case 'A': case 'G': case 'Z': + case 'z': case 'w': case 'W': case 'b': @@ -2329,6 +2336,9 @@ regprop(SV *sv, regnode *o) case EOL: p = "EOL"; break; + case EOS: + p = "EOS"; + break; case MEOL: p = "MEOL"; break; diff --git a/regcomp.sym b/regcomp.sym index aa18d119cc..9775b9374d 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -11,6 +11,7 @@ SUCCEED END, no Return from a subroutine, basically. BOL BOL, no Match "" at beginning of line. MBOL BOL, no Same, assuming multiline. 
SBOL BOL, no Same, assuming singleline. +EOS EOL, no Match "" at end of string. EOL EOL, no Match "" at end of line. MEOL EOL, no Same, assuming multiline. SEOL EOL, no Same, assuming singleline. @@ -824,6 +824,10 @@ regmatch(regnode *prog) if (regeol - locinput > 1) sayNO; break; + case EOS: + if (regeol != locinput) + sayNO; + break; case SANY: if (!nextchr && locinput >= regeol) sayNO; diff --git a/regnodes.h b/regnodes.h index 445d0b2803..c494daed12 100644 --- a/regnodes.h +++ b/regnodes.h @@ -8,58 +8,59 @@ #define BOL 2 /* 0x2 Match "" at beginning of line. */ #define MBOL 3 /* 0x3 Same, assuming multiline. */ #define SBOL 4 /* 0x4 Same, assuming singleline. */ -#define EOL 5 /* 0x5 Match "" at end of line. */ -#define MEOL 6 /* 0x6 Same, assuming multiline. */ -#define SEOL 7 /* 0x7 Same, assuming singleline. */ -#define BOUND 8 /* 0x8 Match "" at any word boundary */ -#define BOUNDL 9 /* 0x9 Match "" at any word boundary */ -#define NBOUND 10 /* 0xa Match "" at any word non-boundary */ -#define NBOUNDL 11 /* 0xb Match "" at any word non-boundary */ -#define GPOS 12 /* 0xc Matches where last m//g left off. */ -#define ANY 13 /* 0xd Match any one character (except newline). */ -#define SANY 14 /* 0xe Match any one character. */ -#define ANYOF 15 /* 0xf Match character in (or not in) this class. 
*/ -#define ALNUM 16 /* 0x10 Match any alphanumeric character */ -#define ALNUML 17 /* 0x11 Match any alphanumeric char in locale */ -#define NALNUM 18 /* 0x12 Match any non-alphanumeric character */ -#define NALNUML 19 /* 0x13 Match any non-alphanumeric char in locale */ -#define SPACE 20 /* 0x14 Match any whitespace character */ -#define SPACEL 21 /* 0x15 Match any whitespace char in locale */ -#define NSPACE 22 /* 0x16 Match any non-whitespace character */ -#define NSPACEL 23 /* 0x17 Match any non-whitespace char in locale */ -#define DIGIT 24 /* 0x18 Match any numeric character */ -#define NDIGIT 25 /* 0x19 Match any non-numeric character */ -#define BRANCH 26 /* 0x1a Match this alternative, or the next... */ -#define BACK 27 /* 0x1b Match "", "next" ptr points backward. */ -#define EXACT 28 /* 0x1c Match this string (preceded by length). */ -#define EXACTF 29 /* 0x1d Match this string, folded (prec. by length). */ -#define EXACTFL 30 /* 0x1e Match this string, folded in locale (w/len). */ -#define NOTHING 31 /* 0x1f Match empty string. */ -#define TAIL 32 /* 0x20 Match empty string. Can jump here from outside. */ -#define STAR 33 /* 0x21 Match this (simple) thing 0 or more times. */ -#define PLUS 34 /* 0x22 Match this (simple) thing 1 or more times. */ -#define CURLY 35 /* 0x23 Match this simple thing {n,m} times. */ -#define CURLYN 36 /* 0x24 Match next-after-this simple thing */ -#define CURLYM 37 /* 0x25 Match this medium-complex thing {n,m} times. */ -#define CURLYX 38 /* 0x26 Match this complex thing {n,m} times. */ -#define WHILEM 39 /* 0x27 Do curly processing and see if rest matches. */ -#define OPEN 40 /* 0x28 Mark this point in input as start of #n. */ -#define CLOSE 41 /* 0x29 Analogous to OPEN. */ -#define REF 42 /* 0x2a Match some already matched string */ -#define REFF 43 /* 0x2b Match already matched string, folded */ -#define REFFL 44 /* 0x2c Match already matched string, folded in loc. 
*/ -#define IFMATCH 45 /* 0x2d Succeeds if the following matches. */ -#define UNLESSM 46 /* 0x2e Fails if the following matches. */ -#define SUSPEND 47 /* 0x2f "Independent" sub-RE. */ -#define IFTHEN 48 /* 0x30 Switch, should be preceeded by switcher . */ -#define GROUPP 49 /* 0x31 Whether the group matched. */ -#define LONGJMP 50 /* 0x32 Jump far away. */ -#define BRANCHJ 51 /* 0x33 BRANCH with long offset. */ -#define EVAL 52 /* 0x34 Execute some Perl code. */ -#define MINMOD 53 /* 0x35 Next operator is not greedy. */ -#define LOGICAL 54 /* 0x36 Next opcode should set the flag only. */ -#define RENUM 55 /* 0x37 Group with independently numbered parens. */ -#define OPTIMIZED 56 /* 0x38 Placeholder for dump. */ +#define EOS 5 /* 0x5 Match "" at end of string. */ +#define EOL 6 /* 0x6 Match "" at end of line. */ +#define MEOL 7 /* 0x7 Same, assuming multiline. */ +#define SEOL 8 /* 0x8 Same, assuming singleline. */ +#define BOUND 9 /* 0x9 Match "" at any word boundary */ +#define BOUNDL 10 /* 0xa Match "" at any word boundary */ +#define NBOUND 11 /* 0xb Match "" at any word non-boundary */ +#define NBOUNDL 12 /* 0xc Match "" at any word non-boundary */ +#define GPOS 13 /* 0xd Matches where last m//g left off. */ +#define ANY 14 /* 0xe Match any one character (except newline). */ +#define SANY 15 /* 0xf Match any one character. */ +#define ANYOF 16 /* 0x10 Match character in (or not in) this class. 
*/ +#define ALNUM 17 /* 0x11 Match any alphanumeric character */ +#define ALNUML 18 /* 0x12 Match any alphanumeric char in locale */ +#define NALNUM 19 /* 0x13 Match any non-alphanumeric character */ +#define NALNUML 20 /* 0x14 Match any non-alphanumeric char in locale */ +#define SPACE 21 /* 0x15 Match any whitespace character */ +#define SPACEL 22 /* 0x16 Match any whitespace char in locale */ +#define NSPACE 23 /* 0x17 Match any non-whitespace character */ +#define NSPACEL 24 /* 0x18 Match any non-whitespace char in locale */ +#define DIGIT 25 /* 0x19 Match any numeric character */ +#define NDIGIT 26 /* 0x1a Match any non-numeric character */ +#define BRANCH 27 /* 0x1b Match this alternative, or the next... */ +#define BACK 28 /* 0x1c Match "", "next" ptr points backward. */ +#define EXACT 29 /* 0x1d Match this string (preceded by length). */ +#define EXACTF 30 /* 0x1e Match this string, folded (prec. by length). */ +#define EXACTFL 31 /* 0x1f Match this string, folded in locale (w/len). */ +#define NOTHING 32 /* 0x20 Match empty string. */ +#define TAIL 33 /* 0x21 Match empty string. Can jump here from outside. */ +#define STAR 34 /* 0x22 Match this (simple) thing 0 or more times. */ +#define PLUS 35 /* 0x23 Match this (simple) thing 1 or more times. */ +#define CURLY 36 /* 0x24 Match this simple thing {n,m} times. */ +#define CURLYN 37 /* 0x25 Match next-after-this simple thing */ +#define CURLYM 38 /* 0x26 Match this medium-complex thing {n,m} times. */ +#define CURLYX 39 /* 0x27 Match this complex thing {n,m} times. */ +#define WHILEM 40 /* 0x28 Do curly processing and see if rest matches. */ +#define OPEN 41 /* 0x29 Mark this point in input as start of #n. */ +#define CLOSE 42 /* 0x2a Analogous to OPEN. */ +#define REF 43 /* 0x2b Match some already matched string */ +#define REFF 44 /* 0x2c Match already matched string, folded */ +#define REFFL 45 /* 0x2d Match already matched string, folded in loc. 
*/ +#define IFMATCH 46 /* 0x2e Succeeds if the following matches. */ +#define UNLESSM 47 /* 0x2f Fails if the following matches. */ +#define SUSPEND 48 /* 0x30 "Independent" sub-RE. */ +#define IFTHEN 49 /* 0x31 Switch, should be preceeded by switcher . */ +#define GROUPP 50 /* 0x32 Whether the group matched. */ +#define LONGJMP 51 /* 0x33 Jump far away. */ +#define BRANCHJ 52 /* 0x34 BRANCH with long offset. */ +#define EVAL 53 /* 0x35 Execute some Perl code. */ +#define MINMOD 54 /* 0x36 Next operator is not greedy. */ +#define LOGICAL 55 /* 0x37 Next opcode should set the flag only. */ +#define RENUM 56 /* 0x38 Group with independently numbered parens. */ +#define OPTIMIZED 57 /* 0x39 Placeholder for dump. */ #ifndef DOINIT EXTCONST U8 regkind[]; @@ -70,6 +71,7 @@ EXTCONST U8 regkind[] = { BOL, /* BOL */ BOL, /* MBOL */ BOL, /* SBOL */ + EOL, /* EOS */ EOL, /* EOL */ EOL, /* MEOL */ EOL, /* SEOL */ @@ -133,6 +135,7 @@ const static U8 regarglen[] = { 0, /* BOL */ 0, /* MBOL */ 0, /* SBOL */ + 0, /* EOS */ 0, /* EOL */ 0, /* MEOL */ 0, /* SEOL */ @@ -193,6 +196,7 @@ const static char reg_off_by_arg[] = { 0, /* BOL */ 0, /* MBOL */ 0, /* SBOL */ + 0, /* EOS */ 0, /* EOL */ 0, /* MEOL */ 0, /* SEOL */ diff --git a/t/op/re_tests b/t/op/re_tests index b506306dbb..dd54a2a5b5 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -439,3 +439,8 @@ $(?<=^(a)) a y $1 a ((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x (?<=x+)y - c - /(?<=x+)y/: variable length lookbehind not implemented a{37,17} - c - /a{37,17}/: Can't do {n,m} with n > m +a\Z a\nb\n n - - +b\Z a\nb\n y - - +b\z a\nb\n n - - +b\Z a\nb y - - +b\z a\nb y - - diff --git a/t/op/regexp.t b/t/op/regexp.t index e3eb336c3b..7e43526f63 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -21,7 +21,7 @@ # Column 5 contains the expected result of double-quote # interpolating that string after the match, or start of error message. # -# Columns 1, 2 and 5 are \n-interpolated. 
+# \n in the tests are interpolated. # # If you want to add a regular expression test that can't be expressed # in this format, don't add it here: put it in op/pat.t instead. @@ -40,7 +40,9 @@ $| = 1; print "1..$numtests\n# $iters iterations\n"; TEST: while (<TESTS>) { - ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_); + chomp; + s/\\n/\n/g; + ($pat, $subject, $result, $repl, $expect) = split(/\t/,$_); $input = join(':',$pat,$subject,$result,$repl,$expect); infty_subst(\$pat); infty_subst(\$expect); @@ -832,7 +832,7 @@ scan_const(char *start) /* leaveit is the set of acceptably-backslashed characters */ char *leaveit = lex_inpat - ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxc0123456789[{]} \t\n\r\f\v#" + ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" : ""; while (s < send || dorange) { |