applied patch, tweaked doc, and regenerated regnodes.h

Message-Id: <199806270655.CAA29144@monk.mps.ohio-state.edu>
Subject: [PATCH 5.004_68] \z in RE
p4raw-id: //depot/perl@1250
author: Ilya Zakharevich <ilya@math.berkeley.edu> 1998-06-26 22:55:26 -0400
committer: Gurusamy Sarathy <gsar@cpan.org> 1998-06-28 21:09:48 +0000
commit: b85d18e97b6ae9e0cc168f99b999fd3fd33104bd (patch)
tree: fba77924a69fae7095030b314eb5e20f98b74006
parent: b174585de5ccc9973ba572393b2b34e1a6a5b749 (diff)
download: perl-b85d18e97b6ae9e0cc168f99b999fd3fd33104bd.tar.gz
8 files changed, 85 insertions, 58 deletions
diff --git a/pod/perlre.pod b/pod/perlre.pod
index ebd58582d0..30608ced75 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -177,8 +177,9 @@ Perl defines the following zero-width assertions:
 
     \b	Match a word boundary
     \B	Match a non-(word boundary)
-    \A	Match at only beginning of string
-    \Z	Match at only end of string (or before newline at the end)
+    \A	Match only at beginning of string
+    \Z	Match only at end of string, or before newline at the end
+    \z	Match only at end of string
     \G	Match only where previous m//g left off (works only with /g)
 
 A word boundary (C<\b>) is defined as a spot between two characters that
@@ -189,7 +190,7 @@ represents backspace rather than a word boundary.)  The C<\A> and C<\Z> are
 just like "^" and "$", except that they won't match multiple times when the
 C</m> modifier is used, while "^" and "$" will match at every internal line
 boundary.  To match the actual end of the string, not ignoring newline,
-you can use C<\Z(?!\n)>.  The C<\G> assertion can be used to chain global
+you can use C<\z>.  The C<\G> assertion can be used to chain global
 matches (using C<m//g>), as described in
 L<perlop/"Regexp Quote-Like Operators">.
 
diff --git a/regcomp.c b/regcomp.c
index 2b71d99a40..5475d7897f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1539,6 +1539,12 @@ tryagain:
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    break;
+	case 'z':
+	    ret = reg_node(EOS);
+	    *flagp |= SIMPLE;
+	    seen_zerolen++;		/* Do not optimize RE away */
+	    nextchar();
+	    break;
 	case 'w':
 	    ret = reg_node((regflags & PMf_LOCALE) ? ALNUML : ALNUM);
 	    *flagp |= HASWIDTH|SIMPLE;
@@ -1665,6 +1671,7 @@ tryagain:
 		    case 'A':
 		    case 'G':
 		    case 'Z':
+		    case 'z':
 		    case 'w':
 		    case 'W':
 		    case 'b':
@@ -2329,6 +2336,9 @@ regprop(SV *sv, regnode *o)
     case EOL:
 	p = "EOL";
 	break;
+    case EOS:
+	p = "EOS";
+	break;
     case MEOL:
 	p = "MEOL";
 	break;
diff --git a/regcomp.sym b/regcomp.sym
index aa18d119cc..9775b9374d 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -11,6 +11,7 @@ SUCCEED		END,    no	Return from a subroutine, basically.
 BOL		BOL,    no	Match "" at beginning of line.
 MBOL		BOL,    no	Same, assuming multiline.
 SBOL		BOL,    no	Same, assuming singleline.
+EOS		EOL,    no	Match "" at end of string.
 EOL		EOL,    no	Match "" at end of line.
 MEOL		EOL,    no	Same, assuming multiline.
 SEOL		EOL,    no	Same, assuming singleline.
diff --git a/regexec.c b/regexec.c
index dd51bc1e22..107d68bcae 100644
--- a/regexec.c
+++ b/regexec.c
@@ -824,6 +824,10 @@ regmatch(regnode *prog)
 	    if (regeol - locinput > 1)
 		sayNO;
 	    break;
+	case EOS:
+	    if (regeol != locinput)
+		sayNO;
+	    break;
 	case SANY:
 	    if (!nextchr && locinput >= regeol)
 		sayNO;
diff --git a/regnodes.h b/regnodes.h
index 445d0b2803..c494daed12 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -8,58 +8,59 @@
 #define	BOL	2	/*  0x2 Match "" at beginning of line. */
 #define	MBOL	3	/*  0x3 Same, assuming multiline. */
 #define	SBOL	4	/*  0x4 Same, assuming singleline. */
-#define	EOL	5	/*  0x5 Match "" at end of line. */
-#define	MEOL	6	/*  0x6 Same, assuming multiline. */
-#define	SEOL	7	/*  0x7 Same, assuming singleline. */
-#define	BOUND	8	/*  0x8 Match "" at any word boundary */
-#define	BOUNDL	9	/*  0x9 Match "" at any word boundary */
-#define	NBOUND	10	/*  0xa Match "" at any word non-boundary */
-#define	NBOUNDL	11	/*  0xb Match "" at any word non-boundary */
-#define	GPOS	12	/*  0xc Matches where last m//g left off. */
-#define	ANY	13	/*  0xd Match any one character (except newline). */
-#define	SANY	14	/*  0xe Match any one character. */
-#define	ANYOF	15	/*  0xf Match character in (or not in) this class. */
-#define	ALNUM	16	/* 0x10 Match any alphanumeric character */
-#define	ALNUML	17	/* 0x11 Match any alphanumeric char in locale */
-#define	NALNUM	18	/* 0x12 Match any non-alphanumeric character */
-#define	NALNUML	19	/* 0x13 Match any non-alphanumeric char in locale */
-#define	SPACE	20	/* 0x14 Match any whitespace character */
-#define	SPACEL	21	/* 0x15 Match any whitespace char in locale */
-#define	NSPACE	22	/* 0x16 Match any non-whitespace character */
-#define	NSPACEL	23	/* 0x17 Match any non-whitespace char in locale */
-#define	DIGIT	24	/* 0x18 Match any numeric character */
-#define	NDIGIT	25	/* 0x19 Match any non-numeric character */
-#define	BRANCH	26	/* 0x1a Match this alternative, or the next... */
-#define	BACK	27	/* 0x1b Match "", "next" ptr points backward. */
-#define	EXACT	28	/* 0x1c Match this string (preceded by length). */
-#define	EXACTF	29	/* 0x1d Match this string, folded (prec. by length). */
-#define	EXACTFL	30	/* 0x1e Match this string, folded in locale (w/len). */
-#define	NOTHING	31	/* 0x1f Match empty string. */
-#define	TAIL	32	/* 0x20 Match empty string. Can jump here from outside. */
-#define	STAR	33	/* 0x21 Match this (simple) thing 0 or more times. */
-#define	PLUS	34	/* 0x22 Match this (simple) thing 1 or more times. */
-#define	CURLY	35	/* 0x23 Match this simple thing {n,m} times. */
-#define	CURLYN	36	/* 0x24 Match next-after-this simple thing  */
-#define	CURLYM	37	/* 0x25 Match this medium-complex thing {n,m} times. */
-#define	CURLYX	38	/* 0x26 Match this complex thing {n,m} times. */
-#define	WHILEM	39	/* 0x27 Do curly processing and see if rest matches. */
-#define	OPEN	40	/* 0x28 Mark this point in input as start of #n. */
-#define	CLOSE	41	/* 0x29 Analogous to OPEN. */
-#define	REF	42	/* 0x2a Match some already matched string */
-#define	REFF	43	/* 0x2b Match already matched string, folded */
-#define	REFFL	44	/* 0x2c Match already matched string, folded in loc. */
-#define	IFMATCH	45	/* 0x2d Succeeds if the following matches. */
-#define	UNLESSM	46	/* 0x2e Fails if the following matches. */
-#define	SUSPEND	47	/* 0x2f "Independent" sub-RE. */
-#define	IFTHEN	48	/* 0x30 Switch, should be preceeded by switcher . */
-#define	GROUPP	49	/* 0x31 Whether the group matched. */
-#define	LONGJMP	50	/* 0x32 Jump far away. */
-#define	BRANCHJ	51	/* 0x33 BRANCH with long offset. */
-#define	EVAL	52	/* 0x34 Execute some Perl code. */
-#define	MINMOD	53	/* 0x35 Next operator is not greedy. */
-#define	LOGICAL	54	/* 0x36 Next opcode should set the flag only. */
-#define	RENUM	55	/* 0x37 Group with independently numbered parens. */
-#define	OPTIMIZED	56	/* 0x38 Placeholder for dump. */
+#define	EOS	5	/*  0x5 Match "" at end of string. */
+#define	EOL	6	/*  0x6 Match "" at end of line. */
+#define	MEOL	7	/*  0x7 Same, assuming multiline. */
+#define	SEOL	8	/*  0x8 Same, assuming singleline. */
+#define	BOUND	9	/*  0x9 Match "" at any word boundary */
+#define	BOUNDL	10	/*  0xa Match "" at any word boundary */
+#define	NBOUND	11	/*  0xb Match "" at any word non-boundary */
+#define	NBOUNDL	12	/*  0xc Match "" at any word non-boundary */
+#define	GPOS	13	/*  0xd Matches where last m//g left off. */
+#define	ANY	14	/*  0xe Match any one character (except newline). */
+#define	SANY	15	/*  0xf Match any one character. */
+#define	ANYOF	16	/* 0x10 Match character in (or not in) this class. */
+#define	ALNUM	17	/* 0x11 Match any alphanumeric character */
+#define	ALNUML	18	/* 0x12 Match any alphanumeric char in locale */
+#define	NALNUM	19	/* 0x13 Match any non-alphanumeric character */
+#define	NALNUML	20	/* 0x14 Match any non-alphanumeric char in locale */
+#define	SPACE	21	/* 0x15 Match any whitespace character */
+#define	SPACEL	22	/* 0x16 Match any whitespace char in locale */
+#define	NSPACE	23	/* 0x17 Match any non-whitespace character */
+#define	NSPACEL	24	/* 0x18 Match any non-whitespace char in locale */
+#define	DIGIT	25	/* 0x19 Match any numeric character */
+#define	NDIGIT	26	/* 0x1a Match any non-numeric character */
+#define	BRANCH	27	/* 0x1b Match this alternative, or the next... */
+#define	BACK	28	/* 0x1c Match "", "next" ptr points backward. */
+#define	EXACT	29	/* 0x1d Match this string (preceded by length). */
+#define	EXACTF	30	/* 0x1e Match this string, folded (prec. by length). */
+#define	EXACTFL	31	/* 0x1f Match this string, folded in locale (w/len). */
+#define	NOTHING	32	/* 0x20 Match empty string. */
+#define	TAIL	33	/* 0x21 Match empty string. Can jump here from outside. */
+#define	STAR	34	/* 0x22 Match this (simple) thing 0 or more times. */
+#define	PLUS	35	/* 0x23 Match this (simple) thing 1 or more times. */
+#define	CURLY	36	/* 0x24 Match this simple thing {n,m} times. */
+#define	CURLYN	37	/* 0x25 Match next-after-this simple thing  */
+#define	CURLYM	38	/* 0x26 Match this medium-complex thing {n,m} times. */
+#define	CURLYX	39	/* 0x27 Match this complex thing {n,m} times. */
+#define	WHILEM	40	/* 0x28 Do curly processing and see if rest matches. */
+#define	OPEN	41	/* 0x29 Mark this point in input as start of #n. */
+#define	CLOSE	42	/* 0x2a Analogous to OPEN. */
+#define	REF	43	/* 0x2b Match some already matched string */
+#define	REFF	44	/* 0x2c Match already matched string, folded */
+#define	REFFL	45	/* 0x2d Match already matched string, folded in loc. */
+#define	IFMATCH	46	/* 0x2e Succeeds if the following matches. */
+#define	UNLESSM	47	/* 0x2f Fails if the following matches. */
+#define	SUSPEND	48	/* 0x30 "Independent" sub-RE. */
+#define	IFTHEN	49	/* 0x31 Switch, should be preceeded by switcher . */
+#define	GROUPP	50	/* 0x32 Whether the group matched. */
+#define	LONGJMP	51	/* 0x33 Jump far away. */
+#define	BRANCHJ	52	/* 0x34 BRANCH with long offset. */
+#define	EVAL	53	/* 0x35 Execute some Perl code. */
+#define	MINMOD	54	/* 0x36 Next operator is not greedy. */
+#define	LOGICAL	55	/* 0x37 Next opcode should set the flag only. */
+#define	RENUM	56	/* 0x38 Group with independently numbered parens. */
+#define	OPTIMIZED	57	/* 0x39 Placeholder for dump. */
 
 #ifndef DOINIT
 EXTCONST U8 regkind[];
@@ -70,6 +71,7 @@ EXTCONST U8 regkind[] = {
 	BOL,		/* BOL */
 	BOL,		/* MBOL */
 	BOL,		/* SBOL */
+	EOL,		/* EOS */
 	EOL,		/* EOL */
 	EOL,		/* MEOL */
 	EOL,		/* SEOL */
@@ -133,6 +135,7 @@ const static U8 regarglen[] = {
 	0,		/* BOL */
 	0,		/* MBOL */
 	0,		/* SBOL */
+	0,		/* EOS */
 	0,		/* EOL */
 	0,		/* MEOL */
 	0,		/* SEOL */
@@ -193,6 +196,7 @@ const static char reg_off_by_arg[] = {
 	0,		/* BOL */
 	0,		/* MBOL */
 	0,		/* SBOL */
+	0,		/* EOS */
 	0,		/* EOL */
 	0,		/* MEOL */
 	0,		/* SEOL */
diff --git a/t/op/re_tests b/t/op/re_tests
index b506306dbb..dd54a2a5b5 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -439,3 +439,8 @@ $(?<=^(a))	a	y	$1	a
 ((?>[^()]+)|\([^()]*\))+	((abc(ade)ufh()()x	y	$&	abc(ade)ufh()()x
 (?<=x+)y	-	c	-	/(?<=x+)y/: variable length lookbehind not implemented
 a{37,17}	-	c	-	/a{37,17}/: Can't do {n,m} with n > m
+a\Z	a\nb\n	n	-	-
+b\Z	a\nb\n	y	-	-
+b\z	a\nb\n	n	-	-
+b\Z	a\nb	y	-	-
+b\z	a\nb	y	-	-
diff --git a/t/op/regexp.t b/t/op/regexp.t
index e3eb336c3b..7e43526f63 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -21,7 +21,7 @@
 # Column 5 contains the expected result of double-quote
 # interpolating that string after the match, or start of error message.
 #
-# Columns 1, 2 and 5 are \n-interpolated.
+# \n in the tests are interpolated.
 #
 # If you want to add a regular expression test that can't be expressed
 # in this format, don't add it here: put it in op/pat.t instead.
@@ -40,7 +40,9 @@ $| = 1;
 print "1..$numtests\n# $iters iterations\n";
 TEST:
 while (<TESTS>) {
-    ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_);
+    chomp;
+    s/\\n/\n/g;
+    ($pat, $subject, $result, $repl, $expect) = split(/\t/,$_);
     $input = join(':',$pat,$subject,$result,$repl,$expect);
     infty_subst(\$pat);
     infty_subst(\$expect);
diff --git a/toke.c b/toke.c
index 4aa96d3821..6738dc189d 100644
--- a/toke.c
+++ b/toke.c
@@ -832,7 +832,7 @@ scan_const(char *start)
     /* leaveit is the set of acceptably-backslashed characters */
     char *leaveit =
 	lex_inpat
-	    ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxc0123456789[{]} \t\n\r\f\v#"
+	    ? "\\.^$@AGZdDwWsSbB+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
 	    : "";
 
     while (s < send || dorange) {
author	Ilya Zakharevich <ilya@math.berkeley.edu>	1998-06-26 22:55:26 -0400
committer	Gurusamy Sarathy <gsar@cpan.org>	1998-06-28 21:09:48 +0000
commit	b85d18e97b6ae9e0cc168f99b999fd3fd33104bd (patch)
tree	fba77924a69fae7095030b314eb5e20f98b74006
parent	b174585de5ccc9973ba572393b2b34e1a6a5b749 (diff)
download	perl-b85d18e97b6ae9e0cc168f99b999fd3fd33104bd.tar.gz