EBCDIC: make t/op/pat #242 and 243 finally succeed.

p4raw-id: //depot/perl@16556
author: Jarkko Hietaniemi <jhi@iki.fi> 2002-05-12 15:29:36 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2002-05-12 15:29:36 +0000
commit: 1b2d223bc80114ab97e5af7bf5270927ae350ba4 (patch)
tree: b0495c5afa84a084c55f59a788dec505857bb867
parent: da32f63e54ca956472e919d15f2271b9494ad04e (diff)
download: perl-1b2d223bc80114ab97e5af7bf5270927ae350ba4.tar.gz
2 files changed, 20 insertions, 3 deletions
diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod
index a9f1d0fc3f..44ad6b9f0d 100644
--- a/pod/perlebcdic.pod
+++ b/pod/perlebcdic.pod
@@ -837,7 +837,11 @@ As of perl 5.005_03 the letter range regular expression such as
 [A-Z] and [a-z] have been especially coded to not pick up gap 
 characters.  For example, characters such as E<ocirc> C<o WITH CIRCUMFLEX> 
 that lie between I and J would not be matched by the 
-regular expression range C</[H-K]/>.  
+regular expression range C</[H-K]/>.  This works in
+the other direction, too, if either of the range end points is
+explicitly numeric: C<[\x89-\x91]> will match C<\x8e>, even
+though C<\x89> is C<i> and C<\x91> is C<j>, and C<\x8e>
+is a gap character from the alphabetic viewpoint.
 
 If you do want to match the alphabet gap characters in a single octet 
 regular expression try matching the hex or octal code such 
diff --git a/regcomp.c b/regcomp.c
index a435be9c6a..6b17be12af 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3523,6 +3523,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     UV n;
     bool optimize_invert   = TRUE;
     AV* unicode_alternate  = 0;
+#ifdef EBCDIC
+    UV literal_endpoint = 0;
+#endif
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -3685,6 +3688,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 		break;
 	    }
 	} /* end of \blah */
+#ifdef EBCDIC
+	else
+	    literal_endpoint++;
+#endif
 
 	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
 
@@ -4087,8 +4094,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	        IV ceilvalue = value < 256 ? value : 255;
 
 #ifdef EBCDIC
-		if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-		    (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+		/* In EBCDIC [\x89-\x91] should include
+		 * the \x8e but [i-j] should not. */
+		if (literal_endpoint == 2 &&
+		    ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+		     (isUPPER(prevvalue) && isUPPER(ceilvalue))))
 		{
 		    if (isLOWER(prevvalue)) {
 			for (i = prevvalue; i <= ceilvalue; i++)
@@ -4168,6 +4178,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 		    }
 		}
 	    }
+#ifdef EBCDIC
+	    literal_endpoint = 0;
+#endif
         }
 
 	range = 0; /* this range (if it was one) is done now */
author	Jarkko Hietaniemi <jhi@iki.fi>	2002-05-12 15:29:36 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2002-05-12 15:29:36 +0000
commit	1b2d223bc80114ab97e5af7bf5270927ae350ba4 (patch)
tree	b0495c5afa84a084c55f59a788dec505857bb867
parent	da32f63e54ca956472e919d15f2271b9494ad04e (diff)
download	perl-1b2d223bc80114ab97e5af7bf5270927ae350ba4.tar.gz