summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2002-05-12 15:29:36 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2002-05-12 15:29:36 +0000
commit1b2d223bc80114ab97e5af7bf5270927ae350ba4 (patch)
treeb0495c5afa84a084c55f59a788dec505857bb867
parentda32f63e54ca956472e919d15f2271b9494ad04e (diff)
downloadperl-1b2d223bc80114ab97e5af7bf5270927ae350ba4.tar.gz
EBCDIC: make t/op/pat #242 and 243 finally succeed.
p4raw-id: //depot/perl@16556
-rw-r--r--pod/perlebcdic.pod6
-rw-r--r--regcomp.c17
2 files changed, 20 insertions, 3 deletions
diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod
index a9f1d0fc3f..44ad6b9f0d 100644
--- a/pod/perlebcdic.pod
+++ b/pod/perlebcdic.pod
@@ -837,7 +837,11 @@ As of perl 5.005_03 the letter range regular expression such as
[A-Z] and [a-z] have been especially coded to not pick up gap
characters. For example, characters such as E<ocirc> C<o WITH CIRCUMFLEX>
that lie between I and J would not be matched by the
-regular expression range C</[H-K]/>.
+regular expression range C</[H-K]/>. This works in
+the other direction, too, if either of the range end points is
+explicitly numeric: C<[\x89-\x91]> will match C<\x8e>, even
+though C<\x89> is C<i> and C<\x91> is C<j>, and C<\x8e>
+is a gap character from the alphabetic viewpoint.
If you do want to match the alphabet gap characters in a single octet
regular expression try matching the hex or octal code such
diff --git a/regcomp.c b/regcomp.c
index a435be9c6a..6b17be12af 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3523,6 +3523,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
UV n;
bool optimize_invert = TRUE;
AV* unicode_alternate = 0;
+#ifdef EBCDIC
+ UV literal_endpoint = 0;
+#endif
ret = reganode(pRExC_state, ANYOF, 0);
@@ -3685,6 +3688,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
break;
}
} /* end of \blah */
+#ifdef EBCDIC
+ else
+ literal_endpoint++;
+#endif
if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
@@ -4087,8 +4094,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
IV ceilvalue = value < 256 ? value : 255;
#ifdef EBCDIC
- if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
- (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+ /* In EBCDIC [\x89-\x91] should include the gap
+ * character \x8e, but [i-j] should not. */
+ if (literal_endpoint == 2 &&
+ ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+ (isUPPER(prevvalue) && isUPPER(ceilvalue))))
{
if (isLOWER(prevvalue)) {
for (i = prevvalue; i <= ceilvalue; i++)
@@ -4168,6 +4178,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
}
}
}
+#ifdef EBCDIC
+ literal_endpoint = 0;
+#endif
}
range = 0; /* this range (if it was one) is done now */