diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-05-12 15:29:36 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-05-12 15:29:36 +0000 |
commit | 1b2d223bc80114ab97e5af7bf5270927ae350ba4 (patch) | |
tree | b0495c5afa84a084c55f59a788dec505857bb867 | |
parent | da32f63e54ca956472e919d15f2271b9494ad04e (diff) | |
download | perl-1b2d223bc80114ab97e5af7bf5270927ae350ba4.tar.gz |
EBCDIC: make t/op/pat #242 and 243 finally succeed.
p4raw-id: //depot/perl@16556
-rw-r--r-- | pod/perlebcdic.pod | 6 | ||||
-rw-r--r-- | regcomp.c | 17 |
2 files changed, 20 insertions, 3 deletions
diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod index a9f1d0fc3f..44ad6b9f0d 100644 --- a/pod/perlebcdic.pod +++ b/pod/perlebcdic.pod @@ -837,7 +837,11 @@ As of perl 5.005_03 the letter range regular expression such as [A-Z] and [a-z] have been especially coded to not pick up gap characters. For example, characters such as E<ocirc> C<o WITH CIRCUMFLEX> that lie between I and J would not be matched by the -regular expression range C</[H-K]/>. +regular expression range C</[H-K]/>. This works in +the other direction, too, if either of the range end points is +explicitly numeric: C<[\x89-\x91]> will match C<\x8e>, even +though C<\x89> is C<i> and C<\x91> is C<j>, and C<\x8e> +is a gap character from the alphabetic viewpoint. If you do want to match the alphabet gap characters in a single octet regular expression try matching the hex or octal code such @@ -3523,6 +3523,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) UV n; bool optimize_invert = TRUE; AV* unicode_alternate = 0; +#ifdef EBCDIC + UV literal_endpoint = 0; +#endif ret = reganode(pRExC_state, ANYOF, 0); @@ -3685,6 +3688,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) break; } } /* end of \blah */ +#ifdef EBCDIC + else + literal_endpoint++; +#endif if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */ @@ -4087,8 +4094,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) IV ceilvalue = value < 256 ? value : 255; #ifdef EBCDIC - if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || - (isUPPER(prevvalue) && isUPPER(ceilvalue))) + /* In EBCDIC [\x89-\x91] should include + * the \x8e but [i-j] should not. */ + if (literal_endpoint == 2 && + ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || + (isUPPER(prevvalue) && isUPPER(ceilvalue)))) { if (isLOWER(prevvalue)) { for (i = prevvalue; i <= ceilvalue; i++) @@ -4168,6 +4178,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } } } +#ifdef EBCDIC + literal_endpoint = 0; +#endif } range = 0; /* this range (if it was one) is done now */ |