diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-02-25 18:46:36 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-02-25 18:46:36 +0000 |
commit | 1ba5c6696e3dcd1de2d0e5ac9b9f54e28fc0282e (patch) | |
tree | 3d929d834f5ad8e21fd801fd5b460feb974fe637 | |
parent | d7d9ad0c0eccd5b9ff687cafaaaa26c85d95fc9a (diff) | |
download | perl-1ba5c6696e3dcd1de2d0e5ac9b9f54e28fc0282e.tar.gz |
Retract #8929,8930,8932,8933 for now.
p4raw-id: //depot/perl@8935
-rw-r--r-- | perl.h | 4 | ||||
-rw-r--r-- | regcomp.c | 74 | ||||
-rwxr-xr-x | t/op/pat.t | 48 | ||||
-rwxr-xr-x | t/op/tr.t | 43 | ||||
-rw-r--r-- | toke.c | 13 |
5 files changed, 71 insertions, 111 deletions
@@ -3517,10 +3517,6 @@ typedef struct am_table_short AMTS; #define EXEC_ARGV_CAST(x) x #endif -#ifdef EBCDIC -#define ALPHAS_HAVE_GAPS -#endif - /* and finally... */ #define PERL_PATCHLEVEL_H_IMPLICIT #include "patchlevel.h" @@ -3185,10 +3185,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) register char *e; UV n; bool dont_optimize_invert = FALSE; -#ifdef ALPHAS_HAVE_GAPS - bool explicit_alpha = TRUE; - bool explicit_alpha_prev = TRUE; -#endif ret = reganode(pRExC_state, ANYOF, 0); @@ -3375,6 +3371,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isALNUM(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; case ANYOF_NALNUM: @@ -3385,6 +3382,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isALNUM(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break; case ANYOF_ALNUMC: @@ -3395,6 +3393,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isALNUMC(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break; case ANYOF_NALNUMC: @@ -3405,6 +3404,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isALNUMC(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break; case ANYOF_ALPHA: @@ -3415,6 +3415,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isALPHA(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break; case ANYOF_NALPHA: @@ -3425,36 +3426,39 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isALPHA(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break; case ANYOF_ASCII: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_ASCII); else { -#ifdef ALPHAS_HAVE_GAPS +#ifdef ASCIIish + for (value = 0; value < 128; value++) + ANYOF_BITMAP_SET(ret, value); +#else /* EBCDIC */ for (value = 0; value < 256; value++) if (isASCII(value)) ANYOF_BITMAP_SET(ret, value); -#else - for (value = 0; value < 128; value++) - ANYOF_BITMAP_SET(ret, value); -#endif +#endif /* EBCDIC */ } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break; case ANYOF_NASCII: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_NASCII); else { -#ifdef ALPHAS_HAVE_GAPS +#ifdef ASCIIish + for (value = 128; value < 256; value++) + ANYOF_BITMAP_SET(ret, value); +#else /* EBCDIC */ for (value = 0; value < 256; value++) if (!isASCII(value)) ANYOF_BITMAP_SET(ret, value); -#else - for (value = 128; value < 256; value++) - ANYOF_BITMAP_SET(ret, value); -#endif +#endif /* EBCDIC */ } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break; case ANYOF_BLANK: @@ -3465,6 +3469,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isBLANK(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n"); break; case ANYOF_NBLANK: @@ -3475,6 +3480,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isBLANK(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n"); break; case ANYOF_CNTRL: @@ -3485,6 +3491,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isCNTRL(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break; case ANYOF_NCNTRL: @@ -3495,6 +3502,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isCNTRL(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break; case ANYOF_DIGIT: @@ -3505,6 +3513,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) for (value = '0'; value <= '9'; value++) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break; case ANYOF_NDIGIT: @@ -3517,6 +3526,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) for (value = '9' + 1; value < 256; value++) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break; case ANYOF_GRAPH: @@ -3527,6 +3537,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isGRAPH(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break; case ANYOF_NGRAPH: @@ -3537,6 +3548,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isGRAPH(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break; case ANYOF_LOWER: @@ -3547,6 +3559,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isLOWER(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break; case ANYOF_NLOWER: @@ -3557,6 +3570,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isLOWER(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break; case ANYOF_PRINT: @@ -3567,6 +3581,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isPRINT(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break; case ANYOF_NPRINT: @@ -3577,6 +3592,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isPRINT(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break; case ANYOF_PSXSPC: @@ -3587,6 +3603,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isPSXSPC(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; case ANYOF_NPSXSPC: @@ -3597,6 +3614,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isPSXSPC(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; case ANYOF_PUNCT: @@ -3607,6 +3625,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isPUNCT(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break; case ANYOF_NPUNCT: @@ -3617,6 +3636,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isPUNCT(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; case ANYOF_SPACE: @@ -3627,6 +3647,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isSPACE(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n"); break; case ANYOF_NSPACE: @@ -3637,6 +3658,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isSPACE(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n"); break; case ANYOF_UPPER: @@ -3647,6 +3669,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isUPPER(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; case ANYOF_NUPPER: @@ -3657,6 +3680,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isUPPER(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break; case ANYOF_XDIGIT: @@ -3667,6 +3691,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (isXDIGIT(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break; case ANYOF_NXDIGIT: @@ -3677,6 +3702,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!isXDIGIT(value)) ANYOF_BITMAP_SET(ret, value); } + dont_optimize_invert = TRUE; Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; default: @@ -3685,7 +3711,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } if (LOC) ANYOF_FLAGS(ret) |= ANYOF_CLASS; - dont_optimize_invert = TRUE; continue; } } /* end of namedclass \blah */ @@ -3701,10 +3726,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } else { lastvalue = value; /* save the beginning of the range */ -#ifdef ALPHAS_HAVE_GAPS - explicit_alpha_prev = explicit_alpha; - explicit_alpha = isALPHA(value); -#endif if (*RExC_parse == '-' && RExC_parse+1 < RExC_end && RExC_parse[1] != ']') { RExC_parse++; @@ -3728,18 +3749,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* now is the next time */ if (!SIZE_ONLY) { if (lastvalue < 256 && value < 256) { -#ifdef ALPHAS_HAVE_GAPS - /* In EBCDIC the letters are not an unbroken range - * numerically, there's are gaps between i-j, r-s, - * I-J, R-S. We DWIM that if the endpoints of the - * range are specified as explicitly alphabetic, - * an alphabetic range is requested, otherwise - * (the else branch) (say, explicit numeric endpoints - * like \xHH are used) we do a straightforward - * numeric range. */ - if (explicit_alpha_prev && explicit_alpha && - ((isLOWER(lastvalue) && isLOWER(value)) || - ((isUPPER(lastvalue) && isUPPER(value))))) +#ifndef ASCIIish /* EBCDIC, for example. */ + if ((isLOWER(lastvalue) && isLOWER(value)) || + (isUPPER(lastvalue) && isUPPER(value))) { IV i; if (isLOWER(lastvalue)) { diff --git a/t/op/pat.t b/t/op/pat.t index 590c268ffa..237ea44c4e 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4,7 +4,7 @@ # the format supported by op/regexp.t. If you want to add a test # that does fit that format, add it to op/re_tests, not here. -print "1..244\n"; +print "1..242\n"; BEGIN { chdir 't' if -d 't'; @@ -1183,29 +1183,23 @@ if (/(\C)/g) { } } -if (ord('i') == 0x89 && ord('j') == 0x91) { # EBCDIC - if ("\x8e" =~ /[\x89-\x91]/) { - print "ok 241\n"; - } else { - print "not ok 241\n"; - } - if ("\x8e" !~ /[i-j]/) { - print "ok 242\n"; - } else { - print "not ok 242\n"; - } - if ("\xce" =~ /[\xc9-\xd1]/) { - print "ok 243\n"; - } else { - print "not ok 243\n"; - } - if ("\xce" !~ /[I-J]/) { - print "ok 244\n"; - } else { - print "not ok 244\n"; - } -} else { - for (241..244) { - print "ok $_ # Skip: not EBCDIC\n"; - } -} +# 241..242 +# +# The tr is admittedly NOT a regular expression operator, +# but this test is more of an EBCDIC test, the background is +# that \x89 is 'i' and \x90 is 'j', and \x8e is not a letter, +# not even a printable character. Now for the trick: +# if the range is specified using letters, the \x8e should most +# probably not match, but if the range is specified using explicit +# numeric endpoints, it probably should match. The first case, +# not matching if using letters, is already tested elsewhere, +# here we test for the matching cases. + +$_ = qq/\x8E/; + +print "not " unless /[\x89-\x91]/; +print "ok 241\n"; + +print "not " unless tr/\x89-\x91//d == 1; +print "ok 242\n"; + @@ -5,7 +5,7 @@ BEGIN { @INC = '../lib'; } -print "1..58\n"; +print "1..51\n"; $_ = "abcdefghijklmnopqrstuvwxyz"; @@ -296,44 +296,3 @@ print "ok 50\n"; ($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/; print "not " unless $a eq v300.300.172.302.301.172; print "ok 51\n"; - -# Tricky on EBCDIC: while [a-z] must not match the gap characters, -# (i-j, r-s, I-J, R-S), [\x89-\x91] has to match them, from Karsten -# Sperling. - -if (ord('i') == 0x89 & ord('j') == 0x91) { - -$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/\x89-\x91/X/; -print "not " unless $c == 8 and $a eq "XXXXXXXX"; -print "ok 52\n"; - -$c = ($a = "\x89\x8a\x8b\x8c\x8d\x8f\x90\x91") =~ tr/i-j/X/; -print "not " unless $c == 2 and $a eq "X\x8a\x8b\x8c\x8d\x8f\x90X"; -print "ok 53\n"; - -$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/\xc9-\xd1/X/; -print "not " unless $c == 8 and $a eq "XXXXXXXX"; -print "ok 54\n"; - -$c = ($a = "\xc9\xca\xcb\xcc\xcd\xcf\xd0\xd1") =~ tr/I-J/X/; -print "not " unless $c == 2 and $a eq "X\xca\xcb\xcc\xcd\xcf\xd0X"; -print "ok 55\n"; - -} else { - for (52..55) { print "ok $_ # Skip: not EBCDIC\n" } -} - -# some more wide-char tests from Karsten Sperling - -($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c; -print "not " unless $a eq "X"; -print "ok 56\n"; - -($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c; -print "not " unless $a eq "X"; -print "ok 57\n"; - -($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c; -print "not " unless $a eq "X"; -print ok "58\n"; - @@ -1226,9 +1226,9 @@ S_scan_const(pTHX_ char *start) if (PL_lex_inwhat == OP_TRANS) { /* expand a range A-Z to the full set of characters. AIE! */ if (dorange) { - UV i; /* current expanded character */ - UV min; /* first character in range */ - UV max; /* last character in range */ + I32 i; /* current expanded character */ + I32 min; /* first character in range */ + I32 max; /* last character in range */ i = d - SvPVX(sv); /* remember current offset */ SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */ @@ -1240,12 +1240,11 @@ S_scan_const(pTHX_ char *start) if (min > max) { Perl_croak(aTHX_ - "Invalid [] range \"\\x%"UVxf"-\\x%"UVxf"\" in transliteration operator", - min, max); + "Invalid [] range \"%c-%c\" in transliteration operator", + (char)min, (char)max); } -#ifdef ALPHAS_HAVE_GAPS - /* BROKEN FOR EBCDIC, see regcomp.c:reglass() */ +#ifndef ASCIIish if ((isLOWER(min) && isLOWER(max)) || (isUPPER(min) && isUPPER(max))) { if (isLOWER(min)) { |