diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-05-11 14:08:20 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-05-11 14:08:20 +0000 |
commit | 3a3c44472a318717ec0cdc0a7f768125ae0f001f (patch) | |
tree | 5e068b0d32d9904e2ad08658e1039a9e19088d72 /regcomp.c | |
parent | c80f55d1ea34dba13189e54d4f4e9bce7de39357 (diff) | |
download | perl-3a3c44472a318717ec0cdc0a7f768125ae0f001f.tar.gz |
Remove the 'asciir' re subpragma. Character class ranges should
instead implement the 'physical vs logical' range scheme:
\xAA-\xCC is a native physical range: you want that range of
codepoints in your native encoding. In EBCDIC the codepoints
in the gaps (between i and j, and between r and s) should be included.
\x{AA}-\x{CC} is a physical Unicode range: you want that range of
codepoints in Unicode.
a-z is a logical range: you want that range of 'logical' codepoints
in your native encoding. In EBCDIC the codepoints in the gaps
(between i and j, and between r and s) should not be included.
Mixed cases (a-\xAA, etc.) should either be errors, or maybe
the 'logical' endpoints should be converted to native/Unicode
codepoints and the range handled as a physical range.
'Logical endpoints' are to be recognized only in the A-Z, a-z,
and 0-9 ranges. A warning should probably be given for mixed
cases like A-z or a-9 (since such expressions are encoding
dependent), with a recommendation to use physical ranges instead.
p4raw-id: //depot/perl@10085
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 38 |
1 files changed, 7 insertions, 31 deletions
@@ -3515,14 +3515,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ for (value = 0; value < 256; value++) { - if (PL_hints & HINT_RE_ASCIIR) { - if (NATIVE_TO_ASCII(value) < 128) - ANYOF_BITMAP_SET(ret, value); - } - else { - if (isASCII(value)) - ANYOF_BITMAP_SET(ret, value); - } + if (isASCII(value)) + ANYOF_BITMAP_SET(ret, value); } #endif /* EBCDIC */ } @@ -3537,14 +3531,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ for (value = 0; value < 256; value++) { - if (PL_hints & HINT_RE_ASCIIR) { - if (NATIVE_TO_ASCII(value) >= 128) - ANYOF_BITMAP_SET(ret, value); - } - else { - if (!isASCII(value)) - ANYOF_BITMAP_SET(ret, value); - } + if (!isASCII(value)) + ANYOF_BITMAP_SET(ret, value); } #endif /* EBCDIC */ } @@ -3783,9 +3771,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } /* end of namedclass \blah */ if (range) { - if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) || - ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) && - (PL_hints & HINT_RE_ASCIIR))) /* b-a */ { + if (prevvalue > value) /* b-a */ { Simple_vFAIL4("Invalid [] range \"%*.*s\"", RExC_parse - rangebegin, RExC_parse - rangebegin, @@ -3823,18 +3809,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) IV ceilvalue = value < 256 ? value : 255; #ifdef EBCDIC - if (PL_hints & HINT_RE_ASCIIR) { - /* New style scheme for ranges: - * use re 'asciir'; - * do ranges in ASCII/Unicode space - */ - for (i = NATIVE_TO_ASCII(prevvalue); - i <= NATIVE_TO_ASCII(ceilvalue); - i++) - ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i)); - } - else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || - (isUPPER(prevvalue) && isUPPER(ceilvalue))) + if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) || + (isUPPER(prevvalue) && isUPPER(ceilvalue))) { if (isLOWER(prevvalue)) { for (i = prevvalue; i <= ceilvalue; i++) |