summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-03-28 15:47:43 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-03-28 15:47:43 +0000
commit: ffbc6a930f7d2050ba54ac6bb9f15db93c1fab59 (patch)
tree: 7e81ef584643250fd840d7fda63c1eb7f7a6ddb6 /regcomp.c
parent: ed4a5f9986e389ab30cb5ae84d8409ef8d4dcaec (diff)
download: perl-ffbc6a930f7d2050ba54ac6bb9f15db93c1fab59.tar.gz
Integrate perlio:
[ 9400] More EBCDIC tweaks:
- one more swash issue: &~(0xA0-1) did not do the right thing, for UTF-EBCDIC where &~(0x80-1) does for UTF-8.
- add "use re 'asciirange'" to make [!-~] etc. work; use it in MIME::QuotedPrint and t/op/regexp.t and t/op/pat.t
- Choose a key for t/op/each.t test which gets encoded.
- Skip utf8decode if this is UTF-EBCDIC.

p4raw-link: @9400 on //depot/perlio: daf0f78e031c718c75590ef9ef573756f805776e
p4raw-id: //depot/perl@9407
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 42
1 files changed, 34 insertions, 8 deletions
diff --git a/regcomp.c b/regcomp.c
index 33765fff9d..85f0e4532e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3402,9 +3402,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
for (value = 0; value < 128; value++)
ANYOF_BITMAP_SET(ret, value);
#else /* EBCDIC */
- for (value = 0; value < 256; value++)
- if (isASCII(value))
- ANYOF_BITMAP_SET(ret, value);
+ for (value = 0; value < 256; value++) {
+ if (PL_hints & HINT_RE_ASCIIR) {
+ if (NATIVE_TO_ASCII(value) < 128)
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ else {
+ if (isASCII(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ }
#endif /* EBCDIC */
}
dont_optimize_invert = TRUE;
@@ -3418,9 +3425,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
for (value = 128; value < 256; value++)
ANYOF_BITMAP_SET(ret, value);
#else /* EBCDIC */
- for (value = 0; value < 256; value++)
- if (!isASCII(value))
- ANYOF_BITMAP_SET(ret, value);
+ for (value = 0; value < 256; value++) {
+ if (PL_hints & HINT_RE_ASCIIR) {
+ if (NATIVE_TO_ASCII(value) >= 128)
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ else {
+ if (!isASCII(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ }
#endif /* EBCDIC */
}
dont_optimize_invert = TRUE;
@@ -3681,7 +3695,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
} /* end of namedclass \blah */
if (range) {
- if (lastvalue > value) /* b-a */ {
+ if (((lastvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
+ ((NATIVE_TO_UNI(lastvalue) > NATIVE_TO_UNI(value)) && (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
Simple_vFAIL4("Invalid [] range \"%*.*s\"",
RExC_parse - rangebegin,
RExC_parse - rangebegin,
@@ -3715,7 +3730,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
if (!SIZE_ONLY) {
if (lastvalue < 256 && value < 256) {
#ifdef EBCDIC /* EBCDIC, for example. */
- if ((isLOWER(lastvalue) && isLOWER(value)) ||
+ if (PL_hints & HINT_RE_ASCIIR) {
+ IV i;
+ /* New-style scheme for ranges:
+ * under
+ * use re 'asciirange';
+ * expand the range in ASCII/Unicode order, mapping each
+ * code point back to its native character */
+ for (i = NATIVE_TO_ASCII(lastvalue) ; i <= NATIVE_TO_ASCII(value); i++)
+ ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
+ }
+ else if ((isLOWER(lastvalue) && isLOWER(value)) ||
(isUPPER(lastvalue) && isUPPER(value)))
{
IV i;
@@ -4519,3 +4544,4 @@ clear_re(pTHXo_ void *r)
{
ReREFCNT_dec((regexp *)r);
}
+