Remove the 'asciir' re subpragma. Should instead implement the 'physical vs logical' range scheme:

- \xAA-\xCC is a native physical range: you want that range of codepoints in your native encoding. In EBCDIC the codepoints in the gaps (between i-j and r-s) should be included.
- \x{AA}-\x{CC} is a physical Unicode range: you want that range of codepoints in Unicode.
- a-z is a logical range: you want that range of 'logical' codepoints in your native encoding. In EBCDIC the codepoints in the gaps (between i-j and r-s) should not be included.

Mixed cases (a-\xAA, etc.) should either be errors, or maybe the 'logical' endpoints should be converted to native/Unicode codepoints and the range handled as a physical range. 'Logical endpoints' are to be recognized only in the A-Z, a-z, and 0-9 ranges. Probably a warning should be given for mixed cases like A-z or a-9 (since such expressions are encoding dependent), with a recommendation to use physical ranges.

p4raw-id: //depot/perl@10085
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-05-11 14:08:20 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-05-11 14:08:20 +0000
commit: 3a3c44472a318717ec0cdc0a7f768125ae0f001f (patch)
tree: 5e068b0d32d9904e2ad08658e1039a9e19088d72 /regcomp.c
parent: c80f55d1ea34dba13189e54d4f4e9bce7de39357 (diff)
download: perl-3a3c44472a318717ec0cdc0a7f768125ae0f001f.tar.gz
1 files changed, 7 insertions, 31 deletions
diff --git a/regcomp.c b/regcomp.c
index bf77ee62f5..a1defc6af9 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3515,14 +3515,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
 			for (value = 0; value < 256; value++) {
-			    if (PL_hints & HINT_RE_ASCIIR) {
-				if (NATIVE_TO_ASCII(value) < 128)
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
-			    else {
-				if (isASCII(value))
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
+			    if (isASCII(value))
+			        ANYOF_BITMAP_SET(ret, value);
 			}
 #endif /* EBCDIC */
 		    }
@@ -3537,14 +3531,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 			    ANYOF_BITMAP_SET(ret, value);
 #else  /* EBCDIC */
 			for (value = 0; value < 256; value++) {
-			    if (PL_hints & HINT_RE_ASCIIR) {
-				if (NATIVE_TO_ASCII(value) >= 128)
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
-			    else {
-				if (!isASCII(value))
-				    ANYOF_BITMAP_SET(ret, value);
-			    }
+			    if (!isASCII(value))
+			        ANYOF_BITMAP_SET(ret, value);
 			}
 #endif /* EBCDIC */
 		    }
@@ -3783,9 +3771,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	} /* end of namedclass \blah */
 
 	if (range) {
-	    if (((prevvalue > value) && !(PL_hints & HINT_RE_ASCIIR)) ||
-                ((NATIVE_TO_UNI(prevvalue) > NATIVE_TO_UNI(value)) &&
-		 (PL_hints & HINT_RE_ASCIIR))) /* b-a */ {
+	    if (prevvalue > value) /* b-a */ {
 		Simple_vFAIL4("Invalid [] range \"%*.*s\"",
 			      RExC_parse - rangebegin,
 			      RExC_parse - rangebegin,
@@ -3823,18 +3809,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
 	        IV ceilvalue = value < 256 ? value : 255;
 
 #ifdef EBCDIC
-		if (PL_hints & HINT_RE_ASCIIR) {
-		    /* New style scheme for ranges:
-		     * use re 'asciir';
-		     * do ranges in ASCII/Unicode space
-		     */
-		    for (i  = NATIVE_TO_ASCII(prevvalue);
-			 i <= NATIVE_TO_ASCII(ceilvalue);
-			 i++)
-		      ANYOF_BITMAP_SET(ret, ASCII_TO_NATIVE(i));
-		}
-		else if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-			 (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+		if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+		    (isUPPER(prevvalue) && isUPPER(ceilvalue)))
 		{
 		    if (isLOWER(prevvalue)) {
 			for (i = prevvalue; i <= ceilvalue; i++)
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-05-11 14:08:20 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-05-11 14:08:20 +0000
commit	3a3c44472a318717ec0cdc0a7f768125ae0f001f (patch)
tree	5e068b0d32d9904e2ad08658e1039a9e19088d72 /regcomp.c
parent	c80f55d1ea34dba13189e54d4f4e9bce7de39357 (diff)
download	perl-3a3c44472a318717ec0cdc0a7f768125ae0f001f.tar.gz