reg_namedseq: Restructure so doesn't duplicate code

This routine now calls reg() recursively after converting the parse to something the rest of the code understands. This eliminates duplicated code, and allows for uniform treatment of code points, as things were getting out of sync. It also eliminates the restriction on how many characters a named sequence can expand to. toke now converts its input (which is in Unicode terms) to native on EBCDIC platforms, so the rest of the code can continue to ignore that. The restriction on the number of characters a named sequence can expand to is hereby removed, because reg() handles that.
author: Karl Williamson <public@khwilliamson.com> 2011-03-20 10:25:17 -0600
committer: Karl Williamson <public@khwilliamson.com> 2011-03-20 12:16:12 -0600
commit: e2a7e16564e5652c046ec138d11bfa77e7c86836 (patch)
tree: e0d2fb7425a57217663e09e894580464d92d9ea9 /toke.c
parent: 9d64099bb0a1ca98620e6124baa4038dd20cf89e (diff)
download: perl-e2a7e16564e5652c046ec138d11bfa77e7c86836.tar.gz
1 files changed, 20 insertions, 6 deletions
diff --git a/toke.c b/toke.c
index 2dbe7f7d07..6933e62af4 100644
--- a/toke.c
+++ b/toke.c
@@ -3140,12 +3140,22 @@ S_scan_const(pTHX_ char *start)
 
 		    if (PL_lex_inpat) {
 
-			/* Pass through to the regex compiler unchanged.  The
-			 * reason we evaluated the number above is to make sure
-			 * there wasn't a syntax error. */
+			/* On non-EBCDIC platforms, pass through to the regex
+			 * compiler unchanged.  The reason we evaluated the
+			 * number above is to make sure there wasn't a syntax
+			 * error.  But on EBCDIC we convert to native so
+			 * downstream code can continue to assume it's native
+			 */
 			s -= 5;	    /* Include the '\N{U+' */
+#ifdef EBCDIC
+			d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
+							       and the \0 */
+				    "\\N{U+%X}",
+				    (unsigned int) UNI_TO_NATIVE(uv));
+#else
 			Copy(s, d, e - s + 1, char);	/* 1 = include the } */
 			d += e - s + 1;
+#endif
 		    }
 		    else {  /* Not a pattern: convert the hex to string */
 
@@ -3239,10 +3249,13 @@ S_scan_const(pTHX_ char *start)
 			    }
 
 			    /* Convert first code point to hex, including the
-			     * boiler plate before it */
+			     * boiler plate before it.  For all these, we
+			     * convert to native format so that downstream code
+			     * can continue to assume the input is native */
 			    output_length =
 				my_snprintf(hex_string, sizeof(hex_string),
-					    "\\N{U+%X", (unsigned int) uv);
+					    "\\N{U+%X",
+					    (unsigned int) UNI_TO_NATIVE(uv));
 
 			    /* Make sure there is enough space to hold it */
 			    d = off + SvGROW(sv, off
@@ -3267,7 +3280,8 @@ S_scan_const(pTHX_ char *start)
 
 				output_length =
 				    my_snprintf(hex_string, sizeof(hex_string),
-						".%X", (unsigned int) uv);
+					    ".%X",
+					    (unsigned int) UNI_TO_NATIVE(uv));
 
 				d = off + SvGROW(sv, off
 						     + output_length
author	Karl Williamson <public@khwilliamson.com>	2011-03-20 10:25:17 -0600
committer	Karl Williamson <public@khwilliamson.com>	2011-03-20 12:16:12 -0600
commit	e2a7e16564e5652c046ec138d11bfa77e7c86836 (patch)
tree	e0d2fb7425a57217663e09e894580464d92d9ea9 /toke.c
parent	9d64099bb0a1ca98620e6124baa4038dd20cf89e (diff)
download	perl-e2a7e16564e5652c046ec138d11bfa77e7c86836.tar.gz