perl #77654: quotemeta quotes non-ASCII consistently

As described in the pod changes in this commit, this changes quotemeta() to consistently quote non-ASCII characters when used under unicode_strings. The behavior is changed for these and UTF-8 encoded strings to more closely align with Unicode's recommendations. The end result is that we *could* at some future point start using characters other than the current 12 as metacharacters.
author: Karl Williamson <public@khwilliamson.com> 2012-02-15 11:31:27 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-02-15 18:02:35 -0700
commit: 2e2b25717dbde8d9ce48b4b8dc443e1d08166347 (patch)
tree: ca10f48aa5a2fa0549aebebed4109a9d8c59aa24 /pp.c
parent: adfec83175578461303ab5cfcc90d37cb3114126 (diff)
download: perl-2e2b25717dbde8d9ce48b4b8dc443e1d08166347.tar.gz
1 files changed, 29 insertions, 8 deletions
diff --git a/pp.c b/pp.c
index b12772c2de..93e59fa09f 100644
--- a/pp.c
+++ b/pp.c
@@ -4088,24 +4088,45 @@ PP(pp_quotemeta)
 	d = SvPVX(TARG);
 	if (DO_UTF8(sv)) {
 	    while (len) {
-		if (UTF8_IS_CONTINUED(*s)) {
 		    STRLEN ulen = UTF8SKIP(s);
+		bool to_quote = FALSE;
+
+		if (UTF8_IS_INVARIANT(*s)) {
+		    if (_isQUOTEMETA(*s)) {
+			to_quote = TRUE;
+		    }
+		}
+		else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+		    if (_isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1))))
+		    {
+			to_quote = TRUE;
+		    }
+		}
+		else if (_is_utf8_quotemeta(s)) {
+		    to_quote = TRUE;
+		}
+
+		if (to_quote) {
+		    *d++ = '\\';
+		}
 		    if (ulen > len)
 			ulen = len;
 		    len -= ulen;
 		    while (ulen--)
 			*d++ = *s++;
-		}
-		else {
-		    if (!isALNUM(*s))
-			*d++ = '\\';
-		    *d++ = *s++;
-		    len--;
-		}
 	    }
 	    SvUTF8_on(TARG);
 	}
+	else if (IN_UNI_8_BIT) {
+	    while (len--) {
+		if (_isQUOTEMETA(*s))
+		    *d++ = '\\';
+		*d++ = *s++;
+	    }
+	}
 	else {
+	    /* For non UNI_8_BIT (and hence in locale) just quote all \W
+	     * including everything above ASCII */
 	    while (len--) {
 		if (!isWORDCHAR_A(*s))
 		    *d++ = '\\';
author	Karl Williamson <public@khwilliamson.com>	2012-02-15 11:31:27 -0700
committer	Karl Williamson <public@khwilliamson.com>	2012-02-15 18:02:35 -0700
commit	2e2b25717dbde8d9ce48b4b8dc443e1d08166347 (patch)
tree	ca10f48aa5a2fa0549aebebed4109a9d8c59aa24 /pp.c
parent	adfec83175578461303ab5cfcc90d37cb3114126 (diff)
download	perl-2e2b25717dbde8d9ce48b4b8dc443e1d08166347.tar.gz