pp.c: Eliminate custom macro and use Copy() instead

I think it's clearer to use Copy(). When I wrote this custom macro, we didn't have the infrastructure to generate a UTF-8 encoded string at compile time; now that unicode_constants.h can supply such strings, the macro is no longer needed.
author: Karl Williamson <public@khwilliamson.com> 2013-05-04 20:23:14 -0600
committer: Karl Williamson <public@khwilliamson.com> 2013-05-20 11:01:51 -0600
commit: a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637 (patch)
tree: d8b78ec25696f5d58481dd9d822ae316c722d70e
parent: a96bc63507f57f3547f982d5e9a8b95f68e87e3b (diff)
download: perl-a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637.tar.gz
3 files changed, 17 insertions, 15 deletions
diff --git a/pp.c b/pp.c
index bd2d8c907e..48710ccf98 100644
--- a/pp.c
+++ b/pp.c
@@ -47,6 +47,9 @@ extern Pid_t getpid (void);
     _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
 #endif
 
+static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
+static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
+
 /* variations on pp_null */
 
 PP(pp_stub)
@@ -3430,15 +3433,6 @@ PP(pp_crypt)
 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level.  So 
  * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
 
-/* Generates code to store a unicode codepoint c that is known to occupy
- * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
- * and p is advanced to point to the next available byte after the two bytes */
-#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c)					    \
-    STMT_START {							    \
-	*(p)++ = UTF8_TWO_BYTE_HI(c);					    \
-	*((p)++) = UTF8_TWO_BYTE_LO(c);					    \
-    } STMT_END
-
 PP(pp_ucfirst)
 {
     /* Actually is both lcfirst() and ucfirst().  Only the first character
@@ -3762,10 +3756,8 @@ PP(pp_uc)
 	    if (in_iota_subscript && ! _is_utf8_mark(s)) {
 
 		/* A non-mark.  Time to output the iota subscript */
-#define GREEK_CAPITAL_LETTER_IOTA 0x0399
-#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
-
-		CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+		Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+                d += capital_iota_len;
 		in_iota_subscript = FALSE;
             }
 
@@ -3775,6 +3767,8 @@ PP(pp_uc)
             u = UTF8SKIP(s);
             uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
 				      cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#define GREEK_CAPITAL_LETTER_IOTA 0x0399
+#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
             if (uv == GREEK_CAPITAL_LETTER_IOTA
                 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
             {
@@ -3800,7 +3794,8 @@ PP(pp_uc)
             s += u;
 	}
 	if (in_iota_subscript) {
-	    CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+            Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+            d += capital_iota_len;
 	}
 	SvUTF8_on(dest);
 	*d = '\0';
@@ -4231,7 +4226,8 @@ PP(pp_fc)
                                                 (send -s) * 2 + 1);
                     d = (U8*)SvPVX(dest) + len;
 
-                    CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU);
+                    Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
+                    d += small_mu_len;
                     s++;
                     for (; s < send; s++) {
                         STRLEN ulen;
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 1977fbdfc5..0c45cf0eec 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -145,6 +145,9 @@ U+0300 string
 U+0301 string
 U+0308 string
 
+U+0399 string
+U+03BC string
+
 U+03B9 string
 
 U+03C5 string
diff --git a/unicode_constants.h b/unicode_constants.h
index 19f3acd33f..90521ec216 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -25,6 +25,9 @@
 #define COMBINING_ACUTE_ACCENT_UTF8  "\xCC\x81"    /* U+0301 */
 #define COMBINING_DIAERESIS_UTF8  "\xCC\x88"    /* U+0308 */
 
+#define GREEK_CAPITAL_LETTER_IOTA_UTF8  "\xCE\x99"    /* U+0399 */
+#define GREEK_SMALL_LETTER_MU_UTF8  "\xCE\xBC"    /* U+03BC */
+
 #define GREEK_SMALL_LETTER_IOTA_UTF8  "\xCE\xB9"    /* U+03B9 */
 
 #define GREEK_SMALL_LETTER_UPSILON_UTF8  "\xCF\x85"    /* U+03C5 */
author	Karl Williamson <public@khwilliamson.com>	2013-05-04 20:23:14 -0600
committer	Karl Williamson <public@khwilliamson.com>	2013-05-20 11:01:51 -0600
commit	a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637 (patch)
tree	d8b78ec25696f5d58481dd9d822ae316c722d70e
parent	a96bc63507f57f3547f982d5e9a8b95f68e87e3b (diff)
download	perl-a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637.tar.gz