summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2013-05-04 20:23:14 -0600
committer Karl Williamson <public@khwilliamson.com> 2013-05-20 11:01:51 -0600
commit a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637 (patch)
tree d8b78ec25696f5d58481dd9d822ae316c722d70e
parent a96bc63507f57f3547f982d5e9a8b95f68e87e3b (diff)
download perl-a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637.tar.gz
pp.c: Eliminate custom macro and use Copy() instead
I think it's clearer to use Copy. When I wrote this custom macro, we didn't have the infrastructure to generate a UTF-8 encoded string at compile time.
-rw-r--r-- pp.c 26
-rw-r--r-- regen/unicode_constants.pl 3
-rw-r--r-- unicode_constants.h 3
3 files changed, 17 insertions, 15 deletions
diff --git a/pp.c b/pp.c
index bd2d8c907e..48710ccf98 100644
--- a/pp.c
+++ b/pp.c
@@ -47,6 +47,9 @@ extern Pid_t getpid (void);
_LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
#endif
+static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
+static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
+
/* variations on pp_null */
PP(pp_stub)
@@ -3430,15 +3433,6 @@ PP(pp_crypt)
/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
* most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
-/* Generates code to store a unicode codepoint c that is known to occupy
- * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
- * and p is advanced to point to the next available byte after the two bytes */
-#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
- STMT_START { \
- *(p)++ = UTF8_TWO_BYTE_HI(c); \
- *((p)++) = UTF8_TWO_BYTE_LO(c); \
- } STMT_END
-
PP(pp_ucfirst)
{
/* Actually is both lcfirst() and ucfirst(). Only the first character
@@ -3762,10 +3756,8 @@ PP(pp_uc)
if (in_iota_subscript && ! _is_utf8_mark(s)) {
/* A non-mark. Time to output the iota subscript */
-#define GREEK_CAPITAL_LETTER_IOTA 0x0399
-#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
-
- CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+ Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+ d += capital_iota_len;
in_iota_subscript = FALSE;
}
@@ -3775,6 +3767,8 @@ PP(pp_uc)
u = UTF8SKIP(s);
uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#define GREEK_CAPITAL_LETTER_IOTA 0x0399
+#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
if (uv == GREEK_CAPITAL_LETTER_IOTA
&& utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
{
@@ -3800,7 +3794,8 @@ PP(pp_uc)
s += u;
}
if (in_iota_subscript) {
- CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+ Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+ d += capital_iota_len;
}
SvUTF8_on(dest);
*d = '\0';
@@ -4231,7 +4226,8 @@ PP(pp_fc)
(send -s) * 2 + 1);
d = (U8*)SvPVX(dest) + len;
- CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU);
+ Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
+ d += small_mu_len;
s++;
for (; s < send; s++) {
STRLEN ulen;
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 1977fbdfc5..0c45cf0eec 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -145,6 +145,9 @@ U+0300 string
U+0301 string
U+0308 string
+U+0399 string
+U+03BC string
+
U+03B9 string
U+03C5 string
diff --git a/unicode_constants.h b/unicode_constants.h
index 19f3acd33f..90521ec216 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -25,6 +25,9 @@
#define COMBINING_ACUTE_ACCENT_UTF8 "\xCC\x81" /* U+0301 */
#define COMBINING_DIAERESIS_UTF8 "\xCC\x88" /* U+0308 */
+#define GREEK_CAPITAL_LETTER_IOTA_UTF8 "\xCE\x99" /* U+0399 */
+#define GREEK_SMALL_LETTER_MU_UTF8 "\xCE\xBC" /* U+03BC */
+
#define GREEK_SMALL_LETTER_IOTA_UTF8 "\xCE\xB9" /* U+03B9 */
#define GREEK_SMALL_LETTER_UPSILON_UTF8 "\xCF\x85" /* U+03C5 */