diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-05-04 20:23:14 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-05-20 11:01:51 -0600 |
commit | a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637 (patch) | |
tree | d8b78ec25696f5d58481dd9d822ae316c722d70e | |
parent | a96bc63507f57f3547f982d5e9a8b95f68e87e3b (diff) | |
download | perl-a78bc3c6c5c9dd76fdfbdae88ef4e89136aff637.tar.gz |
pp.c: Eliminate custom macro and use Copy() instead
I think it's clearer to use Copy(). When I wrote this custom macro, we
didn't have the infrastructure to generate a UTF-8 encoded string at
compile time; regen/unicode_constants.pl now generates those strings.
-rw-r--r-- | pp.c | 26 | ||||
-rw-r--r-- | regen/unicode_constants.pl | 3 | ||||
-rw-r--r-- | unicode_constants.h | 3 |
3 files changed, 17 insertions, 15 deletions
@@ -47,6 +47,9 @@ extern Pid_t getpid (void); _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_; #endif +static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1; +static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1; + /* variations on pp_null */ PP(pp_stub) @@ -3430,15 +3433,6 @@ PP(pp_crypt) /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ -/* Generates code to store a unicode codepoint c that is known to occupy - * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1, - * and p is advanced to point to the next available byte after the two bytes */ -#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \ - STMT_START { \ - *(p)++ = UTF8_TWO_BYTE_HI(c); \ - *((p)++) = UTF8_TWO_BYTE_LO(c); \ - } STMT_END - PP(pp_ucfirst) { /* Actually is both lcfirst() and ucfirst(). Only the first character @@ -3762,10 +3756,8 @@ PP(pp_uc) if (in_iota_subscript && ! _is_utf8_mark(s)) { /* A non-mark. 
Time to output the iota subscript */ -#define GREEK_CAPITAL_LETTER_IOTA 0x0399 -#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 - - CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); + Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8); + d += capital_iota_len; in_iota_subscript = FALSE; } @@ -3775,6 +3767,8 @@ PP(pp_uc) u = UTF8SKIP(s); uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, cBOOL(IN_LOCALE_RUNTIME), &tainted); +#define GREEK_CAPITAL_LETTER_IOTA 0x0399 +#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 if (uv == GREEK_CAPITAL_LETTER_IOTA && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI) { @@ -3800,7 +3794,8 @@ PP(pp_uc) s += u; } if (in_iota_subscript) { - CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); + Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8); + d += capital_iota_len; } SvUTF8_on(dest); *d = '\0'; @@ -4231,7 +4226,8 @@ PP(pp_fc) (send -s) * 2 + 1); d = (U8*)SvPVX(dest) + len; - CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU); + Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8); + d += small_mu_len; s++; for (; s < send; s++) { STRLEN ulen; diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl index 1977fbdfc5..0c45cf0eec 100644 --- a/regen/unicode_constants.pl +++ b/regen/unicode_constants.pl @@ -145,6 +145,9 @@ U+0300 string U+0301 string U+0308 string +U+0399 string +U+03BC string + U+03B9 string U+03C5 string diff --git a/unicode_constants.h b/unicode_constants.h index 19f3acd33f..90521ec216 100644 --- a/unicode_constants.h +++ b/unicode_constants.h @@ -25,6 +25,9 @@ #define COMBINING_ACUTE_ACCENT_UTF8 "\xCC\x81" /* U+0301 */ #define COMBINING_DIAERESIS_UTF8 "\xCC\x88" /* U+0308 */ +#define GREEK_CAPITAL_LETTER_IOTA_UTF8 "\xCE\x99" /* U+0399 */ +#define GREEK_SMALL_LETTER_MU_UTF8 "\xCE\xBC" /* U+03BC */ + #define GREEK_SMALL_LETTER_IOTA_UTF8 "\xCE\xB9" /* U+03B9 */ #define GREEK_SMALL_LETTER_UPSILON_UTF8 "\xCF\x85" /* U+03C5 */ |