diff options
author | Karl Williamson <khw@cpan.org> | 2015-10-29 20:32:08 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-11-09 10:53:32 -0700 |
commit | 111e8ed9ecc83b21b1472dfeafdb1e1918ddd493 (patch) | |
tree | b252b765b50bdc33ec2ad0d4df3fdf90a78d840e | |
parent | 6937f88504ae9c43ca157cfe296151540cdf5d36 (diff) | |
download | perl-111e8ed9ecc83b21b1472dfeafdb1e1918ddd493.tar.gz |
utf8.h, utfebcdic.h: Use mnemonic constant
The magic number 13 is used in various places on ASCII platforms, and
7 correspondingly on EBCDIC. This commit moves the #define of
UTF8_MAXBYTES, the symbolic name for these values, to an earlier point in
each header, and uses that name in place of the bare number thereafter.
-rw-r--r-- | utf8.h | 18 | ||||
-rw-r--r-- | utfebcdic.h | 29 |
2 files changed, 24 insertions, 23 deletions
@@ -97,6 +97,13 @@ than just the ASCII characters, so C<is_invariant_string> is preferred. #else /* ! EBCDIC */ START_EXTERN_C +/* How wide can a single UTF-8 encoded character become in bytes. */ +/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8 + * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be + * expressed with 4 bytes. However, Perl thinks of UTF-8 as a way to encode + * non-negative integers in a binary format, even those above Unicode */ +#define UTF8_MAXBYTES 13 + #ifdef DOINIT EXTCONST unsigned char PL_utf8skip[] = { /* 0x00 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* ascii */ @@ -119,7 +126,7 @@ EXTCONST unsigned char PL_utf8skip[] = { /* Perl extended (never was official UTF-8). Up to 36 bit */ /* 0xFE */ 7, /* More extended, Up to 72 bits (64-bit + reserved) */ -/* 0xFF */ 13 +/* 0xFF */ UTF8_MAXBYTES }; #else EXTCONST unsigned char PL_utf8skip[]; @@ -264,7 +271,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF. (uv) < 0x200000 ? 4 : \ (uv) < 0x4000000 ? 5 : \ (uv) < 0x80000000 ? 6 : \ - (uv) < UTF8_QUAD_MAX ? 7 : 13 ) + (uv) < UTF8_QUAD_MAX ? 7 : UTF8_MAXBYTES ) #else /* No, I'm not even going to *TRY* putting #ifdef inside a #define */ #define OFFUNISKIP(uv) ( (uv) < 0x80 ? 1 : \ @@ -275,13 +282,6 @@ Perl's extended UTF-8 means we can have start bytes up to FF. (uv) < 0x80000000 ? 6 : 7 ) #endif -/* How wide can a single UTF-8 encoded character become in bytes. */ -/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8 - * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be - * expressed with 4 bytes. However, Perl thinks of UTF-8 as a way to encode - * non-negative integers in a binary format, even those above Unicode */ -#define UTF8_MAXBYTES 13 - /* The maximum number of UTF-8 bytes a single Unicode character can * uppercase/lowercase/fold into. Unicode guarantees that the maximum * expansion is 3 characters. 
On ASCIIish platforms, the highest Unicode diff --git a/utfebcdic.h b/utfebcdic.h index 5912b3a142..1e4dc7c26a 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -145,6 +145,13 @@ END_EXTERN_C #define NATIVE_TO_UNI(ch) (FITS_IN_8_BITS(ch) ? NATIVE_TO_LATIN1(ch) : (ch)) #define UNI_TO_NATIVE(ch) (FITS_IN_8_BITS(ch) ? LATIN1_TO_NATIVE(ch) : (ch)) +/* How wide can a single UTF-8 encoded character become in bytes. */ +/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8 + * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be + * expressed with 5 bytes. However, Perl thinks of UTF-8 as a way to encode + * non-negative integers in a binary format, even those above Unicode. */ +#define UTF8_MAXBYTES 7 + /* The following table is adapted from tr16, it shows I8 encoding of Unicode code points. @@ -164,12 +171,13 @@ END_EXTERN_C */ /* Input is a true Unicode (not-native) code point */ -#define OFFUNISKIP(uv) ( (uv) < 0xA0 ? 1 : \ - (uv) < 0x400 ? 2 : \ - (uv) < 0x4000 ? 3 : \ - (uv) < 0x40000 ? 4 : \ - (uv) < 0x400000 ? 5 : \ - (uv) < 0x4000000 ? 6 : 7 ) +#define OFFUNISKIP(uv) ( (uv) < 0xA0 ? 1 : \ + (uv) < 0x400 ? 2 : \ + (uv) < 0x4000 ? 3 : \ + (uv) < 0x40000 ? 4 : \ + (uv) < 0x400000 ? 5 : \ + (uv) < 0x4000000 ? 6 : \ + (uv) < 0x40000000 ? 7 : UTF8_MAXBYTES ) #define OFFUNI_IS_INVARIANT(c) (((UV)(c)) < 0xA0) @@ -185,7 +193,7 @@ END_EXTERN_C (uv) < 0x4000 ? 3 : \ (uv) < 0x40000 ? 4 : \ (uv) < 0x400000 ? 5 : \ - (uv) < 0x4000000 ? 6 : 7 ) + (uv) < 0x4000000 ? 6 : UTF8_MAXBYTES ) /* UTF-EBCDIC semantic macros - We used to transform back into I8 and then * compare, but now only have to do a single lookup by using a bit in @@ -221,13 +229,6 @@ END_EXTERN_C #define UTF_CONTINUATION_MASK ((U8)0x1f) #define UTF_ACCUMULATION_SHIFT 5 -/* How wide can a single UTF-8 encoded character become in bytes. 
*/ -/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8 - * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be - * expressed with 5 bytes. However, Perl thinks of UTF-8 as a way to encode - * non-negative integers in a binary format, even those above Unicode */ -#define UTF8_MAXBYTES 7 - /* The maximum number of UTF-8 bytes a single Unicode character can * uppercase/lowercase/fold into. Unicode guarantees that the maximum * expansion is 3 characters. On EBCDIC platforms, the highest Unicode |