diff options
author | Karl Williamson <khw@cpan.org> | 2021-06-25 10:50:44 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-08-07 05:14:43 -0600 |
commit | c26e6896c8580980b7d819fc5d57c410552da30c (patch) | |
tree | 8edbf9754bd92ead33251d23217c71a6dccda034 | |
parent | 03dc0b1b4f089b6205935b158692b9364ae3453b (diff) | |
download | perl-c26e6896c8580980b7d819fc5d57c410552da30c.tar.gz |
utf8.h: Move macro to earlier in file
This is now defined before first use
-rw-r--r-- | utf8.h | 26 |
1 files changed, 13 insertions, 13 deletions
```diff
@@ -307,6 +307,19 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
  */
 #define UVCHR_IS_INVARIANT(cp) (OFFUNI_IS_INVARIANT(NATIVE_TO_UNI(cp)))
 
+/* This defines the 1-bits that are to be in the first byte of a multi-byte
+ * UTF-8 encoded character that mark it as a start byte and give the number of
+ * bytes that comprise the character. 'len' is that number.
+ *
+ * To illustrate: len = 2 => ((U8) ~ 0b0011_1111) or 1100_0000
+ *                      7 => ((U8) ~ 0b0000_0001) or 1111_1110
+ *                    > 7 => 0xFF
+ *
+ * This is not to be used on a single-byte character. As in many places in
+ * perl, U8 must be 8 bits
+ */
+#define UTF_START_MARK(len) ((U8) ~(0xFF >> (len)))
+
 /* Internal macro to be used only in this file to aid in constructing other
  * publicly accessible macros.
  * The number of bytes required to express this uv in UTF-8, for just those
@@ -448,19 +461,6 @@ uppercase/lowercase/titlecase/fold into.
 #define I8_TO_NATIVE(ch) I8_TO_NATIVE_UTF8(ch)
 #define NATIVE8_TO_UNI(ch) NATIVE_TO_LATIN1(ch)
 
-/* This defines the 1-bits that are to be in the first byte of a multi-byte
- * UTF-8 encoded character that mark it as a start byte and give the number of
- * bytes that comprise the character. 'len' is that number.
- *
- * To illustrate: len = 2 => ((U8) ~ 0b0011_1111) or 1100_0000
- *                      7 => ((U8) ~ 0b0000_0001) or 1111_1110
- *                    > 7 => 0xFF
- *
- * This is not to be used on a single-byte character. As in many places in
- * perl, U8 must be 8 bits
- */
-#define UTF_START_MARK(len) ((U8) ~(0xFF >> (len)))
-
 /* Masks out the initial one bits in a start byte, leaving the real data ones.
  * Doesn't work on an invariant byte. 'len' is the number of bytes in the
  * multi-byte sequence that comprises the character. */
```