summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- utf8.h 26
1 files changed, 13 insertions, 13 deletions
diff --git a/utf8.h b/utf8.h
index e0cc161ced..89fcaf9712 100644
--- a/utf8.h
+++ b/utf8.h
@@ -307,6 +307,19 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
*/
#define UVCHR_IS_INVARIANT(cp) (OFFUNI_IS_INVARIANT(NATIVE_TO_UNI(cp)))
+/* This defines the 1-bits that are to be in the first byte of a multi-byte
+ * UTF-8 encoded character that mark it as a start byte and give the number of
+ * bytes that comprise the character. 'len' is that number.
+ *
+ * To illustrate: len = 2 => ((U8) ~ 0b0011_1111) or 1100_0000
+ *                len = 7 => ((U8) ~ 0b0000_0001) or 1111_1110
+ *                len > 7 => 0xFF
+ *
+ * This is not to be used on a single-byte character. As in many places in
+ * perl, U8 must be 8 bits.
+ */
+#define UTF_START_MARK(len) ((U8) ~(0xFF >> (len)))
+
/* Internal macro to be used only in this file to aid in constructing other
* publicly accessible macros.
* The number of bytes required to express this uv in UTF-8, for just those
@@ -448,19 +461,6 @@ uppercase/lowercase/titlecase/fold into.
#define I8_TO_NATIVE(ch) I8_TO_NATIVE_UTF8(ch)
#define NATIVE8_TO_UNI(ch) NATIVE_TO_LATIN1(ch)
-/* This defines the 1-bits that are to be in the first byte of a multi-byte
- * UTF-8 encoded character that mark it as a start byte and give the number of
- * bytes that comprise the character. 'len' is that number.
- *
- * To illustrate: len = 2 => ((U8) ~ 0b0011_1111) or 1100_0000
- * 7 => ((U8) ~ 0b0000_0001) or 1111_1110
- * > 7 => 0xFF
- *
- * This is not to be used on a single-byte character. As in many places in
- * perl, U8 must be 8 bits
- */
-#define UTF_START_MARK(len) ((U8) ~(0xFF >> (len)))
-
/* Masks out the initial one bits in a start byte, leaving the real data ones.
* Doesn't work on an invariant byte. 'len' is the number of bytes in the
* multi-byte sequence that comprises the character. */