summary | refs | log | tree | commit | diff
path: root/utf8.h
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 9
1 files changed, 9 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index 84ee9db822..ef5feccfaf 100644
--- a/utf8.h
+++ b/utf8.h
@@ -163,6 +163,15 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
/* Fold one more byte into a value that is being accumulated: 'old' is shifted
 * left by UTF_ACCUMULATION_SHIFT and OR-ed with 'new', which is first
 * truncated to a U8 and then masked with UTF_CONTINUATION_MASK.
 * 'new' is parenthesized inside the cast so that an expression argument
 * (e.g. "x >> 4") is cast as a whole instead of only its first operand. */
#define UTF8_ACCUMULATE(old, new) (((old) << UTF_ACCUMULATION_SHIFT) | (((U8)(new)) & UTF_CONTINUATION_MASK))
+/* Compute the Unicode code point of a two-byte (never one-byte) UTF-8
+ * character; a single accumulate step suffices.  Only use this when HI is
+ * known to be a legal two-byte start byte and LO a legal continuation byte.
+ * The value may exceed 255 when the character is not downgradable. */
+#define TWO_BYTE_UTF8_TO_UNI(HI, LO)                                        \
+    UTF8_ACCUMULATE(                                                        \
+        (NATIVE_TO_UTF(HI) & UTF_START_MASK(2)),                            \
+        NATIVE_TO_UTF(LO))
+
/* Read the byte that s points at as a U8 and use it as the index into the
 * PL_utf8skip table; the macro yields that table entry.
 * NOTE(review): presumably the entry is the byte length of the character
 * that starts at s -- confirm against the table's definition. */
#define UTF8SKIP(s) \
    PL_utf8skip[*(const U8 *)(s)]
/* Pass c through NATIVE_TO_UTF and hand the result to UNI_IS_INVARIANT;
 * the macro's value is whatever UNI_IS_INVARIANT yields for it. */
#define UTF8_IS_INVARIANT(c) \
    UNI_IS_INVARIANT(NATIVE_TO_UTF(c))