summaryrefslogtreecommitdiff
path: root/utfebcdic.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2013-03-02 12:12:11 -0700
committer Karl Williamson <public@khwilliamson.com> 2013-08-29 09:56:00 -0600
commit ee372ee9ae7c97db80e5f61d4d6178afe483a803 (patch)
tree f4877a80afdb9de627511052df841b6b0bc299c3 /utfebcdic.h
parent 3cd96634230fc4a063f58b18b2aa85cae3ffb1b2 (diff)
download perl-ee372ee9ae7c97db80e5f61d4d6178afe483a803.tar.gz
utf8.h: Clean up and use START_MARK definition
The previous definition broke good encapsulation rules. UTF_START_MARK should itself return something that fits in a byte; the caller shouldn't have to mask the result. So the mask is moved into the definition. This means the mask needs to be applied only to the portion that can create something larger than a byte. Further, the EBCDIC version can be simplified, since 7 is the largest possible number of bytes in a UTF-EBCDIC character.
Diffstat (limited to 'utfebcdic.h')
-rw-r--r-- utfebcdic.h 4
1 files changed, 3 insertions, 1 deletions
diff --git a/utfebcdic.h b/utfebcdic.h
index b5a33f8cce..0489621147 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -723,7 +723,9 @@ END_EXTERN_C
&& NATIVE_UTF8_TO_I8(c) <= 0xC7)
#define UTF8_IS_ABOVE_LATIN1(c) (NATIVE_UTF8_TO_I8(c) >= 0xC8)
-#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : ((U8)(0xFE << (7-(len)))))
+/* Can't exceed 7 on EBCDIC platforms */
+#define UTF_START_MARK(len) (0xFF & (0xFE << (7-(len))))
+
#define UTF_START_MASK(len) (((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))
#define UTF_CONTINUATION_MARK 0xA0
#define UTF_CONTINUATION_MASK ((U8)0x1f)