utf8.h, pp.c: Add UTF8_IS_REPLACEMENT macro, and use it

This should speed things up slightly, as it looks directly at the UTF-8 source, instead of having to decode it first.
author: Karl Williamson <public@khwilliamson.com> 2012-04-28 18:38:24 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-05-22 08:24:18 -0600
commit: 28936164408fd41cfaa353665e07fdb257254b20 (patch)
tree: 2f1507e368e61c34b7a8c75ab6639d4be7a5ea9d /utf8.h
parent: a4f7a67c079118a2f900d5f95d3cada67e3475ea (diff)
download: perl-28936164408fd41cfaa353665e07fdb257254b20.tar.gz
1 files changed, 10 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index 4d80d73b97..ad2b339a6c 100644
--- a/utf8.h
+++ b/utf8.h
@@ -347,8 +347,18 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
 #   define UTF8_IS_SURROGATE(s)  (*(s) == UTF_TO_NATIVE(0xF1)                 \
                                  && ((*((s) +1) == UTF_TO_NATIVE(0xB6))       \
 				     || *((s) + 1) == UTF_TO_NATIVE(0xB7)))
+    /* True if U+FFFD (I8 bytes: F1 BF BF BD) starts at <s>.  <send> points to
+     * one beyond the end of the string that starts at <s> */
+#   define UTF8_IS_REPLACEMENT(s, send) (*(s) == UTF_TO_NATIVE(0xF1)          \
+             && ((send) - (s)) >= 4 && *((s) + 1) == UTF_TO_NATIVE(0xBF)      \
+             && *((s) + 2) == UTF_TO_NATIVE(0xBF)                             \
+             && *((s) + 3) == UTF_TO_NATIVE(0xBD))
 #else
 #   define UTF8_IS_SURROGATE(s) (*(s) == 0xED && *((s) + 1) >= 0xA0)
+    /* U+FFFD is EF BF BD in UTF-8; <send> is one beyond the end of the string */
+#   define UTF8_IS_REPLACEMENT(s, send) (*(s) == 0xEF                         \
+                 && ((send) - (s)) >= 3                                       \
+                 && *((s) + 1) == 0xBF && *((s) + 2) == 0xBD)
 #endif
 
 /*		  ASCII		     EBCDIC I8
author	Karl Williamson <public@khwilliamson.com>	2012-04-28 18:38:24 -0600
committer	Karl Williamson <public@khwilliamson.com>	2012-05-22 08:24:18 -0600
commit	28936164408fd41cfaa353665e07fdb257254b20 (patch)
tree	2f1507e368e61c34b7a8c75ab6639d4be7a5ea9d /utf8.h
parent	a4f7a67c079118a2f900d5f95d3cada67e3475ea (diff)
download	perl-28936164408fd41cfaa353665e07fdb257254b20.tar.gz