summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2017-07-01 06:32:28 -0600
committerKarl Williamson <khw@cpan.org>2017-07-12 21:14:26 -0600
commita77c906e26e63e32dbf58d6de81399b8e3534fd1 (patch)
tree6116c40ccd62c46fd2f483da9464851a7469b2f1 /utf8.c
parentd6be65aef0919d5dceda4442de95f5de90b57e41 (diff)
downloadperl-a77c906e26e63e32dbf58d6de81399b8e3534fd1.tar.gz
utf8.c: Move a fcn within the file
This simply moves a function to later in the file. The next commit will change it to need a definition which, until this commit, came after it in the file, and so was not available to it.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c152
1 files changed, 76 insertions, 76 deletions
diff --git a/utf8.c b/utf8.c
index 50ce466adf..cc5e7d29f8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -482,82 +482,6 @@ S_is_utf8_cp_above_31_bits(const U8 * const s, const U8 * const e)
#endif
-/* Anything larger than this will overflow the word if it were converted into a UV */
-#if defined(UV_IS_QUAD)
-# ifdef EBCDIC /* Actually is I8 */
-# define HIGHEST_REPRESENTABLE_UTF8 \
- "\xFF\xAF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
-# else
-# define HIGHEST_REPRESENTABLE_UTF8 \
- "\xFF\x80\x8F\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
-# endif
-#else /* 32-bit */
-# ifdef EBCDIC
-# define HIGHEST_REPRESENTABLE_UTF8 \
- "\xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA3\xBF\xBF\xBF\xBF\xBF\xBF"
-# else
-# define HIGHEST_REPRESENTABLE_UTF8 "\xFE\x83\xBF\xBF\xBF\xBF\xBF"
-# endif
-#endif
-
-PERL_STATIC_INLINE bool
-S_does_utf8_overflow(const U8 * const s, const U8 * e)
-{
- const U8 *x;
- const U8 * y = (const U8 *) HIGHEST_REPRESENTABLE_UTF8;
-
-#if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
-
- const STRLEN len = e - s;
-
-#endif
-
- /* Returns a boolean as to if this UTF-8 string would overflow a UV on this
- * platform, that is if it represents a code point larger than the highest
- * representable code point. (For ASCII platforms, we could use memcmp()
- * because we don't have to convert each byte to I8, but it's very rare
- * input indeed that would approach overflow, so the loop below will likely
- * only get executed once.
- *
- * 'e' must not be beyond a full character. If it is less than a full
- * character, the function returns FALSE if there is any input beyond 'e'
- * that could result in a non-overflowing code point */
-
- PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW;
- assert(s <= e && s + UTF8SKIP(s) >= e);
-
-#if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
-
- /* On 32 bit ASCII machines, many overlongs that start with FF don't
- * overflow */
-
- if (isFF_OVERLONG(s, len) > 0) {
- const U8 max_32_bit_overlong[] = "\xFF\x80\x80\x80\x80\x80\x80\x84";
- return memGE(s, max_32_bit_overlong,
- MIN(len, sizeof(max_32_bit_overlong) - 1));
- }
-
-#endif
-
- for (x = s; x < e; x++, y++) {
-
- if (UNLIKELY(NATIVE_UTF8_TO_I8(*x) == *y)) {
- continue;
- }
-
- /* If this byte is larger than the corresponding highest UTF-8 byte,
- * the sequence overflow; otherwise the byte is less than, and so the
- * sequence doesn't overflow */
- return NATIVE_UTF8_TO_I8(*x) > *y;
-
- }
-
- /* Got to the end and all bytes are the same. If the input is a whole
- * character, it doesn't overflow. And if it is a partial character,
- * there's not enough information to tell, so assume doesn't overflow */
- return FALSE;
-}
-
PERL_STATIC_INLINE int
S_is_utf8_overlong_given_start_byte_ok(const U8 * const s, const STRLEN len)
{
@@ -655,6 +579,82 @@ S_isFF_OVERLONG(const U8 * const s, const STRLEN len)
return -1;
}
+/* Anything larger than this will overflow the word if it were converted into a UV */
+#if defined(UV_IS_QUAD)
+# ifdef EBCDIC /* Actually is I8 */
+# define HIGHEST_REPRESENTABLE_UTF8 \
+ "\xFF\xAF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+# else
+# define HIGHEST_REPRESENTABLE_UTF8 \
+ "\xFF\x80\x8F\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+# endif
+#else /* 32-bit */
+# ifdef EBCDIC
+# define HIGHEST_REPRESENTABLE_UTF8 \
+ "\xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA3\xBF\xBF\xBF\xBF\xBF\xBF"
+# else
+# define HIGHEST_REPRESENTABLE_UTF8 "\xFE\x83\xBF\xBF\xBF\xBF\xBF"
+# endif
+#endif
+
+PERL_STATIC_INLINE bool
+S_does_utf8_overflow(const U8 * const s, const U8 * e)
+{
+ const U8 *x;
+ const U8 * y = (const U8 *) HIGHEST_REPRESENTABLE_UTF8;
+
+#if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
+
+ const STRLEN len = e - s;
+
+#endif
+
+ /* Returns a boolean as to if this UTF-8 string would overflow a UV on this
+ * platform, that is if it represents a code point larger than the highest
+ * representable code point. (For ASCII platforms, we could use memcmp()
+ * because we don't have to convert each byte to I8, but it's very rare
+ * input indeed that would approach overflow, so the loop below will likely
+ * only get executed once.
+ *
+ * 'e' must not be beyond a full character. If it is less than a full
+ * character, the function returns FALSE if there is any input beyond 'e'
+ * that could result in a non-overflowing code point */
+
+ PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW;
+ assert(s <= e && s + UTF8SKIP(s) >= e);
+
+#if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
+
+ /* On 32 bit ASCII machines, many overlongs that start with FF don't
+ * overflow */
+
+ if (isFF_OVERLONG(s, len) > 0) {
+ const U8 max_32_bit_overlong[] = "\xFF\x80\x80\x80\x80\x80\x80\x84";
+ return memGE(s, max_32_bit_overlong,
+ MIN(len, sizeof(max_32_bit_overlong) - 1));
+ }
+
+#endif
+
+ for (x = s; x < e; x++, y++) {
+
+ if (UNLIKELY(NATIVE_UTF8_TO_I8(*x) == *y)) {
+ continue;
+ }
+
+ /* If this byte is larger than the corresponding highest UTF-8 byte,
+ * the sequence overflow; otherwise the byte is less than, and so the
+ * sequence doesn't overflow */
+ return NATIVE_UTF8_TO_I8(*x) > *y;
+
+ }
+
+ /* Got to the end and all bytes are the same. If the input is a whole
+ * character, it doesn't overflow. And if it is a partial character,
+ * there's not enough information to tell, so assume doesn't overflow */
+ return FALSE;
+}
+
#undef F0_ABOVE_OVERLONG
#undef F8_ABOVE_OVERLONG
#undef FC_ABOVE_OVERLONG