utf8.c: Move and slightly change comment block

This is so there are fewer real differences shown in the next commit
author: Karl Williamson <khw@cpan.org> 2017-07-12 20:26:18 -0600
committer: Karl Williamson <khw@cpan.org> 2017-07-12 21:14:26 -0600
commit: 5f995336c78d31708a69477c3351b87e285d64b8 (patch)
tree: 7dcaff92af97a8be50e8a640da77b2ec1f6aceda /utf8.c
parent: c285bbc4a6321e4e787d0fac9f34c354c7647256 (diff)
download: perl-5f995336c78d31708a69477c3351b87e285d64b8.tar.gz
1 files changed, 18 insertions, 15 deletions
diff --git a/utf8.c b/utf8.c
index b55c93115e..fea8fae8c9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -422,19 +422,6 @@ S_is_utf8_cp_above_31_bits(const U8 * const s, const U8 * const e)
      * several places in this file, so is centralized here.  It is based on the
      * following table:
      *
-     * U+7FFFFFFF (2 ** 31 - 1)
-     *      ASCII: \xFD\xBF\xBF\xBF\xBF\xBF
-     *   IBM-1047: \xFE\x41\x41\x41\x41\x41\x41\x42\x73\x73\x73\x73\x73\x73
-     *    IBM-037: \xFE\x41\x41\x41\x41\x41\x41\x42\x72\x72\x72\x72\x72\x72
-     *   POSIX-BC: \xFE\x41\x41\x41\x41\x41\x41\x42\x75\x75\x75\x75\x75\x75
-     *         I8: \xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA1\xBF\xBF\xBF\xBF\xBF\xBF
-     * U+80000000 (2 ** 31):
-     *      ASCII: \xFE\x82\x80\x80\x80\x80\x80
-     *              [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] 10  11  12  13
-     *   IBM-1047: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
-     *    IBM-037: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
-     *   POSIX-BC: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
-     *         I8: \xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA2\xA0\xA0\xA0\xA0\xA0\xA0
      */
 
 #ifdef EBCDIC
@@ -472,8 +459,24 @@ S_is_utf8_cp_above_31_bits(const U8 * const s, const U8 * const e)
         return FALSE;
     }
 
-    /* Note that in UTF-EBCDIC, the two lowest possible continuation bytes are
-     * \x41 and \x42. */
+        /* U+7FFFFFFF (2 ** 31 - 1):
+         *              [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] 10  11  12  13
+         *   IBM-1047: \xFE\x41\x41\x41\x41\x41\x41\x42\x73\x73\x73\x73\x73\x73
+         *    IBM-037: \xFE\x41\x41\x41\x41\x41\x41\x42\x72\x72\x72\x72\x72\x72
+         *   POSIX-BC: \xFE\x41\x41\x41\x41\x41\x41\x42\x75\x75\x75\x75\x75\x75
+         *         I8: \xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA1\xBF\xBF\xBF\xBF\xBF\xBF
+         * U+80000000 (2 ** 31):
+         *   IBM-1047: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
+         *    IBM-037: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
+         *   POSIX-BC: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
+         *         I8: \xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA2\xA0\xA0\xA0\xA0\xA0\xA0
+         */
+        /* (Note that in UTF-EBCDIC, the two lowest possible continuation bytes
+         * are \x41 and \x42.)  If we have enough bytes available to determine
+         * the answer, or the bytes we do have differ from the UTF-8 prefix of
+         * the highest 31-bit code point, we can compare them to get a
+         * definitive answer. */
+
     return cBOOL(memGT(s + 1, prefix, cmp_len));
 
 #endif
author	Karl Williamson <khw@cpan.org>	2017-07-12 20:26:18 -0600
committer	Karl Williamson <khw@cpan.org>	2017-07-12 21:14:26 -0600
commit	5f995336c78d31708a69477c3351b87e285d64b8 (patch)
tree	7dcaff92af97a8be50e8a640da77b2ec1f6aceda /utf8.c
parent	c285bbc4a6321e4e787d0fac9f34c354c7647256 (diff)
download	perl-5f995336c78d31708a69477c3351b87e285d64b8.tar.gz