summaryrefslogtreecommitdiff
path: root/unicode_constants.h
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2022-02-13 21:08:22 -0700
committerKarl Williamson <khw@cpan.org>2022-03-19 23:17:51 -0600
commitc7b32e72c6b0a4931897121ac865a0fbc7445f17 (patch)
treeb799bc89198aef4a0c40ede7280e7d367c25b095 /unicode_constants.h
parent63cd44e4d01aafda8bc32c13f34dbab0035ac382 (diff)
downloadperl-c7b32e72c6b0a4931897121ac865a0fbc7445f17.tar.gz
unicode_constants.pl: Generate paired string delimiters
This commit causes several C strings to be generated containing bytes that match paired string delimiters beyond the four that have traditionally been used in Perl. This will allow a future commit to accept more matching delimiters around strings than those four. The code explains how the added delimiters are chosen.
Diffstat (limited to 'unicode_constants.h')
-rw-r--r--unicode_constants.h42
1 files changed, 42 insertions, 0 deletions
diff --git a/unicode_constants.h b/unicode_constants.h
index da240236d4..d588d5e8d8 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -89,6 +89,20 @@ bytes.
# define MICRO_SIGN_NATIVE 0xB5 /* U+00B5 */
# define MICRO_SIGN_UTF8 "\xC2\xB5" /* U+00B5 */
+# ifdef PERL_IN_TOKE_C
+ /* Paired delimiter characters for quote-like operators, as UTF-8 byte sequences. The two strings are parallel: the Nth character of the OPENING string is closed by the Nth character of the CLOSING string. Pairing is positional, not always code point + 1: U+298D is matched with U+2990 and U+298F with U+298E (NOTE(review): this looks intentional, matching the top/bottom tick corner in the Unicode names -- confirm against the generator). */
+# define EXTRA_OPENING_UTF8_BRACKETS "\x28\x3C\x5B\x7B\xC2\xAB\xE0\xBC\xBA\xE0\xBC\xBC\xE1\x9A\x9B\xE2\x80\xB9\xE2\x81\x85\xE2\x81\xBD\xE2\x82\x8D\xE2\x8C\x88\xE2\x8C\x8A\xE2\x8C\xA9\xE2\x9D\xA8\xE2\x9D\xAA\xE2\x9D\xAC\xE2\x9D\xAE\xE2\x9D\xB0\xE2\x9D\xB2\xE2\x9D\xB4\xE2\x9F\x85\xE2\x9F\xA6\xE2\x9F\xA8\xE2\x9F\xAA\xE2\x9F\xAC\xE2\x9F\xAE\xE2\xA6\x83\xE2\xA6\x85\xE2\xA6\x87\xE2\xA6\x89\xE2\xA6\x8B\xE2\xA6\x8D\xE2\xA6\x8F\xE2\xA6\x91\xE2\xA6\x93\xE2\xA6\x95\xE2\xA6\x97\xE2\xA7\x98\xE2\xA7\x9A\xE2\xA7\xBC\xE2\xB8\x82\xE2\xB8\x84\xE2\xB8\x89\xE2\xB8\x8C\xE2\xB8\x9C\xE2\xB8\xA0\xE2\xB8\xA2\xE2\xB8\xA4\xE2\xB8\xA6\xE2\xB8\xA8\xE2\xB9\x95\xE2\xB9\x97\xE2\xB9\x99\xE2\xB9\x9B\xE3\x80\x88\xE3\x80\x8A\xE3\x80\x8C\xE3\x80\x8E\xE3\x80\x90\xE3\x80\x94\xE3\x80\x96\xE3\x80\x98\xE3\x80\x9A\xEF\xB9\x99\xEF\xB9\x9B\xEF\xB9\x9D\xEF\xBC\x88\xEF\xBC\xBB\xEF\xBD\x9B\xEF\xBD\x9F\xEF\xBD\xA2"
+# define EXTRA_CLOSING_UTF8_BRACKETS "\x29\x3E\x5D\x7D\xC2\xBB\xE0\xBC\xBB\xE0\xBC\xBD\xE1\x9A\x9C\xE2\x80\xBA\xE2\x81\x86\xE2\x81\xBE\xE2\x82\x8E\xE2\x8C\x89\xE2\x8C\x8B\xE2\x8C\xAA\xE2\x9D\xA9\xE2\x9D\xAB\xE2\x9D\xAD\xE2\x9D\xAF\xE2\x9D\xB1\xE2\x9D\xB3\xE2\x9D\xB5\xE2\x9F\x86\xE2\x9F\xA7\xE2\x9F\xA9\xE2\x9F\xAB\xE2\x9F\xAD\xE2\x9F\xAF\xE2\xA6\x84\xE2\xA6\x86\xE2\xA6\x88\xE2\xA6\x8A\xE2\xA6\x8C\xE2\xA6\x90\xE2\xA6\x8E\xE2\xA6\x92\xE2\xA6\x94\xE2\xA6\x96\xE2\xA6\x98\xE2\xA7\x99\xE2\xA7\x9B\xE2\xA7\xBD\xE2\xB8\x83\xE2\xB8\x85\xE2\xB8\x8A\xE2\xB8\x8D\xE2\xB8\x9D\xE2\xB8\xA1\xE2\xB8\xA3\xE2\xB8\xA5\xE2\xB8\xA7\xE2\xB8\xA9\xE2\xB9\x96\xE2\xB9\x98\xE2\xB9\x9A\xE2\xB9\x9C\xE3\x80\x89\xE3\x80\x8B\xE3\x80\x8D\xE3\x80\x8F\xE3\x80\x91\xE3\x80\x95\xE3\x80\x97\xE3\x80\x99\xE3\x80\x9B\xEF\xB9\x9A\xEF\xB9\x9C\xEF\xB9\x9E\xEF\xBC\x89\xEF\xBC\xBD\xEF\xBD\x9D\xEF\xBD\xA0\xEF\xBD\xA3"

+ /* The same pairing for strings that are not UTF-8 encoded: only the pairs that fit in a single byte, again matched by position */
+# define EXTRA_OPENING_NON_UTF8_BRACKETS "\x28\x3C\x5B\x7B\xAB" /* ( < [ { U+00AB */
+# define EXTRA_CLOSING_NON_UTF8_BRACKETS "\x29\x3E\x5D\x7D\xBB" /* ) > ] } U+00BB */

+ /* The opening characters above minus the four ASCII ones ( < [ { that Perl has traditionally accepted. NOTE(review): deprecated presumably refers to using these characters as delimiters outside the new pairing -- confirm against the consumer in toke.c */
+# define DEPRECATED_OPENING_UTF8_BRACKETS "\xC2\xAB\xE0\xBC\xBA\xE0\xBC\xBC\xE1\x9A\x9B\xE2\x80\xB9\xE2\x81\x85\xE2\x81\xBD\xE2\x82\x8D\xE2\x8C\x88\xE2\x8C\x8A\xE2\x8C\xA9\xE2\x9D\xA8\xE2\x9D\xAA\xE2\x9D\xAC\xE2\x9D\xAE\xE2\x9D\xB0\xE2\x9D\xB2\xE2\x9D\xB4\xE2\x9F\x85\xE2\x9F\xA6\xE2\x9F\xA8\xE2\x9F\xAA\xE2\x9F\xAC\xE2\x9F\xAE\xE2\xA6\x83\xE2\xA6\x85\xE2\xA6\x87\xE2\xA6\x89\xE2\xA6\x8B\xE2\xA6\x8D\xE2\xA6\x8F\xE2\xA6\x91\xE2\xA6\x93\xE2\xA6\x95\xE2\xA6\x97\xE2\xA7\x98\xE2\xA7\x9A\xE2\xA7\xBC\xE2\xB8\x82\xE2\xB8\x84\xE2\xB8\x89\xE2\xB8\x8C\xE2\xB8\x9C\xE2\xB8\xA0\xE2\xB8\xA2\xE2\xB8\xA4\xE2\xB8\xA6\xE2\xB8\xA8\xE2\xB9\x95\xE2\xB9\x97\xE2\xB9\x99\xE2\xB9\x9B\xE3\x80\x88\xE3\x80\x8A\xE3\x80\x8C\xE3\x80\x8E\xE3\x80\x90\xE3\x80\x94\xE3\x80\x96\xE3\x80\x98\xE3\x80\x9A\xEF\xB9\x99\xEF\xB9\x9B\xEF\xB9\x9D\xEF\xBC\x88\xEF\xBC\xBB\xEF\xBD\x9B\xEF\xBD\x9F\xEF\xBD\xA2"
+# define DEPRECATED_OPENING_NON_UTF8_BRACKETS "\xAB"
+# endif
+
# ifdef PERL_IN_REGCOMP_C
# define MAX_PRINT_A 0x7E /* The max code point that isPRINT_A */
# endif
@@ -136,6 +150,20 @@ bytes.
# define MICRO_SIGN_NATIVE 0xA0 /* U+00B5 */
# define MICRO_SIGN_UTF8 "\x80\x64" /* U+00B5 */
+# ifdef PERL_IN_TOKE_C
+ /* Paired delimiter characters for quote-like operators, in this section's multi-byte encoding (NOTE(review): the byte values are not standard UTF-8; presumably UTF-EBCDIC for one EBCDIC code page -- confirm). The two strings are parallel: the Nth character of the OPENING string is closed by the Nth character of the CLOSING string. Position is set by pairing, not by sort order: in the CLOSING string \xCC\x53\x57 precedes \xCC\x53\x55. */
+# define EXTRA_OPENING_UTF8_BRACKETS "\x4D\x4C\xAD\xC0\x80\x52\xBA\x68\x69\xBA\x68\x70\xBC\x63\x6A\xCA\x42\x68\xCA\x43\x46\xCA\x44\x71\xCA\x45\x54\xCA\x67\x49\xCA\x67\x51\xCA\x68\x4A\xCB\x6A\x49\xCB\x6A\x51\xCB\x6A\x53\xCB\x6A\x55\xCB\x6A\x57\xCB\x6A\x59\xCB\x6A\x63\xCB\x72\x46\xCB\x73\x47\xCB\x73\x49\xCB\x73\x51\xCB\x73\x53\xCB\x73\x55\xCC\x53\x44\xCC\x53\x46\xCC\x53\x48\xCC\x53\x4A\xCC\x53\x52\xCC\x53\x54\xCC\x53\x56\xCC\x53\x58\xCC\x53\x62\xCC\x53\x64\xCC\x53\x66\xCC\x55\x67\xCC\x55\x69\xCC\x56\x70\xCD\x57\x43\xCD\x57\x45\xCD\x57\x4A\xCD\x57\x53\xCD\x57\x70\xCD\x58\x41\xCD\x58\x43\xCD\x58\x45\xCD\x58\x47\xCD\x58\x49\xCD\x59\x64\xCD\x59\x66\xCD\x59\x68\xCD\x59\x6A\xCE\x41\x49\xCE\x41\x51\xCE\x41\x53\xCE\x41\x55\xCE\x41\x57\xCE\x41\x63\xCE\x41\x65\xCE\x41\x67\xCE\x41\x69\xDD\x73\x59\x68\xDD\x73\x59\x6A\xDD\x73\x59\x71\xDD\x73\x67\x49\xDD\x73\x68\x6A\xDD\x73\x69\x6A\xDD\x73\x69\x73\xDD\x73\x6A\x43"
+# define EXTRA_CLOSING_UTF8_BRACKETS "\x5D\x6E\xBD\xD0\x80\x6A\xBA\x68\x6A\xBA\x68\x71\xBC\x63\x70\xCA\x42\x69\xCA\x43\x47\xCA\x44\x72\xCA\x45\x55\xCA\x67\x4A\xCA\x67\x52\xCA\x68\x51\xCB\x6A\x4A\xCB\x6A\x52\xCB\x6A\x54\xCB\x6A\x56\xCB\x6A\x58\xCB\x6A\x62\xCB\x6A\x64\xCB\x72\x47\xCB\x73\x48\xCB\x73\x4A\xCB\x73\x52\xCB\x73\x54\xCB\x73\x56\xCC\x53\x45\xCC\x53\x47\xCC\x53\x49\xCC\x53\x51\xCC\x53\x53\xCC\x53\x57\xCC\x53\x55\xCC\x53\x59\xCC\x53\x63\xCC\x53\x65\xCC\x53\x67\xCC\x55\x68\xCC\x55\x6A\xCC\x56\x71\xCD\x57\x44\xCD\x57\x46\xCD\x57\x51\xCD\x57\x54\xCD\x57\x71\xCD\x58\x42\xCD\x58\x44\xCD\x58\x46\xCD\x58\x48\xCD\x58\x4A\xCD\x59\x65\xCD\x59\x67\xCD\x59\x69\xCD\x59\x70\xCE\x41\x4A\xCE\x41\x52\xCE\x41\x54\xCE\x41\x56\xCE\x41\x58\xCE\x41\x64\xCE\x41\x66\xCE\x41\x68\xCE\x41\x6A\xDD\x73\x59\x69\xDD\x73\x59\x70\xDD\x73\x59\x72\xDD\x73\x67\x4A\xDD\x73\x68\x71\xDD\x73\x69\x71\xDD\x73\x6A\x41\xDD\x73\x6A\x44"

+ /* The same pairing for strings not in the multi-byte encoding: five single-byte pairs matched by position. The first four bytes equal the first four of the lists above; the fifth is presumably the one-byte native form of the first multi-byte pair (TODO confirm) */
+# define EXTRA_OPENING_NON_UTF8_BRACKETS "\x4D\x4C\xAD\xC0\x8A"
+# define EXTRA_CLOSING_NON_UTF8_BRACKETS "\x5D\x6E\xBD\xD0\x8B"

+ /* The opening characters above minus the first four single-byte ones. NOTE(review): deprecated presumably refers to using these characters as delimiters outside the new pairing -- confirm against the consumer in toke.c */
+# define DEPRECATED_OPENING_UTF8_BRACKETS "\x80\x52\xBA\x68\x69\xBA\x68\x70\xBC\x63\x6A\xCA\x42\x68\xCA\x43\x46\xCA\x44\x71\xCA\x45\x54\xCA\x67\x49\xCA\x67\x51\xCA\x68\x4A\xCB\x6A\x49\xCB\x6A\x51\xCB\x6A\x53\xCB\x6A\x55\xCB\x6A\x57\xCB\x6A\x59\xCB\x6A\x63\xCB\x72\x46\xCB\x73\x47\xCB\x73\x49\xCB\x73\x51\xCB\x73\x53\xCB\x73\x55\xCC\x53\x44\xCC\x53\x46\xCC\x53\x48\xCC\x53\x4A\xCC\x53\x52\xCC\x53\x54\xCC\x53\x56\xCC\x53\x58\xCC\x53\x62\xCC\x53\x64\xCC\x53\x66\xCC\x55\x67\xCC\x55\x69\xCC\x56\x70\xCD\x57\x43\xCD\x57\x45\xCD\x57\x4A\xCD\x57\x53\xCD\x57\x70\xCD\x58\x41\xCD\x58\x43\xCD\x58\x45\xCD\x58\x47\xCD\x58\x49\xCD\x59\x64\xCD\x59\x66\xCD\x59\x68\xCD\x59\x6A\xCE\x41\x49\xCE\x41\x51\xCE\x41\x53\xCE\x41\x55\xCE\x41\x57\xCE\x41\x63\xCE\x41\x65\xCE\x41\x67\xCE\x41\x69\xDD\x73\x59\x68\xDD\x73\x59\x6A\xDD\x73\x59\x71\xDD\x73\x67\x49\xDD\x73\x68\x6A\xDD\x73\x69\x6A\xDD\x73\x69\x73\xDD\x73\x6A\x43"
+# define DEPRECATED_OPENING_NON_UTF8_BRACKETS "\x8A"
+# endif
+
# ifdef PERL_IN_REGCOMP_C
# define MAX_PRINT_A 0xF9 /* The max code point that isPRINT_A */
# endif
@@ -183,6 +211,20 @@ bytes.
# define MICRO_SIGN_NATIVE 0xA0 /* U+00B5 */
# define MICRO_SIGN_UTF8 "\x78\x63" /* U+00B5 */
+# ifdef PERL_IN_TOKE_C
+ /* Paired delimiter characters for quote-like operators, in this section's multi-byte encoding (NOTE(review): the byte values are not standard UTF-8; presumably UTF-EBCDIC for one EBCDIC code page -- confirm). The two strings are parallel: the Nth character of the OPENING string is closed by the Nth character of the CLOSING string. Position is set by pairing, not by sort order: in the CLOSING string \xCC\x53\x57 precedes \xCC\x53\x55. */
+# define EXTRA_OPENING_UTF8_BRACKETS "\x4D\x4C\xBA\xC0\x78\x52\xB9\x67\x68\xB9\x67\x6A\xBD\x62\x69\xCA\x42\x67\xCA\x43\x46\xCA\x44\x70\xCA\x45\x54\xCA\x66\x49\xCA\x66\x51\xCA\x67\x4A\xCB\x69\x49\xCB\x69\x51\xCB\x69\x53\xCB\x69\x55\xCB\x69\x57\xCB\x69\x59\xCB\x69\x62\xCB\x71\x46\xCB\x72\x47\xCB\x72\x49\xCB\x72\x51\xCB\x72\x53\xCB\x72\x55\xCC\x53\x44\xCC\x53\x46\xCC\x53\x48\xCC\x53\x4A\xCC\x53\x52\xCC\x53\x54\xCC\x53\x56\xCC\x53\x58\xCC\x53\x5F\xCC\x53\x63\xCC\x53\x65\xCC\x55\x66\xCC\x55\x68\xCC\x56\x6A\xCD\x57\x43\xCD\x57\x45\xCD\x57\x4A\xCD\x57\x53\xCD\x57\x6A\xCD\x58\x41\xCD\x58\x43\xCD\x58\x45\xCD\x58\x47\xCD\x58\x49\xCD\x59\x63\xCD\x59\x65\xCD\x59\x67\xCD\x59\x69\xCE\x41\x49\xCE\x41\x51\xCE\x41\x53\xCE\x41\x55\xCE\x41\x57\xCE\x41\x62\xCE\x41\x64\xCE\x41\x66\xCE\x41\x68\xDD\x72\x59\x67\xDD\x72\x59\x69\xDD\x72\x59\x70\xDD\x72\x66\x49\xDD\x72\x67\x69\xDD\x72\x68\x69\xDD\x72\x68\x72\xDD\x72\x69\x43"
+# define EXTRA_CLOSING_UTF8_BRACKETS "\x5D\x6E\xBB\xD0\x78\x69\xB9\x67\x69\xB9\x67\x70\xBD\x62\x6A\xCA\x42\x68\xCA\x43\x47\xCA\x44\x71\xCA\x45\x55\xCA\x66\x4A\xCA\x66\x52\xCA\x67\x51\xCB\x69\x4A\xCB\x69\x52\xCB\x69\x54\xCB\x69\x56\xCB\x69\x58\xCB\x69\x5F\xCB\x69\x63\xCB\x71\x47\xCB\x72\x48\xCB\x72\x4A\xCB\x72\x52\xCB\x72\x54\xCB\x72\x56\xCC\x53\x45\xCC\x53\x47\xCC\x53\x49\xCC\x53\x51\xCC\x53\x53\xCC\x53\x57\xCC\x53\x55\xCC\x53\x59\xCC\x53\x62\xCC\x53\x64\xCC\x53\x66\xCC\x55\x67\xCC\x55\x69\xCC\x56\x70\xCD\x57\x44\xCD\x57\x46\xCD\x57\x51\xCD\x57\x54\xCD\x57\x70\xCD\x58\x42\xCD\x58\x44\xCD\x58\x46\xCD\x58\x48\xCD\x58\x4A\xCD\x59\x64\xCD\x59\x66\xCD\x59\x68\xCD\x59\x6A\xCE\x41\x4A\xCE\x41\x52\xCE\x41\x54\xCE\x41\x56\xCE\x41\x58\xCE\x41\x63\xCE\x41\x65\xCE\x41\x67\xCE\x41\x69\xDD\x72\x59\x68\xDD\x72\x59\x6A\xDD\x72\x59\x71\xDD\x72\x66\x4A\xDD\x72\x67\x70\xDD\x72\x68\x70\xDD\x72\x69\x41\xDD\x72\x69\x44"

+ /* The same pairing for strings not in the multi-byte encoding: five single-byte pairs matched by position. The first four bytes equal the first four of the lists above; the fifth is presumably the one-byte native form of the first multi-byte pair (TODO confirm) */
+# define EXTRA_OPENING_NON_UTF8_BRACKETS "\x4D\x4C\xBA\xC0\x8A"
+# define EXTRA_CLOSING_NON_UTF8_BRACKETS "\x5D\x6E\xBB\xD0\x8B"

+ /* The opening characters above minus the first four single-byte ones. NOTE(review): deprecated presumably refers to using these characters as delimiters outside the new pairing -- confirm against the consumer in toke.c */
+# define DEPRECATED_OPENING_UTF8_BRACKETS "\x78\x52\xB9\x67\x68\xB9\x67\x6A\xBD\x62\x69\xCA\x42\x67\xCA\x43\x46\xCA\x44\x70\xCA\x45\x54\xCA\x66\x49\xCA\x66\x51\xCA\x67\x4A\xCB\x69\x49\xCB\x69\x51\xCB\x69\x53\xCB\x69\x55\xCB\x69\x57\xCB\x69\x59\xCB\x69\x62\xCB\x71\x46\xCB\x72\x47\xCB\x72\x49\xCB\x72\x51\xCB\x72\x53\xCB\x72\x55\xCC\x53\x44\xCC\x53\x46\xCC\x53\x48\xCC\x53\x4A\xCC\x53\x52\xCC\x53\x54\xCC\x53\x56\xCC\x53\x58\xCC\x53\x5F\xCC\x53\x63\xCC\x53\x65\xCC\x55\x66\xCC\x55\x68\xCC\x56\x6A\xCD\x57\x43\xCD\x57\x45\xCD\x57\x4A\xCD\x57\x53\xCD\x57\x6A\xCD\x58\x41\xCD\x58\x43\xCD\x58\x45\xCD\x58\x47\xCD\x58\x49\xCD\x59\x63\xCD\x59\x65\xCD\x59\x67\xCD\x59\x69\xCE\x41\x49\xCE\x41\x51\xCE\x41\x53\xCE\x41\x55\xCE\x41\x57\xCE\x41\x62\xCE\x41\x64\xCE\x41\x66\xCE\x41\x68\xDD\x72\x59\x67\xDD\x72\x59\x69\xDD\x72\x59\x70\xDD\x72\x66\x49\xDD\x72\x67\x69\xDD\x72\x68\x69\xDD\x72\x68\x72\xDD\x72\x69\x43"
+# define DEPRECATED_OPENING_NON_UTF8_BRACKETS "\x8A"
+# endif
+
# ifdef PERL_IN_REGCOMP_C
# define MAX_PRINT_A 0xF9 /* The max code point that isPRINT_A */
# endif