diff options
-rw-r--r-- | handy.h | 17 | ||||
-rw-r--r-- | regcomp.c | 11 | ||||
-rw-r--r-- | regen/unicode_constants.pl | 32 | ||||
-rw-r--r-- | regexec.c | 2 | ||||
-rw-r--r-- | unicode_constants.h | 20 | ||||
-rw-r--r-- | utf8.c | 6 |
6 files changed, 52 insertions, 36 deletions
@@ -1616,16 +1616,21 @@ END_EXTERN_C # endif /* Participates in a single-character fold with a character above 255 */ -# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD))) +# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) +# define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(c) \ + (( ! cBOOL(FITS_IN_8_BITS(c))) \ + || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD))) + +# define IS_NON_FINAL_FOLD(c) _generic_isCC(c, _CC_NON_FINAL_FOLD) +# define IS_IN_SOME_FOLD_L1(c) _generic_isCC(c, _CC_IS_IN_SOME_FOLD) +# endif /* Like the above, but also can be part of a multi-char fold */ -# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD))) +# define HAS_NONLATIN1_FOLD_CLOSURE(c) \ + ( (! cBOOL(FITS_IN_8_BITS(c))) \ + || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD))) # define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA) -# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \ - _generic_isCC(c, _CC_NON_FINAL_FOLD) -# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \ - _generic_isCC(c, _CC_IS_IN_SOME_FOLD) /* is c a control character for which we have a mnemonic? */ # if defined(PERL_CORE) || defined(PERL_EXT) @@ -143,13 +143,6 @@ EXTERN_C const struct regexp_engine wild_reg_engine; #include "invlist_inline.h" #include "unicode_constants.h" -#define HAS_NONLATIN1_FOLD_CLOSURE(i) \ - _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i) -#define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(i) \ - _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i) -#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) -#define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) - #ifndef STATIC #define STATIC static #endif @@ -2133,8 +2126,6 @@ S_ssc_clear_locale(regnode_ssc *ssc) ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS; } -#define NON_OTHER_COUNT NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C - STATIC bool S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc) { @@ -22237,8 +22228,6 @@ S_put_code_point(pTHX_ SV *sv, UV c) } } -#define MAX_PRINT_A MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C - STATIC void S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals) { diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl index aba447ae6a..44c633e711 100644 --- a/regen/unicode_constants.pl +++ b/regen/unicode_constants.pl @@ -162,9 +162,15 @@ foreach my $charset (get_supported_code_pages()) { for my $i (0x20 .. 0x7E) { $max_PRINT_A = $a2n[$i] if $a2n[$i] > $max_PRINT_A; } - printf $out_fh "# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0x%02X /* The max code point that isPRINT_A */\n", $max_PRINT_A; + $max_PRINT_A = sprintf "0x%02X", $max_PRINT_A; + print $out_fh <<"EOT"; - print $out_fh "\n" . get_conditional_compile_line_end(); +# ifdef PERL_IN_REGCOMP_C +# define MAX_PRINT_A $max_PRINT_A /* The max code point that isPRINT_A */ +# endif +EOT + + print $out_fh get_conditional_compile_line_end(); } @@ -178,9 +184,14 @@ for (my $i = 0; $i < @other_invlist; $i += 2) { : 0x110000) - $other_invlist[$i]; } -printf $out_fh "\n/* The number of code points not matching \\pC */\n" - . "#define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C %d\n", - 0x110000 - $count; +$count = 0x110000 - $count; +print $out_fh <<~"EOT"; + + /* The number of code points not matching \\pC */ + #ifdef PERL_IN_REGCOMP_C + # define NON_OTHER_COUNT $count + #endif + EOT # If this release has both the CWCM and CWCF properties, find the highest code # point which changes under any case change. We can use this to short-circuit @@ -192,9 +203,14 @@ if (@cwcm) { my $max = ($cwcm[-1] < $cwcf[-1]) ? $cwcf[-1] : $cwcm[-1]; - printf $out_fh "\n/* The highest code point that has any type of case change */\n" - . "#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x%X\n", - $max - 1; + $max = sprintf "0x%X", $max - 1; + print $out_fh <<~"EOS"; + + /* The highest code point that has any type of case change */ + #ifdef PERL_IN_UTF8_C + # define HIGHEST_CASE_CHANGING_CP $max + #endif + EOS } } @@ -118,8 +118,6 @@ static const char non_utf8_target_but_utf8_required[] goto target; \ } STMT_END -#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i) - #ifndef STATIC #define STATIC static #endif diff --git a/unicode_constants.h b/unicode_constants.h index 232f18c464..eea66f6ba4 100644 --- a/unicode_constants.h +++ b/unicode_constants.h @@ -88,8 +88,10 @@ bytes. # define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE 0xFF /* U+00FF */ # define MICRO_SIGN_NATIVE 0xB5 /* U+00B5 */ # define MICRO_SIGN_UTF8 "\xC2\xB5" /* U+00B5 */ -# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0x7E /* The max code point that isPRINT_A */ +# ifdef PERL_IN_REGCOMP_C +# define MAX_PRINT_A 0x7E /* The max code point that isPRINT_A */ +# endif #endif /* ASCII/Latin1 */ #if 'A' == 193 /* EBCDIC 1047 */ \ @@ -133,8 +135,10 @@ bytes. # define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE 0xDF /* U+00FF */ # define MICRO_SIGN_NATIVE 0xA0 /* U+00B5 */ # define MICRO_SIGN_UTF8 "\x80\x64" /* U+00B5 */ -# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0xF9 /* The max code point that isPRINT_A */ +# ifdef PERL_IN_REGCOMP_C +# define MAX_PRINT_A 0xF9 /* The max code point that isPRINT_A */ +# endif #endif /* EBCDIC 1047 */ #if 'A' == 193 /* EBCDIC 037 */ \ @@ -178,15 +182,21 @@ bytes. # define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE 0xDF /* U+00FF */ # define MICRO_SIGN_NATIVE 0xA0 /* U+00B5 */ # define MICRO_SIGN_UTF8 "\x78\x63" /* U+00B5 */ -# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0xF9 /* The max code point that isPRINT_A */ +# ifdef PERL_IN_REGCOMP_C +# define MAX_PRINT_A 0xF9 /* The max code point that isPRINT_A */ +# endif #endif /* EBCDIC 037 */ /* The number of code points not matching \pC */ -#define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C 143698 +#ifdef PERL_IN_REGCOMP_C +# define NON_OTHER_COUNT 143698 +#endif /* The highest code point that has any type of case change */ -#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x1E943 +#ifdef PERL_IN_UTF8_C +# define HIGHEST_CASE_CHANGING_CP 0x1E943 +#endif #endif /* PERL_UNICODE_CONSTANTS_H_ */ @@ -3335,10 +3335,8 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p, } goto cases_to_self; } -#ifdef HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C - if (UNLIKELY(uv1 - > HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C)) - { +#ifdef HIGHEST_CASE_CHANGING_CP + if (UNLIKELY(uv1 > HIGHEST_CASE_CHANGING_CP)) { goto cases_to_self; } |