summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  handy.h  17
-rw-r--r--  regcomp.c  11
-rw-r--r--  regen/unicode_constants.pl  32
-rw-r--r--  regexec.c  2
-rw-r--r--  unicode_constants.h  20
-rw-r--r--  utf8.c  6
6 files changed, 52 insertions, 36 deletions
diff --git a/handy.h b/handy.h
index 7feedcb792..96f84fa5d1 100644
--- a/handy.h
+++ b/handy.h
@@ -1616,16 +1616,21 @@ END_EXTERN_C
# endif
/* Participates in a single-character fold with a character above 255 */
-# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD)))
+# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+# define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(c) \
+ (( ! cBOOL(FITS_IN_8_BITS(c))) \
+ || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD)))
+
+# define IS_NON_FINAL_FOLD(c) _generic_isCC(c, _CC_NON_FINAL_FOLD)
+# define IS_IN_SOME_FOLD_L1(c) _generic_isCC(c, _CC_IS_IN_SOME_FOLD)
+# endif
/* Like the above, but also can be part of a multi-char fold */
-# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+# define HAS_NONLATIN1_FOLD_CLOSURE(c) \
+ ( (! cBOOL(FITS_IN_8_BITS(c))) \
+ || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
# define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
-# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
- _generic_isCC(c, _CC_NON_FINAL_FOLD)
-# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
- _generic_isCC(c, _CC_IS_IN_SOME_FOLD)
/* is c a control character for which we have a mnemonic? */
# if defined(PERL_CORE) || defined(PERL_EXT)
diff --git a/regcomp.c b/regcomp.c
index 853501c030..0c8beb0ead 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -143,13 +143,6 @@ EXTERN_C const struct regexp_engine wild_reg_engine;
#include "invlist_inline.h"
#include "unicode_constants.h"
-#define HAS_NONLATIN1_FOLD_CLOSURE(i) \
- _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-#define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(i) \
- _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
-#define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
-
#ifndef STATIC
#define STATIC static
#endif
@@ -2133,8 +2126,6 @@ S_ssc_clear_locale(regnode_ssc *ssc)
ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS;
}
-#define NON_OTHER_COUNT NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C
-
STATIC bool
S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
{
@@ -22237,8 +22228,6 @@ S_put_code_point(pTHX_ SV *sv, UV c)
}
}
-#define MAX_PRINT_A MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C
-
STATIC void
S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
{
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index aba447ae6a..44c633e711 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -162,9 +162,15 @@ foreach my $charset (get_supported_code_pages()) {
for my $i (0x20 .. 0x7E) {
$max_PRINT_A = $a2n[$i] if $a2n[$i] > $max_PRINT_A;
}
- printf $out_fh "# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0x%02X /* The max code point that isPRINT_A */\n", $max_PRINT_A;
+ $max_PRINT_A = sprintf "0x%02X", $max_PRINT_A;
+ print $out_fh <<"EOT";
- print $out_fh "\n" . get_conditional_compile_line_end();
+# ifdef PERL_IN_REGCOMP_C
+# define MAX_PRINT_A $max_PRINT_A /* The max code point that isPRINT_A */
+# endif
+EOT
+
+ print $out_fh get_conditional_compile_line_end();
}
@@ -178,9 +184,14 @@ for (my $i = 0; $i < @other_invlist; $i += 2) {
: 0x110000)
- $other_invlist[$i];
}
-printf $out_fh "\n/* The number of code points not matching \\pC */\n"
- . "#define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C %d\n",
- 0x110000 - $count;
+$count = 0x110000 - $count;
+print $out_fh <<~"EOT";
+
+ /* The number of code points not matching \\pC */
+ #ifdef PERL_IN_REGCOMP_C
+ # define NON_OTHER_COUNT $count
+ #endif
+ EOT
# If this release has both the CWCM and CWCF properties, find the highest code
# point which changes under any case change. We can use this to short-circuit
@@ -192,9 +203,14 @@ if (@cwcm) {
my $max = ($cwcm[-1] < $cwcf[-1])
? $cwcf[-1]
: $cwcm[-1];
- printf $out_fh "\n/* The highest code point that has any type of case change */\n"
- . "#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x%X\n",
- $max - 1;
+ $max = sprintf "0x%X", $max - 1;
+ print $out_fh <<~"EOS";
+
+ /* The highest code point that has any type of case change */
+ #ifdef PERL_IN_UTF8_C
+ # define HIGHEST_CASE_CHANGING_CP $max
+ #endif
+ EOS
}
}
diff --git a/regexec.c b/regexec.c
index b80c0824eb..f3edc3a7bb 100644
--- a/regexec.c
+++ b/regexec.c
@@ -118,8 +118,6 @@ static const char non_utf8_target_but_utf8_required[]
goto target; \
} STMT_END
-#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-
#ifndef STATIC
#define STATIC static
#endif
diff --git a/unicode_constants.h b/unicode_constants.h
index 232f18c464..eea66f6ba4 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -88,8 +88,10 @@ bytes.
# define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE 0xFF /* U+00FF */
# define MICRO_SIGN_NATIVE 0xB5 /* U+00B5 */
# define MICRO_SIGN_UTF8 "\xC2\xB5" /* U+00B5 */
-# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0x7E /* The max code point that isPRINT_A */
+# ifdef PERL_IN_REGCOMP_C
+# define MAX_PRINT_A 0x7E /* The max code point that isPRINT_A */
+# endif
#endif /* ASCII/Latin1 */
#if 'A' == 193 /* EBCDIC 1047 */ \
@@ -133,8 +135,10 @@ bytes.
# define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE 0xDF /* U+00FF */
# define MICRO_SIGN_NATIVE 0xA0 /* U+00B5 */
# define MICRO_SIGN_UTF8 "\x80\x64" /* U+00B5 */
-# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0xF9 /* The max code point that isPRINT_A */
+# ifdef PERL_IN_REGCOMP_C
+# define MAX_PRINT_A 0xF9 /* The max code point that isPRINT_A */
+# endif
#endif /* EBCDIC 1047 */
#if 'A' == 193 /* EBCDIC 037 */ \
@@ -178,15 +182,21 @@ bytes.
# define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE 0xDF /* U+00FF */
# define MICRO_SIGN_NATIVE 0xA0 /* U+00B5 */
# define MICRO_SIGN_UTF8 "\x78\x63" /* U+00B5 */
-# define MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C 0xF9 /* The max code point that isPRINT_A */
+# ifdef PERL_IN_REGCOMP_C
+# define MAX_PRINT_A 0xF9 /* The max code point that isPRINT_A */
+# endif
#endif /* EBCDIC 037 */
/* The number of code points not matching \pC */
-#define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C 143698
+#ifdef PERL_IN_REGCOMP_C
+# define NON_OTHER_COUNT 143698
+#endif
/* The highest code point that has any type of case change */
-#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x1E943
+#ifdef PERL_IN_UTF8_C
+# define HIGHEST_CASE_CHANGING_CP 0x1E943
+#endif
#endif /* PERL_UNICODE_CONSTANTS_H_ */
diff --git a/utf8.c b/utf8.c
index aaa620c2da..add8c093aa 100644
--- a/utf8.c
+++ b/utf8.c
@@ -3335,10 +3335,8 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p,
}
goto cases_to_self;
}
-#ifdef HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C
- if (UNLIKELY(uv1
- > HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C))
- {
+#ifdef HIGHEST_CASE_CHANGING_CP
+ if (UNLIKELY(uv1 > HIGHEST_CASE_CHANGING_CP)) {
goto cases_to_self;
}