diff options
author | Karl Williamson <khw@cpan.org> | 2021-06-14 06:04:44 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-08-07 05:14:43 -0600 |
commit | 28ca3ab57366a041138756872c2020aca0b98ec8 (patch) | |
tree | 3e6f7512c5bfe4418ea9e957ce1d73f11004fb01 | |
parent | fcd03d925b4b3a67a6162b516b3ea4194e92bc92 (diff) | |
download | perl-28ca3ab57366a041138756872c2020aca0b98ec8.tar.gz |
utf8.h: Add symbol for easing EBCDIC handling
This is then used in regcomp.c to avoid an #ifdef EBCDIC
-rw-r--r-- | regcomp.c | 11 | ||||
-rw-r--r-- | utf8.h | 6 |
2 files changed, 12 insertions, 5 deletions
@@ -19815,11 +19815,12 @@ S_optimize_regclass(pTHX_
 * invariant bytes, because they have the same bit patterns under UTF-8
 * as not. */
 PERL_UINT_FAST8_T inverted = 0;
-#ifdef EBCDIC
- const PERL_UINT_FAST8_T max_permissible = 0xFF;
-#else
- const PERL_UINT_FAST8_T max_permissible = 0x7F;
-#endif
+
+ /* Highest possible UTF-8 invariant is 7F on ASCII platforms; FF on
+ * EBCDIC */
+ const PERL_UINT_FAST8_T max_permissible
+ = nBIT_UMAX(7 + ONE_IF_EBCDIC_ZERO_IF_NOT);
+
 /* If doesn't fit the criteria for ANYOFM, invert and try again. If
 * that works we will instead later generate an NANYOFM, and invert
 * back when through */
@@ -278,6 +278,12 @@ are in the character. */
 #endif /* EBCDIC vs ASCII */
+/* It turns out that in a number of cases, that handling ASCII vs EBCDIC is a
+ * matter of being off-by-one. So this is a convenience macro, used to avoid
+ * some #ifdefs. */
+#define ONE_IF_EBCDIC_ZERO_IF_NOT \
+ (UTF_CONTINUATION_BYTE_INFO_BITS == UTF_EBCDIC_CONTINUATION_BYTE_INFO_BITS)
+
 /* Since the significant bits in a continuation byte are stored in the
 * least-significant positions, we often find ourselves shifting by that
 * amount. This is a clearer name in such situations */ |