summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2021-06-14 06:04:44 -0600
committer Karl Williamson <khw@cpan.org> 2021-08-07 05:14:43 -0600
commit 28ca3ab57366a041138756872c2020aca0b98ec8 (patch)
tree 3e6f7512c5bfe4418ea9e957ce1d73f11004fb01
parent fcd03d925b4b3a67a6162b516b3ea4194e92bc92 (diff)
download perl-28ca3ab57366a041138756872c2020aca0b98ec8.tar.gz
utf8.h: Add symbol for easing EBCDIC handling
This is then used in regcomp.c to avoid an #ifdef EBCDIC
-rw-r--r-- regcomp.c 11
-rw-r--r-- utf8.h 6
2 files changed, 12 insertions, 5 deletions
diff --git a/regcomp.c b/regcomp.c
index b1148130d7..ced87a0416 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19815,11 +19815,12 @@ S_optimize_regclass(pTHX_
* invariant bytes, because they have the same bit patterns under UTF-8
* as not. */
PERL_UINT_FAST8_T inverted = 0;
-#ifdef EBCDIC
- const PERL_UINT_FAST8_T max_permissible = 0xFF;
-#else
- const PERL_UINT_FAST8_T max_permissible = 0x7F;
-#endif
+
+ /* Highest possible UTF-8 invariant is 7F on ASCII platforms; FF on
+ * EBCDIC */
+ const PERL_UINT_FAST8_T max_permissible
+ = nBIT_UMAX(7 + ONE_IF_EBCDIC_ZERO_IF_NOT);
+
/* If doesn't fit the criteria for ANYOFM, invert and try again. If
* that works we will instead later generate an NANYOFM, and invert
* back when through */
diff --git a/utf8.h b/utf8.h
index 4a4525ef11..1cb0b6855e 100644
--- a/utf8.h
+++ b/utf8.h
@@ -278,6 +278,12 @@ are in the character. */
#endif /* EBCDIC vs ASCII */
+/* It turns out that, in a number of cases, handling ASCII vs EBCDIC is just a
+ * matter of being off-by-one.  So this is a convenience macro, used to avoid
+ * some #ifdefs. */
+#define ONE_IF_EBCDIC_ZERO_IF_NOT \
+ (UTF_CONTINUATION_BYTE_INFO_BITS == UTF_EBCDIC_CONTINUATION_BYTE_INFO_BITS)
+
/* Since the significant bits in a continuation byte are stored in the
* least-significant positions, we often find ourselves shifting by that
* amount. This is a clearer name in such situations */