summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--handy.h27
-rw-r--r--l1_char_class_tab.h16
-rw-r--r--regen/mk_PL_charclass.pl1
3 files changed, 28 insertions, 16 deletions
diff --git a/handy.h b/handy.h
index c90a8764dd..3eb8e51025 100644
--- a/handy.h
+++ b/handy.h
@@ -590,6 +590,12 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
#define isASCII_A(c) isASCII(c)
#define isASCII_L1(c) isASCII(c)
+/* The lower 3 bits in both the ASCII and EBCDIC representations of '0' are 0,
+ * and the 8 possible permutations of those bits exactly comprise the 8 octal
+ * digits */
+#define isOCTAL_A(c) cBOOL(FITS_IN_8_BITS(c) && (0xF8 & (c)) == '0')
+
+
/* ASCII range only */
#ifdef H_PERL /* If have access to perl.h, lookup in its table */
/* Bits for PL_charclass[]. These use names used in l1_char_class_tab.h but
@@ -611,7 +617,6 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
# define _CC_IDFIRST_L1 (1<<13)
# define _CC_LOWER_A (1<<14)
# define _CC_LOWER_L1 (1<<15)
-# define _CC_OCTAL_A (1<<16)
# define _CC_PRINT_A (1<<17)
# define _CC_PRINT_L1 (1<<18)
# define _CC_PSXSPC_A (1<<19)
@@ -627,11 +632,21 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
# define _CC_XDIGIT_A (1<<29)
# define _CC_NONLATIN1_FOLD (1<<30)
# define _CC_QUOTEMETA (1U<<31) /* 1U keeps Solaris from griping */
-/* Unused: None
+/* Unused: (1<<16)
* If more are needed, can give up some of the above. The first ones to go
- * would be those that require just two tests to verify, either there are two
- * code points, like BLANK_A, or occupy a single range like OCTAL_A, DIGIT_A,
- * UPPER_A, and LOWER_A.
+ * would be those that require just two tests to verify; either there are two
+ * code points, like BLANK_A, or it occupies a single range like DIGIT_A,
+ * UPPER_A, and LOWER_A. Also consider the ones that can be replaced with two
+ * tests and an additional mask, so
+ *
+ * #define isCNTRL_A(c) cBOOL(FITS_IN_8_BITS(c) \
+ * && (( ! (~0x1F & NATIVE_TO_UNI(c))) \
+ * || UNLIKELY(NATIVE_TO_UNI(c) == 0x7f)))
+ *
+ * This takes advantage of the contiguous block of these with the first one's
+ * representation having the lower order bits all zero, except the DELETE must
+ * be tested specially. A similar pattern can be used for isCNTRL_L1,
+ * isPRINT_A, and isPRINT_L1
*/
# ifdef DOINIT
@@ -651,7 +666,6 @@ EXTCONST U32 PL_charclass[];
# define isGRAPH_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_GRAPH_A))
# define isIDFIRST_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_IDFIRST_A))
# define isLOWER_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_LOWER_A))
-# define isOCTAL_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_OCTAL_A))
# define isPRINT_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_PRINT_A))
# define isPSXSPC_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_PSXSPC_A))
# define isPUNCT_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_PUNCT_A))
@@ -664,7 +678,6 @@ EXTCONST U32 PL_charclass[];
# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_NONLATIN1_FOLD))
# define _isQUOTEMETA(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_QUOTEMETA))
#else /* No perl.h. */
-# define isOCTAL_A(c) ((c) <= '7' && (c) >= '0')
# ifdef EBCDIC
# define isALNUMC_A(c) (isASCII(c) && isALNUMC(c))
# define isALPHA_A(c) (isASCII(c) && isALPHA(c))
diff --git a/l1_char_class_tab.h b/l1_char_class_tab.h
index 3698d95e02..28df339fe9 100644
--- a/l1_char_class_tab.h
+++ b/l1_char_class_tab.h
@@ -53,14 +53,14 @@
/* U+2D '-' */ _CC_CHARNAME_CONT|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_PUNCT_A|_CC_PUNCT_L1|_CC_QUOTEMETA,
/* U+2E '.' */ _CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_PUNCT_A|_CC_PUNCT_L1|_CC_QUOTEMETA,
/* U+2F '/' */ _CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_PUNCT_A|_CC_PUNCT_L1|_CC_QUOTEMETA,
-/* U+30 '0' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
-/* U+31 '1' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
-/* U+32 '2' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
-/* U+33 '3' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
-/* U+34 '4' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
-/* U+35 '5' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
-/* U+36 '6' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
-/* U+37 '7' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_OCTAL_A|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+30 '0' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+31 '1' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+32 '2' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+33 '3' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+34 '4' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+35 '5' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+36 '6' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
+/* U+37 '7' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
/* U+38 '8' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
/* U+39 '9' */ _CC_ALNUMC_A|_CC_ALNUMC_L1|_CC_CHARNAME_CONT|_CC_DIGIT_A|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_WORDCHAR_A|_CC_WORDCHAR_L1|_CC_XDIGIT_A,
/* U+3A ':' */ _CC_CHARNAME_CONT|_CC_GRAPH_A|_CC_GRAPH_L1|_CC_PRINT_A|_CC_PRINT_L1|_CC_PUNCT_A|_CC_PUNCT_L1|_CC_QUOTEMETA,
diff --git a/regen/mk_PL_charclass.pl b/regen/mk_PL_charclass.pl
index 4554c3bd53..5a3dbbe1f3 100644
--- a/regen/mk_PL_charclass.pl
+++ b/regen/mk_PL_charclass.pl
@@ -38,7 +38,6 @@ my @properties = qw(
IDFIRST_L1
LOWER_A
LOWER_L1
- OCTAL_A
PRINT_A
PRINT_L1
PSXSPC_A