diff options
author | Karl Williamson <khw@cpan.org> | 2014-05-11 17:41:48 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-06-01 08:56:22 -0600 |
commit | f12c011824bed682b979bcf53cf4106b7e4d7f31 (patch) | |
tree | 0f73c8175087f68a1b52a4b5b9c4b0725870a071 /handy.h | |
parent | cce29a1df20fdbe37080fe3ef4982f7f53db93ba (diff) | |
download | perl-f12c011824bed682b979bcf53cf4106b7e4d7f31.tar.gz |
regcomp.c: Skip work that is a no-op
There are a few characters in the Latin1 range that are the targets of
folds from above-Latin1 characters. Some of these are the target of a
single-character fold, as when KELVIN SIGN folds to 'k'. More are
targets as part of a multi-character fold. Until this commit, there
wasn't a quick way to distinguish between the two classes. A couple of
places want only the single-character ones. It is more efficient to look
for just those than to also include the multi-char ones, which end up not
doing anything. This commit uses a bit in l1_char_class_tab.h to indicate
the characters that are in the desired class.
Diffstat (limited to 'handy.h')
-rw-r--r-- | handy.h | 21 |
1 files changed, 12 insertions, 9 deletions
```diff
@@ -952,14 +952,15 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
 /* The members of the third group below do not need to be coordinated with data
  * structures in regcomp.[ch] and regexec.c. */
-# define _CC_IDFIRST 17
-# define _CC_CHARNAME_CONT 18
-# define _CC_NONLATIN1_FOLD 19
-# define _CC_QUOTEMETA 20
-# define _CC_NON_FINAL_FOLD 21
-# define _CC_IS_IN_SOME_FOLD 22
+# define _CC_IDFIRST 17
+# define _CC_CHARNAME_CONT 18
+# define _CC_NONLATIN1_FOLD 19
+# define _CC_NONLATIN1_SIMPLE_FOLD 20
+# define _CC_QUOTEMETA 21
+# define _CC_NON_FINAL_FOLD 22
+# define _CC_IS_IN_SOME_FOLD 23
 # define _CC_BACKSLASH_FOO_LBRACE_IS_META 31 /* temp, see mk_PL_charclass.pl */
-/* Unused: 23-30
+/* Unused: 24-30
  * If more bits are needed, one could add a second word for non-64bit
  * QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd
  * word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it
@@ -1081,8 +1082,10 @@ EXTCONST U32 PL_charclass[];
 # define isWORDCHAR_L1(c) _generic_isCC(c, _CC_WORDCHAR)
 # define isIDFIRST_L1(c) _generic_isCC(c, _CC_IDFIRST)
- /* Either participates in a fold with a character above 255, or is a
- * multi-char fold */
+ /* Participates in a single-character fold with a character above 255 */
+# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD)))
+
+ /* Like the above, but also can be part of a multi-char fold */
 # define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
 # define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
```