From 037715a6d8890fc5a104494153096c071496030a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 23 Dec 2015 12:43:30 -0700 Subject: regcomp.h: Shorten, clarify names of internal flags Some of the names are expanded slightly and not shortened --- regcomp.c | 20 ++++++++++---------- regcomp.h | 33 +++++++++++++++++---------------- regexec.c | 12 ++++++------ 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/regcomp.c b/regcomp.c index ab7a5d35e4..cba1e3ba0f 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1240,7 +1240,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, } /* Get the code points valid only under UTF-8 locales */ - if ((ANYOF_FLAGS(node) & ANYOF_LOC_FOLD) + if ((ANYOF_FLAGS(node) & ANYOFL_FOLD) && ary[2] && ary[2] != &PL_sv_undef) { only_utf8_locale_invlist = ary[2]; @@ -1287,7 +1287,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, if (ANYOF_FLAGS(node) & ANYOF_INVERT) { _invlist_invert(invlist); } - else if (new_node_has_latin1 && ANYOF_FLAGS(node) & ANYOF_LOC_FOLD) { + else if (new_node_has_latin1 && ANYOF_FLAGS(node) & ANYOFL_FOLD) { /* Under /li, any 0-255 could fold to any other 0-255, depending on the * locale. We can skip this if there are no 0-255 at all. 
*/ @@ -14201,7 +14201,7 @@ redo_curchar: assert(OP(node) == ANYOF); OP(node) = ANYOFL; - ANYOF_FLAGS(node) |= ANYOF_LOC_REQ_UTF8; + ANYOF_FLAGS(node) |= ANYOFL_UTF8_LOCALE_REQD; } if (save_fold) { @@ -16098,14 +16098,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * locales, or the class matches at least one 0-255 range code point */ if (LOC && FOLD) { if (only_utf8_locale_list) { - ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD - |ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES; + ANYOF_FLAGS(ret) |= ANYOFL_FOLD + |ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE; } else if (cp_list) { /* Look to see if a 0-255 code point is in list */ UV start, end; invlist_iterinit(cp_list); if (invlist_iternext(cp_list, &start, &end) && start < 256) { - ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD; + ANYOF_FLAGS(ret) |= ANYOFL_FOLD; } invlist_iterfinish(cp_list); } @@ -17473,14 +17473,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ if (OP(o) == ANYOFL) { - if (flags & ANYOF_LOC_REQ_UTF8) { + if (flags & ANYOFL_UTF8_LOCALE_REQD) { sv_catpvs(sv, "{utf8-loc}"); } else { sv_catpvs(sv, "{loc}"); } } - if (flags & ANYOF_LOC_FOLD) + if (flags & ANYOFL_FOLD) sv_catpvs(sv, "{i}"); Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); if (flags & ANYOF_INVERT) @@ -17507,7 +17507,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ || (flags & ( ANYOF_MATCHES_ALL_ABOVE_BITMAP |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP - |ANYOF_LOC_FOLD))) + |ANYOFL_FOLD))) { if (do_sep) { Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]); @@ -17589,7 +17589,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ SvREFCNT_dec_NN(lv); } - if ((flags & ANYOF_LOC_FOLD) + if ((flags & ANYOFL_FOLD) && only_utf8_locale && only_utf8_locale != &PL_sv_undef) { diff --git a/regcomp.h b/regcomp.h index a8955f3b95..d26cf18f9b 100644 --- a/regcomp.h +++ b/regcomp.h @@ -438,20 +438,21 @@ struct regnode_ssc { * 
UTF-8 one. These are quite rare, so it would be good to avoid the * expense of looking for them. But /l matching is slow anyway, and we've * traditionally not worried too much about its performance. And this - * condition requires the ANYOF_LOC_FOLD flag to be set, so testing for + * condition requires the ANYOFL_FOLD flag to be set, so testing for * that flag would be sufficient to rule out most cases of this. So it is * unclear if this should have a flag or not. But, one is currently - * allocated for this purpose, ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES (and the - * text below indicates how to share it, should another bit be needed). + * allocated for this purpose, ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE (and + * the text below indicates how to share it, should another bit be + * needed). * * At the moment, there are no spare bits, but this could be changed by various * tricks. * - * Note that item ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES is not independent of the - * ANYOF_LOC_FOLD flag below. Also, the ANYOF_LOC_REQ_UTF8 flag is set only if - * both these aren't. We can therefore share ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES - * with ANYOF_LOC_REQ_UTF8, so what the shared flag means depends on the - * ANYOF_LOC_FOLD flag. + * Note that item ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE is not independent of + * the ANYOFL_FOLD flag below. Also, the ANYOFL_UTF8_LOCALE_REQD flag is set + * only if both these aren't. We can therefore share + * ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE with ANYOFL_UTF8_LOCALE_REQD, so what + * the shared flag means depends on the ANYOFL_FOLD flag. * * Beyond that, note that the information may be conveyed by creating new * regnode types. This is not the best solution, as shown later in this @@ -465,14 +466,14 @@ struct regnode_ssc { * POSIXL, and still another for INVERT_POSIXL. This example illustrates one * problem with this, a combinatorial explosion of node types. 
The one node * type khw can think of that doesn't have this explosion issue is - * ANYOF_LOC_REQ_UTF8. This flag is a natural candidate for being a separate + * ANYOFL_UTF8_LOCALE_REQD. This flag is a natural candidate for being a separate * node type because it is a specialization of the current ANYOFL, and because * no other ANYOFL-only flags are set when it is; also most of its uses are * actually outside the reginclass() function, so this could be done with no * performance penalty. But since it can be shared, as noted above, it doesn't * take up space anyway. Another issue when turning a flag into a node type, is * that a SSC may use that flag -- not just a regular ANYOF[DL]?. In the case - * of ANYOF_LOC_REQ_UTF8, the only likely problem is accurately settting the + * of ANYOFL_UTF8_LOCALE_REQD, the only likely problem is accurately setting the * SSC node-type to the new one, which would likely involve S_ssc_or and * S_ssc_and, and not how the SSC currently gets set to ANYOFL. * @@ -519,17 +520,17 @@ struct regnode_ssc { * time. However under locale, the actual folding varies depending on * what the locale is at the time of execution, so it has to be deferred until * then. Only set under /l; never in an SSC */ -#define ANYOF_LOC_FOLD 0x04 +#define ANYOFL_FOLD 0x04 -/* If set, ANYOF_LOC_FOLD is also set, and there are potential matches that +/* If set, ANYOFL_FOLD is also set, and there are potential matches that * will be valid only if the locale is a UTF-8 one. */ -#define ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES 0x08 +#define ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE 0x08 /* If set, means to warn if runtime locale isn't a UTF-8 one. Only under /l. * If set, none of INVERT, LOC_FOLD, POSIXL, * ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP can * be set. Can be in an SSC */ -#define ANYOF_LOC_REQ_UTF8 0x10 +#define ANYOFL_UTF8_LOCALE_REQD 0x10 /* If set, the node matches every code point NUM_ANYOF_CODE_POINTS and above. 
* Can be in an SSC */ @@ -561,12 +562,12 @@ struct regnode_ssc { #define ANYOF_FLAGS_ALL (0xff) -#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_MATCHES_POSIXL) +#define ANYOF_LOCALE_FLAGS (ANYOFL_FOLD | ANYOF_MATCHES_POSIXL) /* These are the flags that apply to both regular ANYOF nodes and synthetic * start class nodes during construction of the SSC. During finalization of * the SSC, other of the flags may get added to it */ -#define ANYOF_COMMON_FLAGS ANYOF_LOC_REQ_UTF8 +#define ANYOF_COMMON_FLAGS ANYOFL_UTF8_LOCALE_REQD /* Character classes for node->classflags of ANYOF */ /* Should be synchronized with a table in regprop() */ diff --git a/regexec.c b/regexec.c index afe87a5fab..85921cf807 100644 --- a/regexec.c +++ b/regexec.c @@ -1826,7 +1826,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, case ANYOFL: _CHECK_AND_WARN_PROBLEMATIC_LOCALE; - if ((FLAGS(c) & ANYOF_LOC_REQ_UTF8) && ! IN_UTF8_CTYPE_LOCALE) { + if ((FLAGS(c) & ANYOFL_UTF8_LOCALE_REQD) && ! IN_UTF8_CTYPE_LOCALE) { Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required); } @@ -5766,7 +5766,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) case ANYOFL: /* /[abc]/l */ _CHECK_AND_WARN_PROBLEMATIC_LOCALE; - if ((FLAGS(scan) & ANYOF_LOC_REQ_UTF8) && ! IN_UTF8_CTYPE_LOCALE) + if ((FLAGS(scan) & ANYOFL_UTF8_LOCALE_REQD) && ! IN_UTF8_CTYPE_LOCALE) { Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required); } @@ -8301,7 +8301,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, case ANYOFL: _CHECK_AND_WARN_PROBLEMATIC_LOCALE; - if ((FLAGS(p) & ANYOF_LOC_REQ_UTF8) && ! IN_UTF8_CTYPE_LOCALE) { + if ((FLAGS(p) & ANYOFL_UTF8_LOCALE_REQD) && ! 
IN_UTF8_CTYPE_LOCALE) { Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required); } /* FALLTHROUGH */ @@ -8648,7 +8648,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const * UTF8_ALLOW_FFFF */ if (c_len == (STRLEN)-1) Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)"); - if (c > 255 && OP(n) == ANYOFL && ! (flags & ANYOF_LOC_REQ_UTF8)) { + if (c > 255 && OP(n) == ANYOFL && ! (flags & ANYOFL_UTF8_LOCALE_REQD)) { _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c); } } @@ -8666,7 +8666,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const match = TRUE; } else if (flags & ANYOF_LOCALE_FLAGS) { - if ((flags & ANYOF_LOC_FOLD) + if ((flags & ANYOFL_FOLD) && c < 256 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) { @@ -8751,7 +8751,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const && c < 256 # endif ))) - || (( flags & ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES) + || (( flags & ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE) && IN_UTF8_CTYPE_LOCALE))) { SV* only_utf8_locale = NULL; -- cgit v1.2.1