summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2014-02-17 13:47:00 -0700
committer Karl Williamson <public@khwilliamson.com> 2014-02-19 08:32:59 -0700
commit 1462525b8916fe18637f62742c02f7016eb23fab (patch)
tree 9a7ed42da8cfd74b3865ae5f5c403514abb26bcb
parent e0e1be5fc663ef0fdda840a92e286b8eece99537 (diff)
download perl-1462525b8916fe18637f62742c02f7016eb23fab.tar.gz
regexes: Remove uses of ANYOF_LOCALE flag
This flag no longer adds any useful information and can be removed. An ANYOF node that depends on locale either matches a POSIX class like \d, or matches case insensitively, or both. There are flags for both these cases, and to see if a node depends on locale, one merely needs to see if either flag is set. Not having to keep track of this extra flag simplifies things, and will allow the flag itself to be removed. There was a time when this flag was shared with one of the remaining locale ones, and there was relict code that allowed that sharing to be reinstated, which this commit also removes.
-rw-r--r-- regcomp.c 31
-rw-r--r-- regcomp.h 6
-rw-r--r-- regexec.c 4
3 files changed, 12 insertions, 29 deletions
diff --git a/regcomp.c b/regcomp.c
index 252ccca675..6382c78c12 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1452,13 +1452,11 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL, FALSE);
- /* The code points that could match under /li are already incorporated into
- * the inversion list and bit map */
- ANYOF_FLAGS(ssc) &= ~ANYOF_LOC_FOLD;
-
if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
- ANYOF_FLAGS(ssc) |= ANYOF_LOCALE|ANYOF_POSIXL;
+ ANYOF_FLAGS(ssc) |= ANYOF_POSIXL;
}
+
+ assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale);
}
#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
@@ -4259,12 +4257,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
}
if (OP(scan) == EXACTFL) {
- if (flags & SCF_DO_STCLASS_AND) {
- ssc_flags_and(data->start_class, ANYOF_LOCALE);
- }
- else if (flags & SCF_DO_STCLASS_OR) {
- ANYOF_FLAGS(data->start_class) |= ANYOF_LOCALE;
- }
/* We don't know what the folds are; it could be anything. XXX
* Actually, we only support UTF-8 encoding for code points
@@ -13227,9 +13219,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
ANYOF_FLAGS(ret) = 0;
RExC_emit += ANYOF_SKIP;
- if (LOC) {
- ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
- }
listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
initial_listsv_len = SvCUR(listsv);
SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated. */
@@ -13641,8 +13630,7 @@ parseit:
/* What matches in a locale is not known until runtime. This includes
* what the Posix classes (like \w, [:space:]) match. Room must be
* reserved (one time per outer bracketed class) to store such classes,
- * either if Perl is compiled so that locale nodes always should have
- * this space, or if there is such posix class info to be stored. The
+ * if there is such posix class info to be stored. The
* space will contain a bit for each named class that is to be matched
* against. This isn't needed for \p{} and pseudo-classes, as they are
* not affected by locale, and hence are dealt with separately */
@@ -13661,10 +13649,7 @@ parseit:
* a posix class since are doing it here */
ANYOF_POSIXL_ZERO(ret);
}
- if (ANYOF_LOCALE == ANYOF_POSIXL
- || (namedclass > OOB_NAMEDCLASS
- && namedclass < ANYOF_POSIXL_MAX))
- {
+ if (namedclass > OOB_NAMEDCLASS && namedclass < ANYOF_POSIXL_MAX) {
if (! need_class) {
need_class = 1;
if (SIZE_ONLY) {
@@ -14627,7 +14612,7 @@ parseit:
* invert if there are things such as \w, which aren't known until runtime
* */
if (invert
- && ! (ANYOF_FLAGS(ret) & (ANYOF_LOC_FOLD|ANYOF_POSIXL))
+ && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS))
&& ! depends_list
&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
{
@@ -14675,7 +14660,7 @@ parseit:
if (cp_list
&& ! invert
&& ! depends_list
- && ! (ANYOF_FLAGS(ret) & (ANYOF_LOC_FOLD|ANYOF_POSIXL))
+ && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS))
&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
/* We don't optimize if we are supposed to make sure all non-Unicode
@@ -15657,7 +15642,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
int do_sep = 0;
- if (flags & ANYOF_LOCALE)
+ if (flags & ANYOF_LOCALE_FLAGS)
sv_catpvs(sv, "{loc}");
if (flags & ANYOF_LOC_FOLD)
sv_catpvs(sv, "{i}");
diff --git a/regcomp.h b/regcomp.h
index af1a97033c..a7908bed19 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -422,14 +422,12 @@ struct regnode_ssc {
#define ANYOF_FLAGS_ALL (0xff)
-#define ANYOF_LOCALE_FLAGS (ANYOF_LOCALE \
- |ANYOF_LOC_FOLD \
- |ANYOF_POSIXL)
+#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_POSIXL)
/* These are the flags that apply to both regular ANYOF nodes and synthetic
* start class nodes during construction of the SSC. During finalization of
* the SSC, other of the flags could be added to it */
-#define ANYOF_COMMON_FLAGS (ANYOF_LOCALE_FLAGS | ANYOF_WARN_SUPER)
+#define ANYOF_COMMON_FLAGS (ANYOF_WARN_SUPER)
/* Character classes for node->classflags of ANYOF */
/* Should be synchronized with a table in regprop() */
diff --git a/regexec.c b/regexec.c
index a2928ce0db..c31ae762eb 100644
--- a/regexec.c
+++ b/regexec.c
@@ -7652,7 +7652,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
{
match = TRUE;
}
- else if (flags & ANYOF_LOCALE) {
+ else if (flags & ANYOF_LOCALE_FLAGS) {
if (flags & ANYOF_LOC_FOLD) {
RXp_MATCH_TAINTED_on(prog);
if (ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) {
@@ -7736,7 +7736,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
&& ((flags & ANYOF_NONBITMAP_NON_UTF8)
|| (utf8_target
&& (c >=256
- || (! (flags & ANYOF_LOCALE))
+ || (! (flags & ANYOF_LOCALE_FLAGS))
|| is_ANYOF_SYNTHETIC(n)))))
{
SV * const sw = core_regclass_swash(prog, n, TRUE, 0);