summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-11-06 18:26:39 -0700
committerKarl Williamson <khw@cpan.org>2018-11-16 10:48:19 -0700
commitbd0b62a660635ad32a22aa96c5f1f0c45dc3127d (patch)
tree5fc70d3df64f4fe1c220b59489db2395dfd4baf4
parentd1be211cd36639caeb3c7482759d9bed530ecaad (diff)
downloadperl-bd0b62a660635ad32a22aa96c5f1f0c45dc3127d.tar.gz
regcomp.c: Don't restart parse for /d to /u if no need to
This commit keeps track of whether any operation encountered so far behaves differently under /d than under /u. If we switch to /u and have not yet found anything that differs, there is no need to reparse.
-rw-r--r--regcomp.c28
1 files changed, 23 insertions, 5 deletions
diff --git a/regcomp.c b/regcomp.c
index 1c3664b743..cc64902938 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -219,6 +219,7 @@ struct RExC_state_t {
#define RExC_mysv2 (pRExC_state->mysv2)
#endif
+ bool seen_d_op;
bool strict;
bool study_started;
bool in_script_run;
@@ -239,6 +240,8 @@ struct RExC_state_t {
#define RExC_parse (pRExC_state->parse)
#define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
#define RExC_whilem_seen (pRExC_state->whilem_seen)
+#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
+ under /d from /u ? */
#ifdef RE_TRACK_PATTERN_OFFSETS
@@ -349,9 +352,11 @@ struct RExC_state_t {
if (DEPENDS_SEMANTICS) { \
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET); \
RExC_uni_semantics = 1; \
- if (LIKELY(RExC_total_parens >= 0)) { \
- /* No need to restart the parse immediately if we're \
- * going to reparse anyway to count parens */ \
+ if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) { \
+ /* No need to restart the parse if we haven't seen \
+ * anything that differs between /u and /d, and no need \
+ * to restart immediately if we're going to reparse \
+ * anyway to count parens */ \
*flagp |= RESTART_PARSE; \
return restart_retval; \
} \
@@ -7287,6 +7292,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_close_parens = NULL;
RExC_paren_names = NULL;
RExC_size = 0;
+ RExC_seen_d_op = FALSE;
#ifdef DEBUGGING
RExC_paren_name_list = NULL;
#endif
@@ -13276,7 +13282,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
RExC_seen |= REG_LOOKBEHIND_SEEN;
op = BOUND + charset;
- if (op == BOUNDL) {
+ if (op == BOUND) {
+ RExC_seen_d_op = TRUE;
+ }
+ else if (op == BOUNDL) {
RExC_contains_locale = 1;
}
@@ -13425,6 +13434,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
else if (op == POSIXL) {
RExC_contains_locale = 1;
}
+ else if (op == POSIXD) {
+ RExC_seen_d_op = TRUE;
+ }
join_posix_op_known:
@@ -13643,6 +13655,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
? REFFL
: REFF),
num);
+ if (OP(REGNODE_p(ret)) == REFF) {
+ RExC_seen_d_op = TRUE;
+ }
*flagp |= HASWIDTH;
/* override incorrect value set in reganode MJD */
@@ -14450,6 +14465,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
OP(REGNODE_p(ret)) = EXACTFLU8;
}
}
+ else if (node_type == EXACTF) {
+ RExC_seen_d_op = TRUE;
+ }
}
alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
@@ -18076,10 +18094,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
|| (anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
{
use_anyofd = TRUE;
+ RExC_seen_d_op = TRUE;
optimizable = FALSE;
}
-
/* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
* at compile time. Besides not inverting folded locale now, we can't
* invert if there are things such as \w, which aren't known until runtime