summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2018-12-16 12:38:28 -0700
committer Karl Williamson <khw@cpan.org> 2018-12-16 12:44:54 -0700
commit 44abdc3a8a76a7ca901b7f513b33f72cc9f21495 (patch)
tree 74afe8d43a301dc70b2ff265fdbad460164bd813 /regcomp.c
parent 1c484ed4bdbb071c7ad4fc9a259c1aa027d27719 (diff)
download perl-44abdc3a8a76a7ca901b7f513b33f72cc9f21495.tar.gz
regcomp.c: Tighten embedded patterns in regex sets
In the (?[ ... ]) regex sets feature, one can embed another compiled regex set pattern. Such compiled patterns always have a flag of '^', which we weren't looking for prior to this commit. That meant that uncompiled patterns would be mistaken for compiled ones.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 19
1 files changed, 8 insertions, 11 deletions
diff --git a/regcomp.c b/regcomp.c
index 0fc793626f..83e7029d50 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -15822,10 +15822,11 @@ redo_curchar:
case '(':
- if ( RExC_parse < RExC_end - 1
- && (UCHARAT(RExC_parse + 1) == '?'))
+ if ( RExC_parse < RExC_end - 2
+ && UCHARAT(RExC_parse + 1) == '?'
+ && UCHARAT(RExC_parse + 2) == '^')
{
- /* If is a '(?', could be an embedded '(?flags:(?[...])'.
+ /* If is a '(?', could be an embedded '(?^flags:(?[...])'.
* This happens when we have some thing like
*
* my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/;
@@ -15843,14 +15844,11 @@ redo_curchar:
RExC_parse += 2; /* Skip past the '(?' */
save_parse = RExC_parse;
- /* Parse any flags for the '(?' */
+ /* Parse the flags for the '(?'. We already know the first
+ * flag to parse is a '^' */
parse_lparen_question_flags(pRExC_state);
- if (RExC_parse == save_parse /* Makes sure there was at
- least one flag (or else
- this embedding wasn't
- compiled) */
- || RExC_parse >= RExC_end - 4
+ if ( RExC_parse >= RExC_end - 4
|| UCHARAT(RExC_parse) != ':'
|| UCHARAT(++RExC_parse) != '('
|| UCHARAT(++RExC_parse) != '?'
@@ -15859,8 +15857,7 @@ redo_curchar:
/* In combination with the above, this moves the
* pointer to the point just after the first erroneous
- * character (or if there are no flags, to where they
- * should have been) */
+ * character. */
if (RExC_parse >= RExC_end - 4) {
RExC_parse = RExC_end;
}