diff options
author | Karl Williamson <khw@cpan.org> | 2018-12-16 12:38:28 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-12-16 12:44:54 -0700 |
commit | 44abdc3a8a76a7ca901b7f513b33f72cc9f21495 (patch) | |
tree | 74afe8d43a301dc70b2ff265fdbad460164bd813 /regcomp.c | |
parent | 1c484ed4bdbb071c7ad4fc9a259c1aa027d27719 (diff) | |
download | perl-44abdc3a8a76a7ca901b7f513b33f72cc9f21495.tar.gz |
regcomp.c: Tighten embedded patterns in regex sets
In the (?[ ... ]) regex sets feature, one can embed another compiled
regex set pattern. Such compiled patterns always have a flag of '^',
which we weren't looking for prior to this commit. That meant that
uncompiled patterns would be mistaken for compiled ones.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 19 |
1 files changed, 8 insertions, 11 deletions
@@ -15822,10 +15822,11 @@ redo_curchar: case '(': - if ( RExC_parse < RExC_end - 1 - && (UCHARAT(RExC_parse + 1) == '?')) + if ( RExC_parse < RExC_end - 2 + && UCHARAT(RExC_parse + 1) == '?' + && UCHARAT(RExC_parse + 2) == '^') { - /* If is a '(?', could be an embedded '(?flags:(?[...])'. + /* If is a '(?', could be an embedded '(?^flags:(?[...])'. * This happens when we have some thing like * * my $thai_or_lao = qr/(?[ \p{Thai} + \p{Lao} ])/; @@ -15843,14 +15844,11 @@ redo_curchar: RExC_parse += 2; /* Skip past the '(?' */ save_parse = RExC_parse; - /* Parse any flags for the '(?' */ + /* Parse the flags for the '(?'. We already know the first + * flag to parse is a '^' */ parse_lparen_question_flags(pRExC_state); - if (RExC_parse == save_parse /* Makes sure there was at - least one flag (or else - this embedding wasn't - compiled) */ - || RExC_parse >= RExC_end - 4 + if ( RExC_parse >= RExC_end - 4 || UCHARAT(RExC_parse) != ':' || UCHARAT(++RExC_parse) != '(' || UCHARAT(++RExC_parse) != '?' @@ -15859,8 +15857,7 @@ redo_curchar: /* In combination with the above, this moves the * pointer to the point just after the first erroneous - * character (or if there are no flags, to where they - * should have been) */ + * character. */ if (RExC_parse >= RExC_end - 4) { RExC_parse = RExC_end; } |