diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-04-23 13:39:35 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-05-02 13:39:00 -0600 |
commit | 000947ada5b027f394ee63b5166df8c06b64a74e (patch) | |
tree | e6b3d6d7078fea713cdde8a314b7ea3adbb25298 /regcomp.c | |
parent | eb5682978c85adb0d49e6aef177310cfe054d2aa (diff) | |
download | perl-000947ada5b027f394ee63b5166df8c06b64a74e.tar.gz |
Deprecate spaces/comments in some regex tokens
This commit deprecates having space/comments between the first two
characters of regular expression forms '(*VERB:ARG)' and '(?...)'.
That is, the '(' should be immediately followed by the '*' or '?'.
Previously, things like:
qr/((?# This is a comment in the middle of a token)?:foo)/
were silently accepted.
The problem is that during regex parsing, the '(' is seen, and the input
pointer advanced, skipping comments and, under /x, white space, without
regard to whether the left parenthesis is part of a bigger token or not.
S_reg() handles the parsing of what comes next in the input, and it
just assumes that the first character it sees is the one that
immediately followed the input parenthesis.
Since the parenthesis may or may not be a part of a bigger token, and
given the current structure of handling things, I don't see an elegant way to
fix this. What I did is flag the single call to S_reg() where this
could be an issue, and have S_reg check for adjacency if the
parenthesis is part of a bigger token, and if so, warn if not-adjacent.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 23 |
1 files changed, 20 insertions, 3 deletions
@@ -8601,7 +8601,10 @@ S_parse_lparen_question_flags(pTHX_ struct RExC_state_t *pRExC_state) cannot happen. */ STATIC regnode * S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) - /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */ + /* paren: Parenthesized? 0=top; 1,2=inside '(': changed to letter. + * 2 is like 1, but indicates that nextchar() has been called to advance + * RExC_parse beyond the '('. Things like '(?' are indivisible tokens, and + * this flag alerts us to the need to check for that */ { dVAR; regnode *ret; /* Will be the head of the group. */ @@ -8629,6 +8632,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) /* Make an OPEN node, if parenthesized. */ if (paren) { + + /* Under /x, space and comments can be gobbled up between the '(' and + * here (if paren ==2). The forms '(*VERB' and '(?...' disallow such + * intervening space, as the sequence is a token, and a token should be + * indivisible */ + bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '('; + if ( *RExC_parse == '*') { /* (*VERB:ARG) */ char *start_verb = RExC_parse; STRLEN verb_len = 0; @@ -8636,6 +8646,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) unsigned char op = 0; int argok = 1; int internal_argval = 0; /* internal_argval is only useful if !argok */ + + if (has_intervening_patws && SIZE_ONLY) { + ckWARNregdep(RExC_parse + 1, "In '(*VERB...)', splitting the initial '(*' is deprecated"); + } while ( *RExC_parse && *RExC_parse != ')' ) { if ( *RExC_parse == ':' ) { start_arg = RExC_parse + 1; @@ -8737,6 +8751,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (*RExC_parse == '?') { /* (?...) */ bool is_logical = 0; const char * const seqstart = RExC_parse; + if (has_intervening_patws && SIZE_ONLY) { + ckWARNregdep(RExC_parse + 1, "In '(?...)', splitting the initial '(?' 
is deprecated"); + } RExC_parse++; paren = *RExC_parse++; @@ -9322,7 +9339,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) case ':': ender = reg_node(pRExC_state, TAIL); break; - case 1: + case 1: case 2: ender = reganode(pRExC_state, CLOSE, parno); if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) { DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, @@ -10312,7 +10329,7 @@ tryagain: } case '(': nextchar(pRExC_state); - ret = reg(pRExC_state, 1, &flags,depth+1); + ret = reg(pRExC_state, 2, &flags,depth+1); if (ret == NULL) { if (flags & TRYAGAIN) { if (RExC_parse == RExC_end) { |