summaryrefslogtreecommitdiff
path: root/toke.c
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2012-08-19 02:45:38 -0700
committerFather Chrysostomos <sprout@cpan.org>2012-08-21 14:10:59 -0700
commit5097bf9b8df114433b321066b622851359bb857e (patch)
treeda5d2f596265c21542f96577b3d4719d337e9af6 /toke.c
parent5af08aedbe30651caf3374bc93f1aa7385b9531f (diff)
downloadperl-5097bf9b8df114433b321066b622851359bb857e.tar.gz
[perl #114040] Parse here-docs correctly in quoted constructs
When parsing code outside a string eval or quoted construct, the lexer reads one line at a time into PL_linestr. To parse a here-doc (hereinafter ‘deer hock’, because I spike lunar- isms), the lexer has to pull extra lines out of the input stream ahead of the current line, the value of PL_linestr remaining the same. In a string eval, the entire piece of code being parsed is in PL_linestr. To parse a deer hock inside a string eval, the lexer has to fiddle with the contents of PL_linestr, scanning for newline characters. Originally, S_scan_heredoc just followed those two approaches. When the lexer encounters a quoted construct, it looks for the end- ing delimiter (reading from the input stream if necessary), puts the entire quoted thing (minus quotes) in PL_linestr, and then starts an inner lexing scope. This means that deer hocks would not nest properly outside of a string eval, because the body of the inner deer hock would be pulled out of the input stream *after* the outer deer hock. Larry Wall fixed that in commit fd2d095329 (Jan. 1997), so that this would work: <<foo ${\<<bar} ber bar foo He did so by following the string eval approach (looking for the deer hock body in PL_linestr) if the deer hock was inside another quoted construct. Later, commit a2c066523a (Mar. 1998) fixed this: s/^not /substr(<<EOF, 0, 0)/e; Ignored EOF by following the string eval approach only if the deer hock was inside another non-backtick deer hock, not just any quoted construct. The problem with the string eval approach inside a substitu- tion is that it only looks in PL_linestr, which only contains ‘substr(<<EOF, 0, 0)’ when the lexer is handling the second part of the s/// operator. But that unfortunately broke this: s/^not /substr(<<EOF, 0, 0) Ignored EOF /e; and this: print <<`EOF`; ${\<<EOG} echo stuff EOG EOF reverting it to the pre-fd2d095329 behaviour, because the outer quoted construct was treated as one line. Later on, commit 0244c3a403 (Mar. 1999) fixed this: eval 's/.../<<FOO/e stuff FOO '; which required a new approach not used before. When the replacement part of the s/// is being parsed, PL_linestr contains ‘<<FOO’. The body of the deer hock is not in the input stream (there isn’t one), but in what was the previous value of PL_linestr before the lexer encountered s///. So 0244c3a403 fixed that by recording pointers into the outer string and using them in S_scan_heredoc. That commit, for some reason, was written such that it applied only to substitutions, and not to other quoted constructs. It also failed to take interpolation into account, and did not record the outer buffer position, but then tried to use it anyway, resulting in crashes in both these cases: eval 's/${ <<END }//'; eval 's//${ <<END }//'; It also failed to take multiline s///’s into account, resulting in neither of these working, because it lost track of the current cursor, leaving it at 'D' instead of the line break following it: eval ' s//<<END /e; blah blah blah END ;1' or die $@; eval ' s//<<END blah blah blah END /e; ;1' or die $@; S_scan_heredoc currently positions the cursor (s) at the last charac- ter of <<END if there is a line break on the same line. There is an s++ later on to account, but the code added by 0244c3a403 bypassed it. So, in the end, deer hocks could only be nested in other quoted con- structs if the outer construct was in a string eval and was not s///, or was a non-backtick deer hock. This commit hopefully fixes most of the problems. :-) The s///-in-eval case is a little tricky. We have to see whether the deer hock label is on the last line of the s///. If it is, we have to peek into the outer buffer. Otherwise, we have to treat it like a string eval. This commit does not deal with <<END inside the pattern of a multi- line s/// or in nested quotes.
Diffstat (limited to 'toke.c')
-rw-r--r--toke.c71
1 files changed, 47 insertions, 24 deletions
diff --git a/toke.c b/toke.c
index 33a560c256..fc2635b481 100644
--- a/toke.c
+++ b/toke.c
@@ -2386,6 +2386,8 @@ S_sublex_start(pTHX)
dVAR;
const I32 op_type = pl_yylval.ival;
+ PL_sublex_info.super_bufptr = PL_bufptr;
+ PL_sublex_info.super_bufend = PL_bufend;
if (op_type == OP_NULL) {
pl_yylval.opval = PL_lex_op;
PL_lex_op = NULL;
@@ -9315,8 +9317,6 @@ S_scan_subst(pTHX_ char *start)
if (es) {
SV * const repl = newSVpvs("");
- PL_sublex_info.super_bufptr = s;
- PL_sublex_info.super_bufend = PL_bufend;
PL_multi_end = 0;
pm->op_pmflags |= PMf_EVAL;
while (es-- > 0) {
@@ -9434,6 +9434,32 @@ S_scan_trans(pTHX_ char *start)
return s;
}
+/* scan_heredoc
+ Takes a pointer to the first < in <<FOO.
+ Returns a pointer to the byte following <<FOO.
+
+ This function scans a heredoc, which involves different methods
+ depending on whether we are in a string eval, quoted construct, etc.
+ This is because PL_linestr could containing a single line of input, or
+ a whole string being evalled, or the contents of the current quote-
+ like operator.
+
+ The three methods are:
+ - Steal lines from the input stream (stream)
+ - Scan the heredoc in PL_linestr and remove it therefrom (linestr)
+ - Peek at the PL_linestr of the outer lexing scope (peek)
+
+ They are used in these cases:
+ file scope or filtered eval stream
+ string eval linestr
+ multiline quoted construct linestr
+ single-line quoted construct in file stream
+ single-line quoted construct in eval peek
+
+ Single-line also applies to heredocs that begin on the last line of a
+ quote-like operator.
+*/
+
STATIC char *
S_scan_heredoc(pTHX_ register char *s)
{
@@ -9443,13 +9469,11 @@ S_scan_heredoc(pTHX_ register char *s)
I32 len;
SV *tmpstr;
char term;
- const char *found_newline;
+ const char *found_newline = 0;
char *d;
char *e;
char *peek;
- char *origd;
- const int outer = (PL_rsfp || PL_parser->filtered)
- && !(PL_lex_inwhat == OP_SCALAR);
+ const bool infile = PL_rsfp || PL_parser->filtered;
#ifdef PERL_MAD
I32 stuffstart = s - SvPVX(PL_linestr);
char *tstart;
@@ -9460,10 +9484,9 @@ S_scan_heredoc(pTHX_ register char *s)
PERL_ARGS_ASSERT_SCAN_HEREDOC;
s += 2;
- d = origd = PL_tokenbuf;
+ d = PL_tokenbuf + 1;
e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
- if (!outer)
- *d++ = '\n', ++origd;
+ *PL_tokenbuf = '\n';
peek = s;
while (SPACE_OR_TAB(*peek))
peek++;
@@ -9496,8 +9519,8 @@ S_scan_heredoc(pTHX_ register char *s)
#ifdef PERL_MAD
if (PL_madskills) {
- tstart = PL_tokenbuf + !outer;
- PL_thisclose = newSVpvn(tstart, len - !outer);
+ tstart = PL_tokenbuf + 1;
+ PL_thisclose = newSVpvn(tstart, len - 1);
tstart = SvPVX(PL_linestr) + stuffstart;
PL_thisopen = newSVpvn(tstart, s - tstart);
stuffstart = s - SvPVX(PL_linestr);
@@ -9527,10 +9550,8 @@ S_scan_heredoc(pTHX_ register char *s)
s = olds;
}
#endif
-#ifdef PERL_MAD
- found_newline = 0;
-#endif
- if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
+ if ((infile && !PL_lex_inwhat)
+ || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s))) {
herewas = newSVpvn(s,PL_bufend-s);
}
else {
@@ -9573,12 +9594,11 @@ S_scan_heredoc(pTHX_ register char *s)
CLINE;
PL_multi_start = CopLINE(PL_curcop);
PL_multi_open = PL_multi_close = '<';
- term = *PL_tokenbuf;
- if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
- && !PL_parser->filtered) {
+ if (!infile && PL_lex_inwhat && !found_newline) {
char * const bufptr = PL_sublex_info.super_bufptr;
char * const bufend = PL_sublex_info.super_bufend;
char * const olds = s - SvCUR(herewas);
+ term = *PL_tokenbuf;
s = strchr(bufptr, '\n');
if (!s)
s = bufend;
@@ -9590,7 +9610,7 @@ S_scan_heredoc(pTHX_ register char *s)
}
if (s >= bufend) {
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(origd);
+ missingterm(PL_tokenbuf + 1);
}
sv_setpvn(herewas,bufptr,d-bufptr+1);
sv_setpvn(tmpstr,d+1,s-d);
@@ -9601,7 +9621,8 @@ S_scan_heredoc(pTHX_ register char *s)
s = olds;
goto retval;
}
- else if (!outer) {
+ else if (!infile || found_newline) {
+ term = *PL_tokenbuf;
d = s;
while (s < PL_bufend &&
(*s != term || memNE(s,PL_tokenbuf,len)) ) {
@@ -9610,7 +9631,7 @@ S_scan_heredoc(pTHX_ register char *s)
}
if (s >= PL_bufend) {
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(origd);
+ missingterm(PL_tokenbuf + 1);
}
sv_setpvn(tmpstr,d+1,s-d);
#ifdef PERL_MAD
@@ -9633,6 +9654,8 @@ S_scan_heredoc(pTHX_ register char *s)
}
else
sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
+ term = PL_tokenbuf[1];
+ len--;
while (s >= PL_bufend) { /* multiple line string? */
#ifdef PERL_MAD
if (PL_madskills) {
@@ -9645,9 +9668,9 @@ S_scan_heredoc(pTHX_ register char *s)
#endif
PL_bufptr = s;
CopLINE_inc(PL_curcop);
- if (!outer || !lex_next_chunk(0)) {
+ if (!lex_next_chunk(0)) {
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
- missingterm(PL_tokenbuf);
+ missingterm(PL_tokenbuf + 1);
}
CopLINE_dec(PL_curcop);
s = PL_bufptr;
@@ -9672,7 +9695,7 @@ S_scan_heredoc(pTHX_ register char *s)
else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
PL_bufend[-1] = '\n';
#endif
- if (*s == term && memEQ(s,PL_tokenbuf,len)) {
+ if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
*(SvPVX(PL_linestr) + off ) = ' ';
lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);