summaryrefslogtreecommitdiff
path: root/toke.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2016-07-09 10:41:08 +0100
committerDavid Mitchell <davem@iabyn.com>2016-08-03 20:54:40 +0100
commit4fa06845e75d453a3101cff32e24c5b743f9819e (patch)
tree71f5473b348e99044ad80eab8a2416a3c8f9a177 /toke.c
parent6cb4123eb32087e8546f1056ca7b4e761c28d9b7 (diff)
downloadperl-4fa06845e75d453a3101cff32e24c5b743f9819e.tar.gz
add OP_ARGELEM, OP_ARGDEFELEM, OP_ARGCHECK ops
Currently subroutine signature parsing emits many small discrete ops to implement arg handling. This commit replaces them with a couple of ops per signature element, plus an initial signature check op. These new ops are added to the OP tree during parsing, so will be visible to hooks called up to and including peephole optimisation. It is intended soon that the peephole optimiser will take these per-element ops, and replace them with a single OP_SIGNATURE op which handles the whole signature in a single go. So normally these ops won't actually get executed much. But adding these intermediate-level ops gives three advantages: 1) it allows the parser to efficiently generate subtrees containing individual signature elements, which can't be done if only OP_SIGNATURE or discrete ops are available; 2) prior to optimisation, it provides a simple and straightforward representation of the signature; 3) hooks can mess with the signature OP subtree in ways that make it no longer possible to optimise into an OP_SIGNATURE, but which can still be executed, deparsed etc (if less efficiently). This code: use feature "signatures"; sub f($a, $, $b = 1, @c) {$a} under 'perl -MO=Concise,f' now gives: d <1> leavesub[1 ref] K/REFC,1 ->(end) - <@> lineseq KP ->d 1 <;> nextstate(main 84 foo:6) v:%,469762048 ->2 2 <+> argcheck(3,1,@) v ->3 3 <;> nextstate(main 81 foo:6) v:%,469762048 ->4 4 <+> argelem(0)[$a:81,84] v/SV ->5 5 <;> nextstate(main 82 foo:6) v:%,469762048 ->6 8 <+> argelem(2)[$b:82,84] vKS/SV ->9 6 <|> argdefelem(other->7)[2] sK ->8 7 <$> const(IV 1) s ->8 9 <;> nextstate(main 83 foo:6) v:%,469762048 ->a a <+> argelem(3)[@c:83,84] v/AV ->b - <;> ex-nextstate(main 84 foo:6) v:%,469762048 ->b b <;> nextstate(main 84 foo:6) v:%,469762048 ->c c <0> padsv[$a:81,84] s ->d The argcheck(3,1,@) op knows the number of positional params (3), the number of optional params (1), and whether it has an array / hash slurpy element at the end. 
This op is responsible for checking that @_ contains the right number of args. A simple argelem(0)[$a] op does the equivalent of 'my $a = $_[0]'. Similarly, argelem(3)[@c] is equivalent to 'my @c = @_[3..$#_]'. If it has a child, it gets its arg from the stack rather than using $_[N]. Currently the only used child is the logop argdefelem. argdefelem(other->7)[2] is equivalent to '@_ > 2 ? $_[2] : other'. [ These ops currently assume that the lexical var being introduced is undef/empty and non-magical etc. This is an incorrect assumption and is fixed in a few commits' time ]
Diffstat (limited to 'toke.c')
-rw-r--r--toke.c38
1 files changed, 29 insertions, 9 deletions
diff --git a/toke.c b/toke.c
index 09d15a950d..74313dc459 100644
--- a/toke.c
+++ b/toke.c
@@ -4545,7 +4545,8 @@ Perl_yylex(pTHX)
PL_lex_allbrackets--;
next_type &= 0xffff;
}
- return REPORT(next_type == 'p' ? pending_ident() : next_type);
+ return REPORT(next_type == 'p' ? pending_ident(0)
+ : next_type == 'P' ? pending_ident(1) : next_type);
}
}
@@ -4837,16 +4838,14 @@ Perl_yylex(pTHX)
s = skipspace(s);
if (isIDFIRST_lazy_if(s, UTF)) {
char *dest = PL_tokenbuf + 1;
- /* on next call to yylex this causes pending_ident()
- * to allocmy() etc */
- PL_in_my = KEY_my;
/* read var name, including sigil, into PL_tokenbuf */
PL_tokenbuf[0] = sigil;
parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
0, cBOOL(UTF), FALSE);
*dest = '\0';
assert(PL_tokenbuf[1]); /* we have a variable name */
- force_ident_maybe_lex(sigil);
+ NEXTVAL_NEXTTOKE.ival = sigil;
+ force_next('P'); /* force a signature pending identifier */
}
PL_expect = XOPERATOR;
break;
@@ -8535,6 +8534,9 @@ Perl_yylex(pTHX)
Looks up an identifier in the pad or in a package
+ is_sig indicates that this is a subroutine signature variable
+ rather than a plain pad var.
+
Returns:
PRIVATEREF if this is a lexical name.
BAREWORD if this belongs to a package.
@@ -8551,7 +8553,7 @@ Perl_yylex(pTHX)
*/
static int
-S_pending_ident(pTHX)
+S_pending_ident(pTHX_ bool is_sig)
{
PADOFFSET tmp = 0;
const char pit = (char)pl_yylval.ival;
@@ -8568,7 +8570,7 @@ S_pending_ident(pTHX)
if it's a legal name, the OP is a PADANY.
*/
- if (PL_in_my) {
+ if (is_sig || PL_in_my) {
if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
if (has_colon)
yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
@@ -8577,6 +8579,7 @@ S_pending_ident(pTHX)
tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
}
else {
+ OP *o;
if (has_colon) {
/* "my" variable %s can't be in a package */
/* PL_no_myglob is constant */
@@ -8589,9 +8592,26 @@ S_pending_ident(pTHX)
GCC_DIAG_RESTORE;
}
- pl_yylval.opval = newOP(OP_PADANY, 0);
- pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
+ if (is_sig) {
+ /* A signature 'padop' needs in addition, an op_first to
+ * point to a child sigdefelem, and an extra field to hold
+ * the signature index. We can achieve both by using an
+ * UNOP_AUX and (ab)using the op_aux field to hold the
+ * index. If we ever need more fields, use a real malloced
+ * aux strut instead.
+ */
+ o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
+ INT2PTR(UNOP_AUX_item *,
+ (UV)(PL_parser->sig_elems)));
+ o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
+ : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
+ : OPpARGELEM_HV);
+ }
+ else
+ o = newOP(OP_PADANY, 0);
+ o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
UTF ? SVf_UTF8 : 0);
+ pl_yylval.opval = o;
return PRIVATEREF;
}
}