diff options
author | Zefram <zefram@fysh.org> | 2009-11-08 15:03:45 +0100 |
---|---|---|
committer | Rafael Garcia-Suarez <rgs@consttype.org> | 2009-11-08 15:11:11 +0100 |
commit | f7461760003db2ce68155c97ea6c1658e96fcd27 (patch) | |
tree | 3ef60fff1e3763bb814eb07f408a34688546c026 | |
parent | 6bd7445c6d3b53823883c456e32ea27ce24bfc5c (diff) | |
download | perl-f7461760003db2ce68155c97ea6c1658e96fcd27.tar.gz |
Bareword sub lookups
Attached is a patch that changes how the tokeniser looks up subroutines,
when they're referenced by a bareword, for prototype and const-sub
purposes. Formerly, it has looked up bareword subs directly in the
package, which is contrary to the way the generated op tree looks up
the sub, via an rv2cv op. The patch makes the tokeniser generate the
rv2cv op earlier, and dig around in that.
The motivation for this is to allow modules to hook the rv2cv op
creation, to affect the name->subroutine lookup process. Currently,
such hooking affects op execution as intended, but everything goes wrong
with a bareword ref where the tokeniser looks at some unrelated CV,
or a blank space, in the package. With the patch in place, an rv2cv
hook correctly affects the tokeniser and therefore the prototype-based
aspects of parsing.
The patch also changes ck_subr (which applies the argument context and
checking parts of prototype behaviour) to handle subs referenced by an
RV const op inside the rv2cv, where formerly it would only handle a gv
op inside the rv2cv. This is to support the most likely kind of
modified rv2cv op.
The attached patch is the resulting revised version of the bareword
sub patch. It incorporates the original patch (allowing rv2cv op
hookers to control prototype processing), the GV-downgrading addition,
and a mention in perldelta.
-rw-r--r-- | embed.fnc | 1 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | global.sym | 1 | ||||
-rw-r--r-- | gv.c | 47 | ||||
-rw-r--r-- | op.c | 48 | ||||
-rw-r--r-- | pod/perl5112delta.pod | 10 | ||||
-rw-r--r-- | proto.h | 5 | ||||
-rw-r--r-- | toke.c | 71 |
8 files changed, 141 insertions, 44 deletions
@@ -365,6 +365,7 @@ Ap |void |gv_fullname4 |NN SV* sv|NN const GV* gv|NULLOK const char* prefix|bool pMox |GP * |newGP |NN GV *const gv Ap |void |gv_init |NN GV* gv|NULLOK HV* stash|NN const char* name|STRLEN len|int multi Ap |void |gv_name_set |NN GV* gv|NN const char *name|U32 len|U32 flags +Apd |void |gv_try_downgrade|NN GV* gv Apd |HV* |gv_stashpv |NN const char* name|I32 flags Apd |HV* |gv_stashpvn |NN const char* name|U32 namelen|I32 flags Apd |HV* |gv_stashsv |NN SV* sv|I32 flags @@ -291,6 +291,7 @@ #define gv_fullname4 Perl_gv_fullname4 #define gv_init Perl_gv_init #define gv_name_set Perl_gv_name_set +#define gv_try_downgrade Perl_gv_try_downgrade #define gv_stashpv Perl_gv_stashpv #define gv_stashpvn Perl_gv_stashpvn #define gv_stashsv Perl_gv_stashsv @@ -2654,6 +2655,7 @@ #endif #define gv_init(a,b,c,d,e) Perl_gv_init(aTHX_ a,b,c,d,e) #define gv_name_set(a,b,c,d) Perl_gv_name_set(aTHX_ a,b,c,d) +#define gv_try_downgrade(a) Perl_gv_try_downgrade(aTHX_ a) #define gv_stashpv(a,b) Perl_gv_stashpv(aTHX_ a,b) #define gv_stashpvn(a,b,c) Perl_gv_stashpvn(aTHX_ a,b,c) #define gv_stashsv(a,b) Perl_gv_stashsv(aTHX_ a,b) diff --git a/global.sym b/global.sym index 492801792c..6000af7559 100644 --- a/global.sym +++ b/global.sym @@ -148,6 +148,7 @@ Perl_gv_fullname3 Perl_gv_fullname4 Perl_gv_init Perl_gv_name_set +Perl_gv_try_downgrade Perl_gv_stashpv Perl_gv_stashpvn Perl_gv_stashsv @@ -2372,6 +2372,53 @@ Perl_gv_name_set(pTHX_ GV *gv, const char *name, U32 len, U32 flags) } /* +=for apidoc gv_try_downgrade + +If C<gv> is a typeglob containing only a constant sub, and is only +referenced from its package, and both the typeglob and the sub are +sufficiently ordinary, replace the typeglob (in the package) with a +placeholder that more compactly represents the same thing. This is meant +to be used when a placeholder has been upgraded, most likely because +something wanted to look at a proper code object, and it has turned out +to be a constant sub to which a proper reference is no longer required. + +=cut +*/ + +void +Perl_gv_try_downgrade(pTHX_ GV *gv) +{ + HV *stash; + CV *cv; + HEK *namehek; + SV **gvp; + PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE; + if (SvREFCNT(gv) == 1 && SvTYPE(gv) == SVt_PVGV && !SvFAKE(gv) && + !SvOBJECT(gv) && !SvMAGICAL(gv) && !SvREADONLY(gv) && + isGV_with_GP(gv) && GvGP(gv) && + GvMULTI(gv) && !GvINTRO(gv) && GvREFCNT(gv) == 1 && + !GvSV(gv) && !GvAV(gv) && !GvHV(gv) && !GvIOp(gv) && !GvFORM(gv) && + GvEGV(gv) == gv && (stash = GvSTASH(gv)) && (cv = GvCV(gv)) && + !SvOBJECT(cv) && !SvMAGICAL(cv) && !SvREADONLY(cv) && + CvSTASH(cv) == stash && CvGV(cv) == gv && + CvCONST(cv) && !CvMETHOD(cv) && !CvLVALUE(cv) && !CvUNIQUE(cv) && + !CvNODEBUG(cv) && !CvCLONE(cv) && !CvCLONED(cv) && !CvANON(cv) && + (namehek = GvNAME_HEK(gv)) && + (gvp = hv_fetch(stash, HEK_KEY(namehek), + HEK_LEN(namehek)*(HEK_UTF8(namehek) ? -1 : 1), 0)) && + *gvp == (SV*)gv) { + SV *value = SvREFCNT_inc(CvXSUBANY(cv).any_ptr); + SvREFCNT(gv) = 0; + sv_clear((SV*)gv); + SvREFCNT(gv) = 1; + SvFLAGS(gv) = SVt_IV|SVf_ROK; + SvANY(gv) = (XPVGV*)((char*)&(gv->sv_u.svu_iv) - + STRUCT_OFFSET(XPVIV, xiv_iv)); + SvRV_set(gv, value); + } +} + +/* * Local variables: * c-indentation-style: bsd * c-basic-offset: 4 @@ -570,6 +570,13 @@ Perl_op_clear(pTHX_ OP *o) case OP_AELEMFAST: if (! (o->op_type == OP_AELEMFAST && o->op_flags & OPf_SPECIAL)) { /* not an OP_PADAV replacement */ + GV *gv = (o->op_type == OP_GV || o->op_type == OP_GVSV) +#ifdef USE_ITHREADS + && PL_curpad +#endif + ? cGVOPo_gv : NULL; + if (gv) + SvREFCNT_inc(gv); #ifdef USE_ITHREADS if (cPADOPo->op_padix > 0) { /* No GvIN_PAD_off(cGVOPo_gv) here, because other references @@ -581,6 +588,12 @@ Perl_op_clear(pTHX_ OP *o) SvREFCNT_dec(cSVOPo->op_sv); cSVOPo->op_sv = NULL; #endif + if (gv) { + int try_downgrade = SvREFCNT(gv) == 2; + SvREFCNT_dec(gv); + if (try_downgrade) + gv_try_downgrade(gv); + } } break; case OP_METHOD_NAMED: @@ -7945,22 +7958,29 @@ Perl_ck_subr(pTHX_ OP *o) o->op_private |= OPpENTERSUB_HASTARG; for (cvop = o2; cvop->op_sibling; cvop = cvop->op_sibling) ; if (cvop->op_type == OP_RV2CV) { - SVOP* tmpop; o->op_private |= (cvop->op_private & OPpENTERSUB_AMPER); op_null(cvop); /* disable rv2cv */ - tmpop = (SVOP*)((UNOP*)cvop)->op_first; - if (tmpop->op_type == OP_GV && !(o->op_private & OPpENTERSUB_AMPER)) { - GV *gv = cGVOPx_gv(tmpop); - cv = GvCVu(gv); - if (!cv) - tmpop->op_private |= OPpEARLY_CV; - else { - if (SvPOK(cv)) { - STRLEN len; - namegv = CvANON(cv) ? gv : CvGV(cv); - proto = SvPV(MUTABLE_SV(cv), len); - proto_end = proto + len; - } + if (!(o->op_private & OPpENTERSUB_AMPER)) { + SVOP *tmpop = (SVOP*)((UNOP*)cvop)->op_first; + GV *gv = NULL; + switch (tmpop->op_type) { + case OP_GV: { + gv = cGVOPx_gv(tmpop); + cv = GvCVu(gv); + if (!cv) + tmpop->op_private |= OPpEARLY_CV; + } break; + case OP_CONST: { + SV *sv = cSVOPx_sv(tmpop); + if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) + cv = (CV*)SvRV(sv); + } break; + } + if (cv && SvPOK(cv)) { + STRLEN len; + namegv = gv && CvANON(cv) ? gv : CvGV(cv); + proto = SvPV(MUTABLE_SV(cv), len); + proto_end = proto + len; } } } diff --git a/pod/perl5112delta.pod b/pod/perl5112delta.pod index ca8c8096ff..d6dad5dcb6 100644 --- a/pod/perl5112delta.pod +++ b/pod/perl5112delta.pod @@ -74,6 +74,16 @@ implements reverse Polish notation arithmetic via pluggable keywords. This module is mainly used for test purposes, and is not normally installed, but also serves as an example of how to use the new mechanism. +=head2 Overridable function lookup + +Where an extension module hooks the creation of rv2cv ops, to modify +the subroutine lookup process, this now works correctly for bareword +subroutine calls. This means that prototypes on subroutines referenced +this way will be processed correctly. (Previously bareword subroutine +names were initially looked up, for parsing purposes, by an unhookable +mechanism, so extensions could only properly influence subroutine names +that appeared with an C<&> sigil.) + =head1 New Platforms XXX List any platforms that this version of perl compiles on, that previous @@ -943,6 +943,11 @@ PERL_CALLCONV void Perl_gv_name_set(pTHX_ GV* gv, const char *name, U32 len, U32 #define PERL_ARGS_ASSERT_GV_NAME_SET \ assert(gv); assert(name) +PERL_CALLCONV void Perl_gv_try_downgrade(pTHX_ GV* gv) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE \ + assert(gv) + PERL_CALLCONV HV* Perl_gv_stashpv(pTHX_ const char* name, I32 flags) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_GV_STASHPV \ @@ -5361,6 +5361,7 @@ Perl_yylex(pTHX) SV *sv; int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); + OP *rv2cv_op; CV *cv; #ifdef PERL_MAD SV *nextPL_nextwhite = 0; @@ -5454,19 +5455,29 @@ Perl_yylex(pTHX) if (len) goto safe_bareword; - /* Do the explicit type check so that we don't need to force - the initialisation of the symbol table to have a real GV. - Beware - gv may not really be a PVGV, cv may not really be - a PVCV, (because of the space optimisations that gv_init - understands) But they're true if for this symbol there is - respectively a typeglob and a subroutine. - */ - cv = gv ? ((SvTYPE(gv) == SVt_PVGV) - /* Real typeglob, so get the real subroutine: */ - ? GvCVu(gv) - /* A proxy for a subroutine in this package? */ - : SvOK(gv) ? MUTABLE_CV(gv) : NULL) - : NULL; + cv = NULL; + { + OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv)); + const_op->op_private = OPpCONST_BARE; + rv2cv_op = newCVREF(0, const_op); + } + if (rv2cv_op->op_type == OP_RV2CV && + (rv2cv_op->op_flags & OPf_KIDS)) { + OP *rv_op = cUNOPx(rv2cv_op)->op_first; + switch (rv_op->op_type) { + case OP_CONST: { + SV *sv = cSVOPx_sv(rv_op); + if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) + cv = (CV*)SvRV(sv); + } break; + case OP_GV: { + GV *gv = cGVOPx_gv(rv_op); + CV *maybe_cv = GvCVu(gv); + if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV) + cv = maybe_cv; + } break; + } + } /* See if it's the indirect object for a list operator. */ @@ -5489,8 +5500,10 @@ Perl_yylex(pTHX) /* Two barewords in a row may indicate method call. */ if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && - (tmp = intuit_method(s, gv, cv))) + (tmp = intuit_method(s, gv, cv))) { + op_free(rv2cv_op); return REPORT(tmp); + } /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ @@ -5498,7 +5511,7 @@ Perl_yylex(pTHX) if ( ( !immediate_paren && (PL_last_lop_op == OP_SORT || - ((!gv || !cv) && + (!cv && (PL_last_lop_op != OP_MAPSTART && PL_last_lop_op != OP_GREPSTART)))) || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' @@ -5521,6 +5534,7 @@ Perl_yylex(pTHX) /* Is this a word before a => operator? */ if (*s == '=' && s[1] == '>' && !pkgname) { + op_free(rv2cv_op); CLINE; sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) @@ -5535,7 +5549,7 @@ Perl_yylex(pTHX) d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = gv_const_sv(gv))) { + if (*d == ')' && (sv = cv_const_sv(cv))) { s = d + 1; goto its_constant; } @@ -5556,6 +5570,7 @@ Perl_yylex(pTHX) PL_thistoken = newSVpvs(""); } #endif + op_free(rv2cv_op); force_next(WORD); pl_yylval.ival = 0; TOKEN('&'); @@ -5563,7 +5578,8 @@ Perl_yylex(pTHX) /* If followed by var or block, call it a method (unless sub) */ - if ((*s == '$' || *s == '{') && (!gv || !cv)) { + if ((*s == '$' || *s == '{') && !cv) { + op_free(rv2cv_op); PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_METHOD; PREBLOCK(METHOD); @@ -5573,8 +5589,10 @@ Perl_yylex(pTHX) if (!orig_keyword && (isIDFIRST_lazy_if(s,UTF) || *s == '$') - && (tmp = intuit_method(s, gv, cv))) + && (tmp = intuit_method(s, gv, cv))) { + op_free(rv2cv_op); return REPORT(tmp); + } /* Not a method, so call it a subroutine (if defined) */ @@ -5584,25 +5602,17 @@ Perl_yylex(pTHX) "Ambiguous use of -%s resolved as -&%s()", PL_tokenbuf, PL_tokenbuf); /* Check for a constant sub */ - if ((sv = gv_const_sv(gv))) { + if ((sv = cv_const_sv(cv))) { its_constant: + op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); pl_yylval.opval->op_private = 0; TOKEN(WORD); } - /* Resolve to GV now. */ - if (SvTYPE(gv) != SVt_PVGV) { - gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV); - assert (SvTYPE(gv) == SVt_PVGV); - /* cv must have been some sort of placeholder, so - now needs replacing with a real code reference. */ - cv = GvCV(gv); - } - op_free(pl_yylval.opval); - pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv)); + pl_yylval.opval = rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -5670,7 +5680,7 @@ Perl_yylex(pTHX) if (probable_sub) { gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV); op_free(pl_yylval.opval); - pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv)); + pl_yylval.opval = rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -5722,6 +5732,7 @@ Perl_yylex(pTHX) } } } + op_free(rv2cv_op); safe_bareword: if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { |