summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZefram <zefram@fysh.org>2009-11-08 15:03:45 +0100
committerRafael Garcia-Suarez <rgs@consttype.org>2009-11-08 15:11:11 +0100
commitf7461760003db2ce68155c97ea6c1658e96fcd27 (patch)
tree3ef60fff1e3763bb814eb07f408a34688546c026
parent6bd7445c6d3b53823883c456e32ea27ce24bfc5c (diff)
downloadperl-f7461760003db2ce68155c97ea6c1658e96fcd27.tar.gz
Bareword sub lookups
Attached is a patch that changes how the tokeniser looks up subroutines, when they're referenced by a bareword, for prototype and const-sub purposes. Formerly, it has looked up bareword subs directly in the package, which is contrary to the way the generated op tree looks up the sub, via an rv2cv op. The patch makes the tokeniser generate the rv2cv op earlier, and dig around in that. The motivation for this is to allow modules to hook the rv2cv op creation, to affect the name->subroutine lookup process. Currently, such hooking affects op execution as intended, but everything goes wrong with a bareword ref where the tokeniser looks at some unrelated CV, or a blank space, in the package. With the patch in place, an rv2cv hook correctly affects the tokeniser and therefore the prototype-based aspects of parsing. The patch also changes ck_subr (which applies the argument context and checking parts of prototype behaviour) to handle subs referenced by an RV const op inside the rv2cv, where formerly it would only handle a gv op inside the rv2cv. This is to support the most likely kind of modified rv2cv op. The attached patch is the resulting revised version of the bareword sub patch. It incorporates the original patch (allowing rv2cv op hookers to control prototype processing), the GV-downgrading addition, and a mention in perldelta.
-rw-r--r--embed.fnc1
-rw-r--r--embed.h2
-rw-r--r--global.sym1
-rw-r--r--gv.c47
-rw-r--r--op.c48
-rw-r--r--pod/perl5112delta.pod10
-rw-r--r--proto.h5
-rw-r--r--toke.c71
8 files changed, 141 insertions, 44 deletions
diff --git a/embed.fnc b/embed.fnc
index cbea2911f7..755c42d442 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -365,6 +365,7 @@ Ap |void |gv_fullname4 |NN SV* sv|NN const GV* gv|NULLOK const char* prefix|bool
pMox |GP * |newGP |NN GV *const gv
Ap |void |gv_init |NN GV* gv|NULLOK HV* stash|NN const char* name|STRLEN len|int multi
Ap |void |gv_name_set |NN GV* gv|NN const char *name|U32 len|U32 flags
+Apd |void |gv_try_downgrade|NN GV* gv
Apd |HV* |gv_stashpv |NN const char* name|I32 flags
Apd |HV* |gv_stashpvn |NN const char* name|U32 namelen|I32 flags
Apd |HV* |gv_stashsv |NN SV* sv|I32 flags
diff --git a/embed.h b/embed.h
index 636a87b679..e80384adc1 100644
--- a/embed.h
+++ b/embed.h
@@ -291,6 +291,7 @@
#define gv_fullname4 Perl_gv_fullname4
#define gv_init Perl_gv_init
#define gv_name_set Perl_gv_name_set
+#define gv_try_downgrade Perl_gv_try_downgrade
#define gv_stashpv Perl_gv_stashpv
#define gv_stashpvn Perl_gv_stashpvn
#define gv_stashsv Perl_gv_stashsv
@@ -2654,6 +2655,7 @@
#endif
#define gv_init(a,b,c,d,e) Perl_gv_init(aTHX_ a,b,c,d,e)
#define gv_name_set(a,b,c,d) Perl_gv_name_set(aTHX_ a,b,c,d)
+#define gv_try_downgrade(a) Perl_gv_try_downgrade(aTHX_ a)
#define gv_stashpv(a,b) Perl_gv_stashpv(aTHX_ a,b)
#define gv_stashpvn(a,b,c) Perl_gv_stashpvn(aTHX_ a,b,c)
#define gv_stashsv(a,b) Perl_gv_stashsv(aTHX_ a,b)
diff --git a/global.sym b/global.sym
index 492801792c..6000af7559 100644
--- a/global.sym
+++ b/global.sym
@@ -148,6 +148,7 @@ Perl_gv_fullname3
Perl_gv_fullname4
Perl_gv_init
Perl_gv_name_set
+Perl_gv_try_downgrade
Perl_gv_stashpv
Perl_gv_stashpvn
Perl_gv_stashsv
diff --git a/gv.c b/gv.c
index 3e225bc270..932b2b8002 100644
--- a/gv.c
+++ b/gv.c
@@ -2372,6 +2372,53 @@ Perl_gv_name_set(pTHX_ GV *gv, const char *name, U32 len, U32 flags)
}
/*
+=for apidoc gv_try_downgrade
+
+If C<gv> is a typeglob containing only a constant sub, and is only
+referenced from its package, and both the typeglob and the sub are
+sufficiently ordinary, replace the typeglob (in the package) with a
+placeholder that more compactly represents the same thing. This is meant
+to be used when a placeholder has been upgraded, most likely because
+something wanted to look at a proper code object, and it has turned out
+to be a constant sub to which a proper reference is no longer required.
+
+=cut
+*/
+
+void
+Perl_gv_try_downgrade(pTHX_ GV *gv) /* best effort: does nothing unless every check below passes */
+{
+ HV *stash; /* package (stash) that gv claims to live in */
+ CV *cv; /* the sub held in gv's code slot */
+ HEK *namehek; /* gv's name, used to look it up again in the stash */
+ SV **gvp; /* stash entry found under that name */
+ PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE;
+ if (SvREFCNT(gv) == 1 && SvTYPE(gv) == SVt_PVGV && !SvFAKE(gv) && /* singly-referenced, real (non-FAKE) glob */
+ !SvOBJECT(gv) && !SvMAGICAL(gv) && !SvREADONLY(gv) && /* nothing unusual attached to the glob itself */
+ isGV_with_GP(gv) && GvGP(gv) &&
+ GvMULTI(gv) && !GvINTRO(gv) && GvREFCNT(gv) == 1 &&
+ !GvSV(gv) && !GvAV(gv) && !GvHV(gv) && !GvIOp(gv) && !GvFORM(gv) && /* no scalar, array, hash, IO or format slot in use */
+ GvEGV(gv) == gv && (stash = GvSTASH(gv)) && (cv = GvCV(gv)) && /* GvEGV points back at gv; stash and sub both present */
+ !SvOBJECT(cv) && !SvMAGICAL(cv) && !SvREADONLY(cv) &&
+ CvSTASH(cv) == stash && CvGV(cv) == gv && /* the sub belongs to this same stash and this same glob */
+ CvCONST(cv) && !CvMETHOD(cv) && !CvLVALUE(cv) && !CvUNIQUE(cv) && /* constant sub with none of the special CV flags set */
+ !CvNODEBUG(cv) && !CvCLONE(cv) && !CvCLONED(cv) && !CvANON(cv) &&
+ (namehek = GvNAME_HEK(gv)) &&
+ (gvp = hv_fetch(stash, HEK_KEY(namehek),
+ HEK_LEN(namehek)*(HEK_UTF8(namehek) ? -1 : 1), 0)) && /* length is negated for a UTF-8 name; NOTE(review): relies on hv_fetch's negative-klen convention */
+ *gvp == (SV*)gv) { /* the stash entry under this name must be gv itself */
+ SV *value = SvREFCNT_inc(CvXSUBANY(cv).any_ptr); /* presumably the sub's constant value; referenced before gv is torn down */
+ SvREFCNT(gv) = 0; /* refcount is forced to 0 around sv_clear() ... */
+ sv_clear((SV*)gv);
+ SvREFCNT(gv) = 1; /* ... then restored, so the same SV head is reused in place */
+ SvFLAGS(gv) = SVt_IV|SVf_ROK; /* re-flag the head as an IV-typed SV holding a reference */
+ SvANY(gv) = (XPVGV*)((char*)&(gv->sv_u.svu_iv) - /* point SvANY so that the xiv_iv field lands on sv_u.svu_iv in the head */
+ STRUCT_OFFSET(XPVIV, xiv_iv));
+ SvRV_set(gv, value); /* the former glob now refers to the value saved above */
+ }
+}
+
+/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
diff --git a/op.c b/op.c
index 1e869c1374..43cbc07552 100644
--- a/op.c
+++ b/op.c
@@ -570,6 +570,13 @@ Perl_op_clear(pTHX_ OP *o)
case OP_AELEMFAST:
if (! (o->op_type == OP_AELEMFAST && o->op_flags & OPf_SPECIAL)) {
/* not an OP_PADAV replacement */
+ GV *gv = (o->op_type == OP_GV || o->op_type == OP_GVSV)
+#ifdef USE_ITHREADS
+ && PL_curpad
+#endif
+ ? cGVOPo_gv : NULL;
+ if (gv)
+ SvREFCNT_inc(gv);
#ifdef USE_ITHREADS
if (cPADOPo->op_padix > 0) {
/* No GvIN_PAD_off(cGVOPo_gv) here, because other references
@@ -581,6 +588,12 @@ Perl_op_clear(pTHX_ OP *o)
SvREFCNT_dec(cSVOPo->op_sv);
cSVOPo->op_sv = NULL;
#endif
+ if (gv) {
+ int try_downgrade = SvREFCNT(gv) == 2;
+ SvREFCNT_dec(gv);
+ if (try_downgrade)
+ gv_try_downgrade(gv);
+ }
}
break;
case OP_METHOD_NAMED:
@@ -7945,22 +7958,29 @@ Perl_ck_subr(pTHX_ OP *o)
o->op_private |= OPpENTERSUB_HASTARG;
for (cvop = o2; cvop->op_sibling; cvop = cvop->op_sibling) ;
if (cvop->op_type == OP_RV2CV) {
- SVOP* tmpop;
o->op_private |= (cvop->op_private & OPpENTERSUB_AMPER);
op_null(cvop); /* disable rv2cv */
- tmpop = (SVOP*)((UNOP*)cvop)->op_first;
- if (tmpop->op_type == OP_GV && !(o->op_private & OPpENTERSUB_AMPER)) {
- GV *gv = cGVOPx_gv(tmpop);
- cv = GvCVu(gv);
- if (!cv)
- tmpop->op_private |= OPpEARLY_CV;
- else {
- if (SvPOK(cv)) {
- STRLEN len;
- namegv = CvANON(cv) ? gv : CvGV(cv);
- proto = SvPV(MUTABLE_SV(cv), len);
- proto_end = proto + len;
- }
+ if (!(o->op_private & OPpENTERSUB_AMPER)) {
+ SVOP *tmpop = (SVOP*)((UNOP*)cvop)->op_first;
+ GV *gv = NULL;
+ switch (tmpop->op_type) {
+ case OP_GV: {
+ gv = cGVOPx_gv(tmpop);
+ cv = GvCVu(gv);
+ if (!cv)
+ tmpop->op_private |= OPpEARLY_CV;
+ } break;
+ case OP_CONST: {
+ SV *sv = cSVOPx_sv(tmpop);
+ if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
+ cv = (CV*)SvRV(sv);
+ } break;
+ }
+ if (cv && SvPOK(cv)) {
+ STRLEN len;
+ namegv = gv && CvANON(cv) ? gv : CvGV(cv);
+ proto = SvPV(MUTABLE_SV(cv), len);
+ proto_end = proto + len;
}
}
}
diff --git a/pod/perl5112delta.pod b/pod/perl5112delta.pod
index ca8c8096ff..d6dad5dcb6 100644
--- a/pod/perl5112delta.pod
+++ b/pod/perl5112delta.pod
@@ -74,6 +74,16 @@ implements reverse Polish notation arithmetic via pluggable keywords.
This module is mainly used for test purposes, and is not normally
installed, but also serves as an example of how to use the new mechanism.
+=head2 Overridable function lookup
+
+Where an extension module hooks the creation of rv2cv ops, to modify
+the subroutine lookup process, this now works correctly for bareword
+subroutine calls. This means that prototypes on subroutines referenced
+this way will be processed correctly. (Previously bareword subroutine
+names were initially looked up, for parsing purposes, by an unhookable
+mechanism, so extensions could only properly influence subroutine names
+that appeared with an C<&> sigil.)
+
=head1 New Platforms
XXX List any platforms that this version of perl compiles on, that previous
diff --git a/proto.h b/proto.h
index c3322b8169..2a3b118dee 100644
--- a/proto.h
+++ b/proto.h
@@ -943,6 +943,11 @@ PERL_CALLCONV void Perl_gv_name_set(pTHX_ GV* gv, const char *name, U32 len, U32
#define PERL_ARGS_ASSERT_GV_NAME_SET \
assert(gv); assert(name)
+PERL_CALLCONV void Perl_gv_try_downgrade(pTHX_ GV* gv)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE \
+ assert(gv)
+
PERL_CALLCONV HV* Perl_gv_stashpv(pTHX_ const char* name, I32 flags)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_GV_STASHPV \
diff --git a/toke.c b/toke.c
index 0bfa970d0a..680d8a203a 100644
--- a/toke.c
+++ b/toke.c
@@ -5361,6 +5361,7 @@ Perl_yylex(pTHX)
SV *sv;
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
+ OP *rv2cv_op;
CV *cv;
#ifdef PERL_MAD
SV *nextPL_nextwhite = 0;
@@ -5454,19 +5455,29 @@ Perl_yylex(pTHX)
if (len)
goto safe_bareword;
- /* Do the explicit type check so that we don't need to force
- the initialisation of the symbol table to have a real GV.
- Beware - gv may not really be a PVGV, cv may not really be
- a PVCV, (because of the space optimisations that gv_init
- understands) But they're true if for this symbol there is
- respectively a typeglob and a subroutine.
- */
- cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
- /* Real typeglob, so get the real subroutine: */
- ? GvCVu(gv)
- /* A proxy for a subroutine in this package? */
- : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
- : NULL;
+ cv = NULL;
+ {
+ OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
+ const_op->op_private = OPpCONST_BARE;
+ rv2cv_op = newCVREF(0, const_op);
+ }
+ if (rv2cv_op->op_type == OP_RV2CV &&
+ (rv2cv_op->op_flags & OPf_KIDS)) {
+ OP *rv_op = cUNOPx(rv2cv_op)->op_first;
+ switch (rv_op->op_type) {
+ case OP_CONST: {
+ SV *sv = cSVOPx_sv(rv_op);
+ if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
+ cv = (CV*)SvRV(sv);
+ } break;
+ case OP_GV: {
+ GV *gv = cGVOPx_gv(rv_op);
+ CV *maybe_cv = GvCVu(gv);
+ if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
+ cv = maybe_cv;
+ } break;
+ }
+ }
/* See if it's the indirect object for a list operator. */
@@ -5489,8 +5500,10 @@ Perl_yylex(pTHX)
/* Two barewords in a row may indicate method call. */
if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
- (tmp = intuit_method(s, gv, cv)))
+ (tmp = intuit_method(s, gv, cv))) {
+ op_free(rv2cv_op);
return REPORT(tmp);
+ }
/* If not a declared subroutine, it's an indirect object. */
/* (But it's an indir obj regardless for sort.) */
@@ -5498,7 +5511,7 @@ Perl_yylex(pTHX)
if (
( !immediate_paren && (PL_last_lop_op == OP_SORT ||
- ((!gv || !cv) &&
+ (!cv &&
(PL_last_lop_op != OP_MAPSTART &&
PL_last_lop_op != OP_GREPSTART))))
|| (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
@@ -5521,6 +5534,7 @@ Perl_yylex(pTHX)
/* Is this a word before a => operator? */
if (*s == '=' && s[1] == '>' && !pkgname) {
+ op_free(rv2cv_op);
CLINE;
sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
@@ -5535,7 +5549,7 @@ Perl_yylex(pTHX)
d = s + 1;
while (SPACE_OR_TAB(*d))
d++;
- if (*d == ')' && (sv = gv_const_sv(gv))) {
+ if (*d == ')' && (sv = cv_const_sv(cv))) {
s = d + 1;
goto its_constant;
}
@@ -5556,6 +5570,7 @@ Perl_yylex(pTHX)
PL_thistoken = newSVpvs("");
}
#endif
+ op_free(rv2cv_op);
force_next(WORD);
pl_yylval.ival = 0;
TOKEN('&');
@@ -5563,7 +5578,8 @@ Perl_yylex(pTHX)
/* If followed by var or block, call it a method (unless sub) */
- if ((*s == '$' || *s == '{') && (!gv || !cv)) {
+ if ((*s == '$' || *s == '{') && !cv) {
+ op_free(rv2cv_op);
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_METHOD;
PREBLOCK(METHOD);
@@ -5573,8 +5589,10 @@ Perl_yylex(pTHX)
if (!orig_keyword
&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
- && (tmp = intuit_method(s, gv, cv)))
+ && (tmp = intuit_method(s, gv, cv))) {
+ op_free(rv2cv_op);
return REPORT(tmp);
+ }
/* Not a method, so call it a subroutine (if defined) */
@@ -5584,25 +5602,17 @@ Perl_yylex(pTHX)
"Ambiguous use of -%s resolved as -&%s()",
PL_tokenbuf, PL_tokenbuf);
/* Check for a constant sub */
- if ((sv = gv_const_sv(gv))) {
+ if ((sv = cv_const_sv(cv))) {
its_constant:
+ op_free(rv2cv_op);
SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
pl_yylval.opval->op_private = 0;
TOKEN(WORD);
}
- /* Resolve to GV now. */
- if (SvTYPE(gv) != SVt_PVGV) {
- gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
- assert (SvTYPE(gv) == SVt_PVGV);
- /* cv must have been some sort of placeholder, so
- now needs replacing with a real code reference. */
- cv = GvCV(gv);
- }
-
op_free(pl_yylval.opval);
- pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
+ pl_yylval.opval = rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
@@ -5670,7 +5680,7 @@ Perl_yylex(pTHX)
if (probable_sub) {
gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
op_free(pl_yylval.opval);
- pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
+ pl_yylval.opval = rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
@@ -5722,6 +5732,7 @@ Perl_yylex(pTHX)
}
}
}
+ op_free(rv2cv_op);
safe_bareword:
if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {