From d538caf67d824e5c4ca07dc62b7d7b1ff929ea5a Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Tue, 22 Feb 2022 20:35:17 +0000 Subject: Perl_gv_init_pvn() must handle a stash holding the value -1 As a space optimisation, where possible, sub declarations are stored in the symbol table as a plain scalar, and converted to a PVGV, PVCV pair only when needed. A regression was introduced by the recently merged commit bb5bc97fde9ef2f1: Don't set SVf_POK in Perl_sv_2pv_flags() when caching the string for an IV. This permits XS code (such as serialisers) to distinguish between values that started as IVs but had a string representation cached, and values that started as PVs but had an (exact) integer representation cached. As implemented, the change in flags means that Perl_sv_2pv_flags() will be entered each time the string for an IV is wanted. The next commit will fix SvPV() and the other macros to avoid calling Perl_sv_2pv_flags() more than once, restoring the previous behaviour. Specifically, code which reduces to this: sub foo; $::{foo} . ""; eval q{*foo = []}; changed behaviour (to an assertion failure) Previously the runtime behaviour of the middle line (simplifying slightly) was to convert the IV -1 to the PVIV "-1". The third line would then see this as a(n illegal) prototype "-1", but the code in Perl_gv_init_pvn() would diligently move this across to the PVCV it created, and finish with a structurally valid PVGV and PVCV With the recent commit, the second line would still convert to PVIV, but no longer set the flag bit SVf_POK. This meant that with this same test case, the SvPOK() test in Perl_gv_init_pvn() was no longer true, and a different code path was taken, resulting in broken assumptions and an assertion failure (when assertions are enabled). Fixing this regression also fixes a long standing bug, likely present since the commit from 1998 that added this space optimisation. This pathological code: sub foo; $::{foo} . ""; eval q{ sub foo {} }; use to incorrectly warn: Prototype mismatch: sub main::foo (-1) vs none at (eval 1) line 1. because the placeholder -1 for "no prototype" had been converted to a string, that string then copied because it seemed to be a placeholder prototype, and then the warning issued on the mistaken incorrect prototype. --- gv.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- t/op/gv.t | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 3 deletions(-) diff --git a/gv.c b/gv.c index 1f06833e1a..014554af0e 100644 --- a/gv.c +++ b/gv.c @@ -391,6 +391,52 @@ Perl_gv_init_pv(pTHX_ GV *gv, HV *stash, const char *name, U32 flags) gv_init_pvn(gv, stash, name, strlen(name), flags); } +/* Packages in the symbol table are "stashes" - hashes where the keys are symbol + names and the values are typeglobs. The value $foo::bar is actually found + by looking up the typeglob *foo::{bar} and then reading its SCALAR slot. + + At least, that's what you see in Perl space if you use typeglob syntax. + Usually it's also what's actually stored in the stash, but for some cases + different values are stored (as a space optimisation) and converted to full + typeglobs "on demand" - if a typeglob syntax is used to read a value. It's + the job of this function, Perl_gv_init_pvn(), to undo any trickery and + replace the SV stored in the stash with the regular PVGV structure that it is + a shorthand for. This has to be done "in-place" by upgrading the actual SV + that is already stored in the stash to a PVGV. + + As the public documentation above says: + Converting any scalar that is C may produce unpredictable + results and is reserved for perl's internal use. + + Values that can be stored: + + * plain scalar - a subroutine declaration + The scalar's string value is the subroutine prototype; the integer -1 is + "no prototype". ie shorthand for sub foo ($$); or sub bar; + * reference to a scalar - a constant. ie shorthand for sub PI() { 4; } + * reference to a sub - a subroutine (avoids allocating a PVGV) + + The earliest optimisation was subroutine declarations, implemented in 1998 + by commit 8472ac73d6d80294: + "Sub declaration cost reduced from ~500 to ~100 bytes" + + This space optimisation needs to be invisible to regular Perl code. For this + code: + + sub foo ($$); + *foo = []; + + When the first line is compiled, the optimisation is used, and $::{foo} is + assigned the scalar '$$'. No PVGV or PVCV is created. + + When the second line encountered, the typeglob lookup on foo needs to + "upgrade" the symbol table entry to a PVGV, and then create a PVCV in the + {CODE} slot with the prototype $$ and no body. The typeglob is then available + so that [] can be assigned to the {ARRAY} slot. For the code above the + upgrade happens at compile time, the assignment at runtime. + + Analogous code unwinds the other optimisations. +*/ void Perl_gv_init_pvn(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, U32 flags) { @@ -435,11 +481,18 @@ Perl_gv_init_pvn(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, U32 flag } if (SvLEN(gv)) { if (proto) { + /* For this case, we are "stealing" the buffer from the SvPV and + re-attaching to an SV below with the call to sv_usepvn_flags(). + Hence we don't free it. */ SvPV_set(gv, NULL); - SvLEN_set(gv, 0); - SvPOK_off(gv); - } else + } + else { + /* There is no valid prototype. (SvPOK() must be true for a valid + prototype.) Hence we free the memory. */ Safefree(SvPVX_mutable(gv)); + } + SvLEN_set(gv, 0); + SvPOK_off(gv); } SvIOK_off(gv); isGV_with_GP_on(gv); diff --git a/t/op/gv.t b/t/op/gv.t index 1b37e4d3ca..f2510cb33a 100644 --- a/t/op/gv.t +++ b/t/op/gv.t @@ -1244,6 +1244,63 @@ is ($?, 0, "hv_delete_common must check SvTYPE before using GvAV"); runperl(prog => 'sub foo::ISA; delete $foo::{ISA}'); is ($?, 0, "hv_delete_common must check SvTYPE before using GvAV"); +{ + my $w; + local $SIG{__WARN__} = sub { $w .= shift }; + + # This is a test case for a very long standing bug discovered while + # investigating GH #19450 + sub thump; + + is($::{thump} . "", '-1', "stub sub with no prototype stored as -1"); + + is(eval 'sub thump {}; 1', 1, "can define the stub") + or diag($@); + is($w, undef, "define the stub without warnings"); + undef $w; + + # This is a testcase for the bug reported in GH #19450, which is a + # regression introduced by commit bb5bc97fde9ef2f1 + sub tic_tac_toe; + + is($::{tic_tac_toe} . "", '-1', "stub sub with no prototype stored as -1"); + + is(eval '*tic_tac_toe = []; 1', 1, "can assign to the typeglob") + or diag($a); + is($w, undef, "assign to the typeglob without warnings"); + undef $w; + + # do both: + sub comment_sign; + + is($::{comment_sign} . "", '-1', "stub sub with no prototype stored as -1"); + + is(eval '*comment_sign = []; 1', 1, "can assign to the typeglob") + or diag($a); + is($w, undef, "assign to the typeglob without warnings"); + undef $w; + + is(eval 'sub comment_sign {}; 1', 1, "can define the stub") + or diag($@); + is($w, undef, "define the stub without warnings"); + undef $w; + + # do both, reverse order: + sub widget_mark; + + is($::{widget_mark} . "", '-1', "stub sub with no prototype stored as -1"); + + is(eval 'sub widget_mark {}; 1', 1, "can define the stub") + or diag($@); + is($w, undef, "define the stub without warnings"); + undef $w; + + is(eval '*widget_mark = []; 1', 1, "can assign to the typeglob") + or diag($a); + is($w, undef, "assign to the typeglob without warnings"); + undef $w; +} + done_testing(); __END__ -- cgit v1.2.1