diff options
-rw-r--r-- | ext/XS-APItest/t/fetch_pad_names.t | 24 | ||||
-rw-r--r-- | op.c | 3 | ||||
-rw-r--r-- | pad.c | 85 | ||||
-rw-r--r-- | pad.h | 5 | ||||
-rw-r--r-- | toke.c | 10 |
5 files changed, 35 insertions, 92 deletions
diff --git a/ext/XS-APItest/t/fetch_pad_names.t b/ext/XS-APItest/t/fetch_pad_names.t index 3d42280952..fb6dcdbfed 100644 --- a/ext/XS-APItest/t/fetch_pad_names.t +++ b/ext/XS-APItest/t/fetch_pad_names.t @@ -41,8 +41,8 @@ general_tests( $cv->(), $names_av, { ], pad_size => { total => { cmp => 2, msg => 'Sub has two lexicals.' }, - utf8 => { cmp => 0, msg => 'Sub has no UTF-8 encoded vars.' }, - invariant => { cmp => 2, msg => 'Sub has two invariant vars.' }, + utf8 => { cmp => 2, msg => 'Sub has only UTF-8 vars.' }, + invariant => { cmp => 0, msg => 'Sub has no invariant vars.' }, }, vars => [ { name => '$zest', msg => 'Sub has [\$zest].', type => 'ok' }, @@ -79,8 +79,8 @@ general_tests( $cv->(), $names_av, { ], pad_size => { total => { cmp => 2, msg => 'Sub has two lexicals, including those it closed over.' }, - utf8 => { cmp => 1, msg => 'UTF-8 in the pad.' }, - invariant => { cmp => 1, msg => '' }, + utf8 => { cmp => 2, msg => 'UTF-8 in the pad.' }, + invariant => { cmp => 0, msg => '' }, }, vars => [ { name => '$ascii', msg => 'Sub has [$ascii].', type => 'ok' }, @@ -120,8 +120,8 @@ general_tests( $cv->(), $names_av, { ], pad_size => { total => { cmp => 2, msg => 'Sub has two lexicals' }, - utf8 => { cmp => 0, msg => 'Latin-1 not upgraded to UTF-8.' }, - invariant => { cmp => 2, msg => '' }, + utf8 => { cmp => 2, msg => 'Latin-1 got upgraded to UTF-8.' }, + invariant => { cmp => 0, msg => '' }, }, vars => [ { name => '$Leon', msg => 'Sub has [$Leon].', type => 'ok' }, @@ -153,8 +153,8 @@ END_EVAL results => [ ({ SKIP => 1 }) x 3 ], pad_size => { total => { cmp => 1, msg => 'Sub has one lexical, which it closed over.' }, - utf8 => { cmp => 0, msg => '' }, - invariant => { cmp => 1, msg => '' }, + utf8 => { cmp => 1, msg => '' }, + invariant => { cmp => 0, msg => '' }, }, vars => [ { name => '$Ceon', msg => "Sub doesn't have [\$Ceon].", type => 'not ok' }, @@ -189,8 +189,8 @@ general_tests( $cv->(), $names_av, { ], pad_size => { total => { cmp => 3, msg => 'Sub has three lexicals.' }, - utf8 => { cmp => 1, msg => 'Japanese stored as UTF-8.' }, - invariant => { cmp => 2, msg => '' }, + utf8 => { cmp => 3, msg => 'Japanese stored as UTF-8.' }, + invariant => { cmp => 0, msg => '' }, }, vars => [ { name => "\$\x{6226}\x{56fd}", msg => "Sub has [\$\x{6226}\x{56fd}].", type => 'ok' }, @@ -236,8 +236,8 @@ general_tests( $cv->(), $names_av, { ], pad_size => { total => { cmp => 1, msg => 'Sub has one lexical.' }, - utf8 => { cmp => 0, msg => '' }, - invariant => { cmp => 1, msg => '' }, + utf8 => { cmp => 1, msg => '' }, + invariant => { cmp => 0, msg => '' }, }, vars => [], }); @@ -613,8 +613,7 @@ Perl_allocmy(pTHX_ const char *const name, const STRLEN len, const U32 flags) off = pad_add_name_pvn(name, len, (is_our ? padadd_OUR : - PL_parser->in_my == KEY_state ? padadd_STATE : 0) - | ( flags & SVf_UTF8 ? SVf_UTF8 : 0 ), + PL_parser->in_my == KEY_state ? padadd_STATE : 0), PL_parser->in_my_stash, (is_our /* $_ is always in main::, even with our */ @@ -155,33 +155,6 @@ Points directly to the body of the L</PL_comppad> array. #define PARENT_FAKELEX_FLAGS_set(sv,val) \ STMT_START { ((XPVNV*)SvANY(sv))->xnv_u.xpad_cop_seq.xhigh = (val); } STMT_END -/* -This is basically sv_eq_flags() in sv.c, but we avoid the magic -and bytes checking. -*/ - -static bool -padname_eq_pvn_flags(pTHX_ const PADNAME *pn, const char* pv, const STRLEN - pvlen, const U32 flags) { - if ( !PadnameUTF8(pn) != !(flags & SVf_UTF8) ) { - const char *pv1 = PadnamePV(pn); - STRLEN cur1 = PadnameLEN(pn); - const char *pv2 = pv; - STRLEN cur2 = pvlen; - if (flags & SVf_UTF8) - return (bytes_cmp_utf8( - (const U8*)pv1, cur1, - (const U8*)pv2, cur2) == 0); - else - return (bytes_cmp_utf8( - (const U8*)pv2, cur2, - (const U8*)pv1, cur1) == 0); - } - else - return ((PadnamePV(pn) == pv) - || memEQ(PadnamePV(pn), pv, pvlen)); -} - #ifdef DEBUGGING void Perl_set_padlist(CV * cv, PADLIST *padlist){ @@ -622,29 +595,18 @@ Perl_pad_add_name_pvn(pTHX_ const char *namepv, STRLEN namelen, { PADOFFSET offset; PADNAME *name; - bool is_utf8; PERL_ARGS_ASSERT_PAD_ADD_NAME_PVN; - if (flags & ~(padadd_OUR|padadd_STATE|padadd_NO_DUP_CHECK|padadd_UTF8_NAME)) + if (flags & ~(padadd_OUR|padadd_STATE|padadd_NO_DUP_CHECK)) Perl_croak(aTHX_ "panic: pad_add_name_pvn illegal flag bits 0x%" UVxf, (UV)flags); name = (PADNAME *) newSV_type((ourstash || typestash) ? SVt_PVMG : SVt_PVNV); - if ((is_utf8 = ((flags & padadd_UTF8_NAME) != 0))) { - namepv = (const char*)bytes_from_utf8((U8*)namepv, &namelen, &is_utf8); - } - sv_setpvn((SV *)name, namepv, namelen); - - if (is_utf8) { - flags |= padadd_UTF8_NAME; - SvUTF8_on(name); - } - else - flags &= ~padadd_UTF8_NAME; + SvUTF8_on(name); if ((flags & padadd_NO_DUP_CHECK) == 0) { ENTER; @@ -655,7 +617,7 @@ Perl_pad_add_name_pvn(pTHX_ const char *namepv, STRLEN namelen, LEAVE; } - offset = pad_alloc_name(name, flags & ~padadd_UTF8_NAME, typestash, ourstash); + offset = pad_alloc_name(name, flags, typestash, ourstash); /* not yet introduced */ COP_SEQ_RANGE_LOW_set(name, PERL_PADSEQ_INTRO); @@ -714,9 +676,7 @@ Perl_pad_add_name_sv(pTHX_ SV *name, U32 flags, HV *typestash, HV *ourstash) char *namepv; STRLEN namelen; PERL_ARGS_ASSERT_PAD_ADD_NAME_SV; - namepv = SvPV(name, namelen); - if (SvUTF8(name)) - flags |= padadd_UTF8_NAME; + namepv = SvPVutf8(name, namelen); return pad_add_name_pvn(namepv, namelen, flags, typestash, ourstash); } @@ -987,20 +947,10 @@ Perl_pad_findmy_pvn(pTHX_ const char *namepv, STRLEN namelen, U32 flags) pad_peg("pad_findmy_pvn"); - if (flags & ~padadd_UTF8_NAME) + if (flags) Perl_croak(aTHX_ "panic: pad_findmy_pvn illegal flag bits 0x%" UVxf, (UV)flags); - if (flags & padadd_UTF8_NAME) { - bool is_utf8 = TRUE; - namepv = (const char*)bytes_from_utf8((U8*)namepv, &namelen, &is_utf8); - - if (is_utf8) - flags |= padadd_UTF8_NAME; - else - flags &= ~padadd_UTF8_NAME; - } - offset = pad_findlex(namepv, namelen, flags, PL_compcv, PL_cop_seqmax, 1, NULL, &out_pn, &out_flags); if ((PADOFFSET)offset != NOT_IN_PAD) @@ -1021,8 +971,8 @@ Perl_pad_findmy_pvn(pTHX_ const char *namepv, STRLEN namelen, U32 flags) if (name && PadnameLEN(name) == namelen && !PadnameOUTER(name) && (PadnameIsOUR(name)) - && padname_eq_pvn_flags(aTHX_ name, namepv, namelen, - flags & padadd_UTF8_NAME ? SVf_UTF8 : 0 ) + && ( PadnamePV(name) == namepv + || memEQ(PadnamePV(name), namepv, namelen) ) && COP_SEQ_RANGE_LOW(name) == PERL_PADSEQ_INTRO ) return offset; @@ -1061,9 +1011,7 @@ Perl_pad_findmy_sv(pTHX_ SV *name, U32 flags) char *namepv; STRLEN namelen; PERL_ARGS_ASSERT_PAD_FINDMY_SV; - namepv = SvPV(name, namelen); - if (SvUTF8(name)) - flags |= padadd_UTF8_NAME; + namepv = SvPVutf8(name, namelen); return pad_findmy_pvn(namepv, namelen, flags); } @@ -1187,10 +1135,10 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv, PERL_ARGS_ASSERT_PAD_FINDLEX; - if (flags & ~(padadd_UTF8_NAME|padadd_STALEOK)) + flags &= ~ padadd_STALEOK; /* one-shot flag */ + if (flags) Perl_croak(aTHX_ "panic: pad_findlex illegal flag bits 0x%" UVxf, (UV)flags); - flags &= ~ padadd_STALEOK; /* one-shot flag */ *out_flags = 0; @@ -1209,8 +1157,8 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv, for (offset = PadnamelistMAXNAMED(names); offset > 0; offset--) { const PADNAME * const name = name_p[offset]; if (name && PadnameLEN(name) == namelen - && padname_eq_pvn_flags(aTHX_ name, namepv, namelen, - flags & padadd_UTF8_NAME ? SVf_UTF8 : 0)) + && ( PadnamePV(name) == namepv + || memEQ(PadnamePV(name), namepv, namelen) )) { if (PadnameOUTER(name)) { fake_offset = offset; /* in case we don't find a real one */ @@ -1273,8 +1221,7 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv, if (warn) S_unavailable(aTHX_ newSVpvn_flags(namepv, namelen, - SVs_TEMP | - (flags & padadd_UTF8_NAME ? SVf_UTF8 : 0))); + SVs_TEMP|SVf_UTF8)); *out_capture = NULL; } @@ -1289,8 +1236,7 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv, Perl_warner(aTHX_ packWARN(WARN_CLOSURE), "Variable \"%"SVf"\" will not stay shared", SVfARG(newSVpvn_flags(namepv, namelen, - SVs_TEMP | - (flags & padadd_UTF8_NAME ? SVf_UTF8 : 0)))); + SVs_TEMP|SVf_UTF8))); } if (fake_offset && CvANON(cv) @@ -1321,8 +1267,7 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv, { S_unavailable(aTHX_ newSVpvn_flags(namepv, namelen, - SVs_TEMP | - (flags & padadd_UTF8_NAME ? SVf_UTF8 : 0))); + SVs_TEMP|SVf_UTF8)); *out_capture = NULL; } } @@ -143,7 +143,6 @@ typedef enum { #define padadd_NO_DUP_CHECK 0x04 /* skip warning on dups. */ #define padadd_STALEOK 0x08 /* allow stale lexical in active * sub, but only one level up */ -#define padadd_UTF8_NAME SVf_UTF8 /* name is UTF-8 encoded. */ /* ASSERT_CURPAD_LEGAL and ASSERT_CURPAD_ACTIVE respectively determine * whether PL_comppad and PL_curpad are consistent and whether they have @@ -234,7 +233,7 @@ GV slot. The length of the name. =for apidoc Amx|bool|PadnameUTF8|PADNAME pn -Whether PadnamePV is in UTF8. +Whether PadnamePV is in UTF8. Currently, this is always true. =for apidoc Amx|SV *|PadnameSV|PADNAME pn Returns the pad name as an SV. This is currently just C<pn>. It will @@ -315,7 +314,7 @@ Restore the old pad saved into the local variable opad by PAD_SAVE_LOCAL() #define PadnamePV(pn) (SvPOKp(pn) ? SvPVX_const(pn) : NULL) #define PadnameLEN(pn) ((SV*)(pn) == &PL_sv_undef ? 0 : SvCUR(pn)) -#define PadnameUTF8(pn) !!SvUTF8(pn) +#define PadnameUTF8(pn) (assert_(SvUTF8(pn)) 1) #define PadnameSV(pn) pn #define PadnameIsOUR(pn) !!SvPAD_OUR(pn) #define PadnameOURSTASH(pn) SvOURSTASH(pn) @@ -6392,7 +6392,7 @@ Perl_yylex(pTHX) char tmpbuf[sizeof PL_tokenbuf + 1]; *tmpbuf = '&'; Copy(PL_tokenbuf, tmpbuf+1, len, char); - off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0); + off = pad_findmy_pvn(tmpbuf, len+1, 0); if (off != NOT_IN_PAD) { assert(off); /* we assume this is boolean-true below */ if (PAD_COMPNAME_FLAGS_isOUR(off)) { @@ -7881,7 +7881,7 @@ Perl_yylex(pTHX) *PL_tokenbuf = '&'; if (memchr(tmpbuf, ':', len) || key != KEY_sub || pad_findmy_pvn( - PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0 + PL_tokenbuf, len + 1, 0 ) != NOT_IN_PAD) sv_setpvn(PL_subname, tmpbuf, len); else { @@ -8182,7 +8182,7 @@ S_pending_ident(pTHX) if (!has_colon) { if (!PL_in_my) tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, - UTF ? SVf_UTF8 : 0); + 0); if (tmp != NOT_IN_PAD) { /* might be an "our" variable" */ if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { @@ -8300,7 +8300,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) char tmpbuf[256]; Copy(w, tmpbuf+1, s - w, char); *tmpbuf = '&'; - off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0); + off = pad_findmy_pvn(tmpbuf, s-w+1, 0); if (off != NOT_IN_PAD) return; } Perl_croak(aTHX_ "No comma allowed after %s", what); @@ -9452,7 +9452,7 @@ S_scan_inputsymbol(pTHX_ char *start) /* try to find it in the pad for this block, otherwise find add symbol table ops */ - const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0); + const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); if (tmp != NOT_IN_PAD) { if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { HV * const stash = PAD_COMPNAME_OURSTASH(tmp); |