summaryrefslogtreecommitdiff
path: root/pad.c
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2014-11-24 00:00:51 -0800
committerFather Chrysostomos <sprout@cpan.org>2014-11-30 11:48:37 -0800
commit2502ffdfca07fac6972c9b2da7ae160d011c2877 (patch)
tree36550c7148dabae1b2a8fe766a5b64e523657c6f /pad.c
parent09c676d973b724f9d8bc07dbb1b115dd8920d5f5 (diff)
downloadperl-2502ffdfca07fac6972c9b2da7ae160d011c2877.tar.gz
Make pad names always UTF8
Prior to 5.16, pad names never used the UTF8 flag, and all non-ASCII pad names were in UTF8. Because the latter was consistently true, everything just worked anyway. In 5.16, UTF8 handling was done ‘properly’, so that non-ASCII UTF8 strings were always accompanied by the UTF8 flag. Now, it is still the case that the only non-ASCII names to make their way into pad name code are in UTF8. Since ASCII is a subset of UTF8, we effectively *always* have UTF8 pad names. So the flag handling is actually redundant. If we just assume that all pad names are UTF8 (which is true), then we don’t need to bother with the flag checking. There is actually no reason why we should have two different encodings for storing pad names. So this commit enforces what has always been the case and removes the extra code for converting between Latin-1 and UTF8. Nothing on CPAN is using the UTF8 flag with pads, so nothing should break. In fact, we never documented padadd_UTF8_NAME.
Diffstat (limited to 'pad.c')
-rw-r--r--pad.c85
1 files changed, 15 insertions, 70 deletions
diff --git a/pad.c b/pad.c
index 343383bbb7..34c0d9d41b 100644
--- a/pad.c
+++ b/pad.c
@@ -155,33 +155,6 @@ Points directly to the body of the L</PL_comppad> array.
#define PARENT_FAKELEX_FLAGS_set(sv,val) \
STMT_START { ((XPVNV*)SvANY(sv))->xnv_u.xpad_cop_seq.xhigh = (val); } STMT_END
-/*
-This is basically sv_eq_flags() in sv.c, but we avoid the magic
-and bytes checking.
-*/
-
-static bool
-padname_eq_pvn_flags(pTHX_ const PADNAME *pn, const char* pv, const STRLEN
- pvlen, const U32 flags) {
- if ( !PadnameUTF8(pn) != !(flags & SVf_UTF8) ) {
- const char *pv1 = PadnamePV(pn);
- STRLEN cur1 = PadnameLEN(pn);
- const char *pv2 = pv;
- STRLEN cur2 = pvlen;
- if (flags & SVf_UTF8)
- return (bytes_cmp_utf8(
- (const U8*)pv1, cur1,
- (const U8*)pv2, cur2) == 0);
- else
- return (bytes_cmp_utf8(
- (const U8*)pv2, cur2,
- (const U8*)pv1, cur1) == 0);
- }
- else
- return ((PadnamePV(pn) == pv)
- || memEQ(PadnamePV(pn), pv, pvlen));
-}
-
#ifdef DEBUGGING
void
Perl_set_padlist(CV * cv, PADLIST *padlist){
@@ -622,29 +595,18 @@ Perl_pad_add_name_pvn(pTHX_ const char *namepv, STRLEN namelen,
{
PADOFFSET offset;
PADNAME *name;
- bool is_utf8;
PERL_ARGS_ASSERT_PAD_ADD_NAME_PVN;
- if (flags & ~(padadd_OUR|padadd_STATE|padadd_NO_DUP_CHECK|padadd_UTF8_NAME))
+ if (flags & ~(padadd_OUR|padadd_STATE|padadd_NO_DUP_CHECK))
Perl_croak(aTHX_ "panic: pad_add_name_pvn illegal flag bits 0x%" UVxf,
(UV)flags);
name = (PADNAME *)
newSV_type((ourstash || typestash) ? SVt_PVMG : SVt_PVNV);
- if ((is_utf8 = ((flags & padadd_UTF8_NAME) != 0))) {
- namepv = (const char*)bytes_from_utf8((U8*)namepv, &namelen, &is_utf8);
- }
-
sv_setpvn((SV *)name, namepv, namelen);
-
- if (is_utf8) {
- flags |= padadd_UTF8_NAME;
- SvUTF8_on(name);
- }
- else
- flags &= ~padadd_UTF8_NAME;
+ SvUTF8_on(name);
if ((flags & padadd_NO_DUP_CHECK) == 0) {
ENTER;
@@ -655,7 +617,7 @@ Perl_pad_add_name_pvn(pTHX_ const char *namepv, STRLEN namelen,
LEAVE;
}
- offset = pad_alloc_name(name, flags & ~padadd_UTF8_NAME, typestash, ourstash);
+ offset = pad_alloc_name(name, flags, typestash, ourstash);
/* not yet introduced */
COP_SEQ_RANGE_LOW_set(name, PERL_PADSEQ_INTRO);
@@ -714,9 +676,7 @@ Perl_pad_add_name_sv(pTHX_ SV *name, U32 flags, HV *typestash, HV *ourstash)
char *namepv;
STRLEN namelen;
PERL_ARGS_ASSERT_PAD_ADD_NAME_SV;
- namepv = SvPV(name, namelen);
- if (SvUTF8(name))
- flags |= padadd_UTF8_NAME;
+ namepv = SvPVutf8(name, namelen);
return pad_add_name_pvn(namepv, namelen, flags, typestash, ourstash);
}
@@ -987,20 +947,10 @@ Perl_pad_findmy_pvn(pTHX_ const char *namepv, STRLEN namelen, U32 flags)
pad_peg("pad_findmy_pvn");
- if (flags & ~padadd_UTF8_NAME)
+ if (flags)
Perl_croak(aTHX_ "panic: pad_findmy_pvn illegal flag bits 0x%" UVxf,
(UV)flags);
- if (flags & padadd_UTF8_NAME) {
- bool is_utf8 = TRUE;
- namepv = (const char*)bytes_from_utf8((U8*)namepv, &namelen, &is_utf8);
-
- if (is_utf8)
- flags |= padadd_UTF8_NAME;
- else
- flags &= ~padadd_UTF8_NAME;
- }
-
offset = pad_findlex(namepv, namelen, flags,
PL_compcv, PL_cop_seqmax, 1, NULL, &out_pn, &out_flags);
if ((PADOFFSET)offset != NOT_IN_PAD)
@@ -1021,8 +971,8 @@ Perl_pad_findmy_pvn(pTHX_ const char *namepv, STRLEN namelen, U32 flags)
if (name && PadnameLEN(name) == namelen
&& !PadnameOUTER(name)
&& (PadnameIsOUR(name))
- && padname_eq_pvn_flags(aTHX_ name, namepv, namelen,
- flags & padadd_UTF8_NAME ? SVf_UTF8 : 0 )
+ && ( PadnamePV(name) == namepv
+ || memEQ(PadnamePV(name), namepv, namelen) )
&& COP_SEQ_RANGE_LOW(name) == PERL_PADSEQ_INTRO
)
return offset;
@@ -1061,9 +1011,7 @@ Perl_pad_findmy_sv(pTHX_ SV *name, U32 flags)
char *namepv;
STRLEN namelen;
PERL_ARGS_ASSERT_PAD_FINDMY_SV;
- namepv = SvPV(name, namelen);
- if (SvUTF8(name))
- flags |= padadd_UTF8_NAME;
+ namepv = SvPVutf8(name, namelen);
return pad_findmy_pvn(namepv, namelen, flags);
}
@@ -1187,10 +1135,10 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv,
PERL_ARGS_ASSERT_PAD_FINDLEX;
- if (flags & ~(padadd_UTF8_NAME|padadd_STALEOK))
+ flags &= ~ padadd_STALEOK; /* one-shot flag */
+ if (flags)
Perl_croak(aTHX_ "panic: pad_findlex illegal flag bits 0x%" UVxf,
(UV)flags);
- flags &= ~ padadd_STALEOK; /* one-shot flag */
*out_flags = 0;
@@ -1209,8 +1157,8 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv,
for (offset = PadnamelistMAXNAMED(names); offset > 0; offset--) {
const PADNAME * const name = name_p[offset];
if (name && PadnameLEN(name) == namelen
- && padname_eq_pvn_flags(aTHX_ name, namepv, namelen,
- flags & padadd_UTF8_NAME ? SVf_UTF8 : 0))
+ && ( PadnamePV(name) == namepv
+ || memEQ(PadnamePV(name), namepv, namelen) ))
{
if (PadnameOUTER(name)) {
fake_offset = offset; /* in case we don't find a real one */
@@ -1273,8 +1221,7 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv,
if (warn)
S_unavailable(aTHX_
newSVpvn_flags(namepv, namelen,
- SVs_TEMP |
- (flags & padadd_UTF8_NAME ? SVf_UTF8 : 0)));
+ SVs_TEMP|SVf_UTF8));
*out_capture = NULL;
}
@@ -1289,8 +1236,7 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv,
Perl_warner(aTHX_ packWARN(WARN_CLOSURE),
"Variable \"%"SVf"\" will not stay shared",
SVfARG(newSVpvn_flags(namepv, namelen,
- SVs_TEMP |
- (flags & padadd_UTF8_NAME ? SVf_UTF8 : 0))));
+ SVs_TEMP|SVf_UTF8)));
}
if (fake_offset && CvANON(cv)
@@ -1321,8 +1267,7 @@ S_pad_findlex(pTHX_ const char *namepv, STRLEN namelen, U32 flags, const CV* cv,
{
S_unavailable(aTHX_
newSVpvn_flags(namepv, namelen,
- SVs_TEMP |
- (flags & padadd_UTF8_NAME ? SVf_UTF8 : 0)));
+ SVs_TEMP|SVf_UTF8));
*out_capture = NULL;
}
}