summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Fraser <fraserbn@gmail.com>2011-09-24 11:57:27 -0700
committerFather Chrysostomos <sprout@cpan.org>2011-10-06 13:01:02 -0700
commit0be4d16f8f8037c260cd860eed59d47166fbdd12 (patch)
tree02f0d0e167abdfba7c280bcd7a78d10edb7f6b45
parentbd8cb5529605f33aa9cf95d6c471386b3a0e015d (diff)
downloadperl-0be4d16f8f8037c260cd860eed59d47166fbdd12.tar.gz
gv.c: Initial gv_fetchpvn_flags and gv_stashpvn UTF8 cleanup
Now that a glob can be initialized and fetched in UTF-8, the next commit will introduce some changes in toke.c to actually test this. Committer’s note: To keep tests passing I had to incorporate the toke.c:S_pending_ident changes in the same patch.
-rw-r--r--ext/XS-APItest/t/fetch_pad_names.t4
-rw-r--r--gv.c14
-rw-r--r--toke.c14
3 files changed, 17 insertions, 15 deletions
diff --git a/ext/XS-APItest/t/fetch_pad_names.t b/ext/XS-APItest/t/fetch_pad_names.t
index 8d6e73969e..559bc3f79b 100644
--- a/ext/XS-APItest/t/fetch_pad_names.t
+++ b/ext/XS-APItest/t/fetch_pad_names.t
@@ -165,7 +165,6 @@ END_EVAL
}
-#XXX: This will most certainly break once clean stashes are out.
$cv = sub {
use utf8;
our $戦国 = 10;
@@ -185,7 +184,8 @@ $names_av = fetch_pad_names($cv);
general_tests( $cv->(), $names_av, {
results => [
{ cmp => '10', msg => 'Fetched UTF-8 our var.' },
- ({ cmp => '10', msg => "Symref fetch." }) x 2,
+ { cmp => '10', msg => "Symref fetch of an our works." },
+ { cmp => undef, msg => "..and using the encoded form yields undef." },
],
pad_size => {
total => { cmp => 3, msg => 'Sub has three lexicals.' },
diff --git a/gv.c b/gv.c
index d2a0ed02dd..f5dedee248 100644
--- a/gv.c
+++ b/gv.c
@@ -1285,7 +1285,7 @@ Perl_gv_stashpvn(pTHX_ const char *name, U32 namelen, I32 flags)
if (!(flags & ~GV_NOADD_MASK) && !stash) return NULL;
assert(stash);
if (!HvNAME_get(stash)) {
- hv_name_set(stash, name, namelen, 0);
+ hv_name_set(stash, name, namelen, flags & SVf_UTF8 ? SVf_UTF8 : 0 );
/* FIXME: This is a repeat of logic in gv_fetchpvn_flags */
/* If the containing stash has multiple effective
@@ -1312,7 +1312,7 @@ Perl_gv_stashsv(pTHX_ SV *sv, I32 flags)
PERL_ARGS_ASSERT_GV_STASHSV;
- return gv_stashpvn(ptr, len, flags);
+ return gv_stashpvn(ptr, len, flags | SvUTF8(sv));
}
@@ -1414,7 +1414,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
tmpbuf[len++] = ':';
key = tmpbuf;
}
- gvp = (GV**)hv_fetch(stash, key, len, add);
+ gvp = (GV**)hv_fetch(stash, key, is_utf8 ? -len : len, add);
gv = gvp ? *gvp : NULL;
if (gv && gv != (const GV *)&PL_sv_undef) {
if (SvTYPE(gv) != SVt_PVGV)
@@ -1436,7 +1436,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
hv_name_set(stash, "CORE", 4, 0);
else
hv_name_set(
- stash, nambeg, name_cursor-nambeg, 0
+ stash, nambeg, name_cursor-nambeg, is_utf8
);
/* If the containing stash has multiple effective
names, see that this one gets them, too. */
@@ -1445,7 +1445,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
}
}
else if (!HvNAME_get(stash))
- hv_name_set(stash, nambeg, name_cursor - nambeg, 0);
+ hv_name_set(stash, nambeg, name_cursor - nambeg, is_utf8);
}
if (*name_cursor == ':')
@@ -1512,7 +1512,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
!(len == 1 && sv_type == SVt_PV &&
(*name == 'a' || *name == 'b')) )
{
- gvp = (GV**)hv_fetch(stash,name,len,0);
+ gvp = (GV**)hv_fetch(stash,name,is_utf8 ? -len : len,0);
if (!gvp ||
*gvp == (const GV *)&PL_sv_undef ||
SvTYPE(*gvp) != SVt_PVGV)
@@ -1574,7 +1574,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
if (!SvREFCNT(stash)) /* symbol table under destruction */
return NULL;
- gvp = (GV**)hv_fetch(stash,name,len,add);
+ gvp = (GV**)hv_fetch(stash,name,is_utf8 ? -len : len,add);
if (!gvp || *gvp == (const GV *)&PL_sv_undef) {
if (addmg) gv = (GV *)newSV(0);
else return NULL;
diff --git a/toke.c b/toke.c
index 5261c6c379..cdde065057 100644
--- a/toke.c
+++ b/toke.c
@@ -8347,7 +8347,7 @@ S_pending_ident(pTHX)
HEK * const stashname = HvNAME_HEK(stash);
SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
- sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
+ sv_catsv(sym, newSVpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, SVs_TEMP | (UTF ? SVf_UTF8 : 0 )));
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
pl_yylval.opval->op_private = OPpCONST_ENTERED;
gv_fetchsv(sym,
@@ -8391,8 +8391,8 @@ S_pending_ident(pTHX)
*/
if (ckWARN(WARN_AMBIGUOUS) &&
pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
- GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
- SVt_PVAV);
+ GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
+ ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
/* DO NOT warn for @- and @+ */
&& !( PL_tokenbuf[2] == '\0' &&
@@ -8407,11 +8407,13 @@ S_pending_ident(pTHX)
}
/* build ops for a bareword */
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
- tokenbuf_len - 1));
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
+ tokenbuf_len - 1,
+ UTF ? SVf_UTF8 : 0 ));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
- PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
+ (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
+ | ( UTF ? SVf_UTF8 : 0 ),
((PL_tokenbuf[0] == '$') ? SVt_PV
: (PL_tokenbuf[0] == '@') ? SVt_PVAV
: SVt_PVHV));