From 2d3b3561a39bb17fd6003fb262f52b3bc800770e Mon Sep 17 00:00:00 2001 From: Felipe Gasper Date: Fri, 7 Jan 2022 14:28:39 -0500 Subject: Properly handle UTF8-flagged strings when assigning to $0. Issue #19331: Use of SvPV_const and SvPV_force in S_set_dollarzero() wrote the PV internals directly to argv, which causes an improper UTF-8 encode if the SV is UTF8-flagged/upgraded. This fixes that by doing a downgrade prior to those SvPV* calls. If the string contains wide characters (and thus cannot be downgraded) a warning is thrown; this mirrors preexisting behavior with %ENV, print, and other output channels that convert Perl SVs to bytes. The relevant documentation is also updated. --- mg.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'mg.c') diff --git a/mg.c b/mg.c index 10813a7316..5f3eeae4fe 100644 --- a/mg.c +++ b/mg.c @@ -3367,6 +3367,16 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) else sv_setiv(mg->mg_obj, (IV)PerlProc_getpid()); break; case '0': + if (!sv_utf8_downgrade(sv, /* fail_ok */ TRUE)) { + + /* Since we are going to set the string's UTF8-encoded form + as the process name we should update $0 itself to contain + that same (UTF8-encoded) value. */ + sv_utf8_encode(GvSV(mg->mg_obj)); + + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "Wide character in %s", "$0"); + } + LOCK_DOLLARZERO_MUTEX; S_set_dollarzero(aTHX_ sv); UNLOCK_DOLLARZERO_MUTEX; -- cgit v1.2.1