summaryrefslogtreecommitdiff
path: root/mg.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-03-02 04:29:08 -0700
committerKarl Williamson <khw@cpan.org>2022-09-02 13:21:24 -0600
commitc728a1a6206bcd35a013b6b78f60bdc1adf5bfa2 (patch)
tree43c6b3e85b3065ddc8b06ee50223a1825db543d0 /mg.c
parentcb5c690da676cf4043cf4ff7c29cb6db6bdb1634 (diff)
downloadperl-c728a1a6206bcd35a013b6b78f60bdc1adf5bfa2.tar.gz
Move utf8ness calc for $! into locale.c from mg.c
locale.c has the infrastructure to handle this, so remove repeated logic. The removed code tried to discern better based on using script runs, but this actually doesn't help, so is removed. Since we're now using C99, we can remove the block that was previously needed, and now the code is properly indented, whereas before it wasn't
Diffstat (limited to 'mg.c')
-rw-r--r--mg.c60
1 files changed, 27 insertions, 33 deletions
diff --git a/mg.c b/mg.c
index cab3b8227e..0d5a6eb341 100644
--- a/mg.c
+++ b/mg.c
@@ -773,37 +773,6 @@ S_fixup_errno_string(pTHX_ SV* sv)
if(strEQ(SvPVX(sv), "")) {
sv_catpv(sv, UNKNOWN_ERRNO_MSG);
}
- else {
-
- /* In some locales the error string may come back as UTF-8, in which
- * case we should turn on that flag. This didn't use to happen, and to
- * avoid as many possible backward compatibility issues as possible, we
- * don't turn on the flag unless we have to. So the flag stays off for
- * an entirely invariant string. We assume that if the string looks
- * like UTF-8 in a single script, it really is UTF-8: "text in any
- * other encoding that uses bytes with the high bit set is extremely
- * unlikely to pass a UTF-8 validity test"
- * (http://en.wikipedia.org/wiki/Charset_detection). There is a
- * potential that we will get it wrong however, especially on short
- * error message text, so do an additional check. */
- if ( ! IN_BYTES /* respect 'use bytes' */
- && is_utf8_non_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv))
-
-#ifdef USE_LOCALE_MESSAGES
-
- && _is_cur_LC_category_utf8(LC_MESSAGES)
-
-#else /* If can't check directly, at least can see if script is consistent,
- under UTF-8, which gives us an extra measure of confidence. */
-
- && isSCRIPT_RUN((const U8 *) SvPVX_const(sv), (U8 *) SvEND(sv),
- TRUE) /* Means assume UTF-8 */
-#endif
-
- ) {
- SvUTF8_on(sv);
- }
- }
}
/*
@@ -841,11 +810,16 @@ SV *
Perl_sv_string_from_errnum(pTHX_ int errnum, SV *tgtsv)
{
char const *errstr;
+ utf8ness_t utf8ness;
+
if(!tgtsv)
tgtsv = newSV_type_mortal(SVt_PV);
- errstr = my_strerror(errnum);
+ errstr = my_strerror(errnum, &utf8ness);
if(errstr) {
sv_setpv(tgtsv, errstr);
+ if (utf8ness == UTF8NESS_YES) {
+ SvUTF8_on(tgtsv);
+ }
fixup_errno_string(tgtsv);
} else {
SvPVCLEAR(tgtsv);
@@ -925,7 +899,19 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
#elif defined(OS2)
if (!(_emx_env & 0x200)) { /* Under DOS */
sv_setnv(sv, (NV)errno);
- sv_setpv(sv, errno ? my_strerror(errno) : "");
+ if (errno) {
+ utf8ness_t utf8ness;
+ const char * errstr = my_strerror(errnum, &utf8ness);
+
+ sv_setpv(sv, errstr);
+
+ if (utf8ness == UTF8NESS_YES) {
+ SvUTF8_on(sv);
+ }
+ }
+ else {
+ SvPVCLEAR(sv);
+ }
} else {
if (errno != errno_isOS2) {
const int tmp = _syserrno();
@@ -945,6 +931,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
if (dwErr) {
PerlProc_GetOSError(sv, dwErr);
fixup_errno_string(sv);
+
+# ifdef USE_LOCALE
+ if ( IN_LOCALE
+ && get_win32_message_utf8ness(SvPV_nomg_const_nolen(sv)))
+ {
+ SvUTF8_on(sv);
+ }
+# endif
}
else
SvPVCLEAR(sv);