summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc5
-rw-r--r--embed.h5
-rw-r--r--locale.c67
-rw-r--r--makedef.pl2
-rw-r--r--mg.c60
-rw-r--r--proto.h9
6 files changed, 90 insertions, 58 deletions
diff --git a/embed.fnc b/embed.fnc
index e2b2671061..37a70e1a99 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1651,9 +1651,12 @@ ATdo |const char*|Perl_langinfo8|const nl_item item|NULLOK utf8ness_t * utf8ness
ATdo |const char*|Perl_langinfo|const int item
ATdo |const char*|Perl_langinfo8|const int item|NULLOK utf8ness_t * utf8ness
#endif
+#ifdef WIN32
+p |bool |get_win32_message_utf8ness|NULLOK const char * string
+#endif
pEX |int |mbtowc_|NULLOK const wchar_t * pwc|NULLOK const char * s|const Size_t len
CpO |int |init_i18nl10n |int printwarn
-p |char* |my_strerror |const int errnum
+p |char* |my_strerror |const int errnum|NN utf8ness_t * utf8ness
XpT |void |_warn_problematic_locale
Xp |void |set_numeric_underlying
Xp |void |set_numeric_standard
diff --git a/embed.h b/embed.h
index f60ad4708b..a4502273cb 100644
--- a/embed.h
+++ b/embed.h
@@ -1408,7 +1408,7 @@
#define my_clearenv() Perl_my_clearenv(aTHX)
#define my_lstat_flags(a) Perl_my_lstat_flags(aTHX_ a)
#define my_stat_flags(a) Perl_my_stat_flags(aTHX_ a)
-#define my_strerror(a) Perl_my_strerror(aTHX_ a)
+#define my_strerror(a,b) Perl_my_strerror(aTHX_ a,b)
#define my_unexec() Perl_my_unexec(aTHX)
#define newATTRSUB_x(a,b,c,d,e,f) Perl_newATTRSUB_x(aTHX_ a,b,c,d,e,f)
#define newSTUB(a,b) Perl_newSTUB(aTHX_ a,b)
@@ -2081,6 +2081,9 @@
#define quadmath_format_needed Perl_quadmath_format_needed
#define quadmath_format_valid Perl_quadmath_format_valid
# endif
+# if defined(WIN32)
+#define get_win32_message_utf8ness(a) Perl_get_win32_message_utf8ness(aTHX_ a)
+# endif
# if defined(_MSC_VER)
#define magic_regdatum_set(a,b) Perl_magic_regdatum_set(aTHX_ a,b)
# endif
diff --git a/locale.c b/locale.c
index 1911daf4a6..3f0f950dca 100644
--- a/locale.c
+++ b/locale.c
@@ -2847,12 +2847,27 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * locale,
return UTF8NESS_YES;
-#endif
+# endif
}
+# ifdef WIN32
+
+bool
+Perl_get_win32_message_utf8ness(pTHX_ const char * string)
+{
+ /* NULL => locale irrelevant, 0 => category irrelevant
+ * so returns based on the UTF-8 legality of the input string, ignoring the
+ * locale and category completely.
+ *
+ * This is because Windows doesn't have LC_MESSAGES */
+ return get_locale_string_utf8ness_i(NULL, 0, string, LOCALE_IS_UTF8);
+}
+
+# endif
#endif /* USE_LOCALE */
+
int
Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len)
{
@@ -6707,6 +6722,8 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
* LC_MESSAGES, it uses whatever strerror() returns. Otherwise the text is
* derived from the locale, LC_MESSAGES if we have that; LC_CTYPE if not.
*
+ * It returns in *utf8ness the result's UTF-8ness
+ *
* The function just calls strerror(), but temporarily switches locales, if
* needed. Many platforms require LC_CTYPE and LC_MESSAGES to be in the same
* CODESET in order for the return from strerror() to not contain '?' symbols,
@@ -6736,11 +6753,11 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
"my_strerror called with errnum %d;" \
" Within locale scope=%d\n", \
errnum, in_locale))
-#define DEBUG_STRERROR_RETURN(errstr) \
+#define DEBUG_STRERROR_RETURN(errstr, utf8ness) \
DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
"Strerror returned; saving a copy: '"); \
- print_bytes_for_locale(errstr, errstr + strlen(errstr), 0); \
- PerlIO_printf(Perl_debug_log, "'\n");)
+ print_bytes_for_locale(errstr, errstr + strlen(errstr), 0); \
+ PerlIO_printf(Perl_debug_log, "'; utf8ness=%d\n", (int) *utf8ness);)
/* On platforms that have precisely one of these categories (Windows
* qualifies), these yield the correct one */
@@ -6759,15 +6776,16 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
/* Here, neither category is defined: use the C locale */
char *
-Perl_my_strerror(pTHX_ const int errnum)
+Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness)
{
PERL_ARGS_ASSERT_MY_STRERROR;
DEBUG_STRERROR_ENTER(errnum, 0);
char *errstr = savepv(strerror_l(errnum, PL_C_locale_obj));
+ *utf8ness = UTF8NESS_IMMATERIAL;
- DEBUG_STRERROR_RETURN(errstr);
+ DEBUG_STRERROR_RETURN(errstr, utf8ness);
SAVEFREEPV(errstr);
return errstr;
@@ -6782,7 +6800,7 @@ Perl_my_strerror(pTHX_ const int errnum)
* locale; otherwise use the current locale object */
char *
-Perl_my_strerror(pTHX_ const int errnum)
+Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness)
{
PERL_ARGS_ASSERT_MY_STRERROR;
@@ -6794,20 +6812,20 @@ Perl_my_strerror(pTHX_ const int errnum)
: use_curlocale_scratch();
char *errstr = savepv(strerror_l(errnum, which_obj));
-
- DEBUG_STRERROR_RETURN(errstr);
+ *utf8ness = get_locale_string_utf8ness_i(NULL, WHICH_LC_INDEX, errstr,
+ LOCALE_UTF8NESS_UNKNOWN);
+ DEBUG_STRERROR_RETURN(errstr, utf8ness);
SAVEFREEPV(errstr);
return errstr;
}
/*--------------------------------------------------------------------------*/
-
# else /* Are using both categories. Place them in the same CODESET,
* either C or the LC_MESSAGES locale */
char *
-Perl_my_strerror(pTHX_ const int errnum)
+Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness)
{
PERL_ARGS_ASSERT_MY_STRERROR;
@@ -6816,6 +6834,7 @@ Perl_my_strerror(pTHX_ const int errnum)
char *errstr;
if (! IN_LC(LC_MESSAGES)) { /* Use C if not within locale scope */
errstr = savepv(strerror_l(errnum, PL_C_locale_obj));
+ *utf8ness = UTF8NESS_IMMATERIAL;
}
else { /* Otherwise, use the LC_MESSAGES locale, making sure LC_CTYPE
matches */
@@ -6823,16 +6842,17 @@ Perl_my_strerror(pTHX_ const int errnum)
cur = newlocale(LC_CTYPE_MASK, querylocale_c(LC_MESSAGES), cur);
errstr = savepv(strerror_l(errnum, cur));
+ *utf8ness = get_locale_string_utf8ness_i(NULL, LC_MESSAGES_INDEX_,
+ errstr, LOCALE_UTF8NESS_UNKNOWN);
freelocale(cur);
}
- DEBUG_STRERROR_RETURN(errstr);
+ DEBUG_STRERROR_RETURN(errstr, utf8ness);
SAVEFREEPV(errstr);
return errstr;
}
# endif /* Above is using strerror_l */
-
/*==========================================================================*/
#else /* Below is not using strerror_l */
# if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES)
@@ -6841,15 +6861,16 @@ Perl_my_strerror(pTHX_ const int errnum)
* strerror */
char *
-Perl_my_strerror(pTHX_ const int errnum)
+Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness)
{
PERL_ARGS_ASSERT_MY_STRERROR;
DEBUG_STRERROR_ENTER(errnum, 0);
char *errstr = savepv(Strerror(errnum));
+ *utf8ness = UTF8NESS_IMMATERIAL;
- DEBUG_STRERROR_RETURN(errstr);
+ DEBUG_STRERROR_RETURN(errstr, utf8ness);
SAVEFREEPV(errstr);
return errstr;
@@ -6863,16 +6884,17 @@ Perl_my_strerror(pTHX_ const int errnum)
* locale; otherwise use the current locale */
char *
-Perl_my_strerror(pTHX_ const int errnum)
+Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness)
{
PERL_ARGS_ASSERT_MY_STRERROR;
DEBUG_STRERROR_ENTER(errnum, IN_LC(categories[WHICH_LC_INDEX]));
char *errstr;
-
if (IN_LC(categories[WHICH_LC_INDEX])) {
errstr = savepv(Strerror(errnum));
+ *utf8ness = get_locale_string_utf8ness_i(NULL, WHICH_LC_INDEX, errstr,
+ LOCALE_UTF8NESS_UNKNOWN);
}
else {
@@ -6885,9 +6907,12 @@ Perl_my_strerror(pTHX_ const int errnum)
restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale);
SETLOCALE_UNLOCK;
+
+ *utf8ness = UTF8NESS_IMMATERIAL;
+
}
- DEBUG_STRERROR_RETURN(errstr);
+ DEBUG_STRERROR_RETURN(errstr, utf8ness);
SAVEFREEPV(errstr);
return errstr;
@@ -6900,7 +6925,7 @@ Perl_my_strerror(pTHX_ const int errnum)
* either C or the LC_MESSAGES locale */
char *
-Perl_my_strerror(pTHX_ const int errnum)
+Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness)
{
PERL_ARGS_ASSERT_MY_STRERROR;
@@ -6923,7 +6948,9 @@ Perl_my_strerror(pTHX_ const int errnum)
SETLOCALE_UNLOCK;
- DEBUG_STRERROR_RETURN(errstr);
+ *utf8ness = get_locale_string_utf8ness_i(NULL, LC_MESSAGES_INDEX_, errstr,
+ LOCALE_UTF8NESS_UNKNOWN);
+ DEBUG_STRERROR_RETURN(errstr, utf8ness);
SAVEFREEPV(errstr);
return errstr;
diff --git a/makedef.pl b/makedef.pl
index e1ccf3a733..ad2bd613ed 100644
--- a/makedef.pl
+++ b/makedef.pl
@@ -273,7 +273,7 @@ if ($ARGS{PLATFORM} ne 'vms') {
if ($ARGS{PLATFORM} ne 'win32') {
++$skip{$_} foreach qw(
- Perl_my_setlocale
+ Perl_get_win32_message_utf8ness
);
}
diff --git a/mg.c b/mg.c
index cab3b8227e..0d5a6eb341 100644
--- a/mg.c
+++ b/mg.c
@@ -773,37 +773,6 @@ S_fixup_errno_string(pTHX_ SV* sv)
if(strEQ(SvPVX(sv), "")) {
sv_catpv(sv, UNKNOWN_ERRNO_MSG);
}
- else {
-
- /* In some locales the error string may come back as UTF-8, in which
- * case we should turn on that flag. This didn't use to happen, and to
- * avoid as many possible backward compatibility issues as possible, we
- * don't turn on the flag unless we have to. So the flag stays off for
- * an entirely invariant string. We assume that if the string looks
- * like UTF-8 in a single script, it really is UTF-8: "text in any
- * other encoding that uses bytes with the high bit set is extremely
- * unlikely to pass a UTF-8 validity test"
- * (http://en.wikipedia.org/wiki/Charset_detection). There is a
- * potential that we will get it wrong however, especially on short
- * error message text, so do an additional check. */
- if ( ! IN_BYTES /* respect 'use bytes' */
- && is_utf8_non_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv))
-
-#ifdef USE_LOCALE_MESSAGES
-
- && _is_cur_LC_category_utf8(LC_MESSAGES)
-
-#else /* If can't check directly, at least can see if script is consistent,
- under UTF-8, which gives us an extra measure of confidence. */
-
- && isSCRIPT_RUN((const U8 *) SvPVX_const(sv), (U8 *) SvEND(sv),
- TRUE) /* Means assume UTF-8 */
-#endif
-
- ) {
- SvUTF8_on(sv);
- }
- }
}
/*
@@ -841,11 +810,16 @@ SV *
Perl_sv_string_from_errnum(pTHX_ int errnum, SV *tgtsv)
{
char const *errstr;
+ utf8ness_t utf8ness;
+
if(!tgtsv)
tgtsv = newSV_type_mortal(SVt_PV);
- errstr = my_strerror(errnum);
+ errstr = my_strerror(errnum, &utf8ness);
if(errstr) {
sv_setpv(tgtsv, errstr);
+ if (utf8ness == UTF8NESS_YES) {
+ SvUTF8_on(tgtsv);
+ }
fixup_errno_string(tgtsv);
} else {
SvPVCLEAR(tgtsv);
@@ -925,7 +899,19 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
#elif defined(OS2)
if (!(_emx_env & 0x200)) { /* Under DOS */
sv_setnv(sv, (NV)errno);
- sv_setpv(sv, errno ? my_strerror(errno) : "");
+ if (errno) {
+ utf8ness_t utf8ness;
+ const char * errstr = my_strerror(errnum, &utf8ness);
+
+ sv_setpv(sv, errstr);
+
+ if (utf8ness == UTF8NESS_YES) {
+ SvUTF8_on(sv);
+ }
+ }
+ else {
+ SvPVCLEAR(sv);
+ }
} else {
if (errno != errno_isOS2) {
const int tmp = _syserrno();
@@ -945,6 +931,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
if (dwErr) {
PerlProc_GetOSError(sv, dwErr);
fixup_errno_string(sv);
+
+# ifdef USE_LOCALE
+ if ( IN_LOCALE
+ && get_win32_message_utf8ness(SvPV_nomg_const_nolen(sv)))
+ {
+ SvUTF8_on(sv);
+ }
+# endif
}
else
SvPVCLEAR(sv);
diff --git a/proto.h b/proto.h
index a4b3b8976a..9509af38fa 100644
--- a/proto.h
+++ b/proto.h
@@ -2545,9 +2545,10 @@ PERL_CALLCONV int Perl_my_socketpair(int family, int type, int protocol, int fd[
#define PERL_ARGS_ASSERT_MY_STAT
PERL_CALLCONV I32 Perl_my_stat_flags(pTHX_ const U32 flags);
#define PERL_ARGS_ASSERT_MY_STAT_FLAGS
-PERL_CALLCONV char* Perl_my_strerror(pTHX_ const int errnum)
+PERL_CALLCONV char* Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness)
__attribute__visibility__("hidden");
-#define PERL_ARGS_ASSERT_MY_STRERROR
+#define PERL_ARGS_ASSERT_MY_STRERROR \
+ assert(utf8ness)
PERL_CALLCONV char * Perl_my_strftime(pTHX_ const char *fmt, int sec, int min, int hour, int mday, int mon, int year, int wday, int yday, int isdst)
__attribute__format__(__strftime__,pTHX_1,0);
@@ -7625,6 +7626,10 @@ PERL_CALLCONV bool Perl_quadmath_format_valid(const char* format)
#endif
#if defined(WIN32)
+PERL_CALLCONV bool Perl_get_win32_message_utf8ness(pTHX_ const char * string)
+ __attribute__visibility__("hidden");
+#define PERL_ARGS_ASSERT_GET_WIN32_MESSAGE_UTF8NESS
+
PERL_CALLCONV_NO_RET void win32_croak_not_implemented(const char * fname)
__attribute__noreturn__;
#define PERL_ARGS_ASSERT_WIN32_CROAK_NOT_IMPLEMENTED \