diff options
-rw-r--r-- | embed.fnc | 5 | ||||
-rw-r--r-- | embed.h | 5 | ||||
-rw-r--r-- | locale.c | 67 | ||||
-rw-r--r-- | makedef.pl | 2 | ||||
-rw-r--r-- | mg.c | 60 | ||||
-rw-r--r-- | proto.h | 9 |
6 files changed, 90 insertions, 58 deletions
@@ -1651,9 +1651,12 @@ ATdo |const char*|Perl_langinfo8|const nl_item item|NULLOK utf8ness_t * utf8ness ATdo |const char*|Perl_langinfo|const int item ATdo |const char*|Perl_langinfo8|const int item|NULLOK utf8ness_t * utf8ness #endif +#ifdef WIN32 +p |bool |get_win32_message_utf8ness|NULLOK const char * string +#endif pEX |int |mbtowc_|NULLOK const wchar_t * pwc|NULLOK const char * s|const Size_t len CpO |int |init_i18nl10n |int printwarn -p |char* |my_strerror |const int errnum +p |char* |my_strerror |const int errnum|NN utf8ness_t * utf8ness XpT |void |_warn_problematic_locale Xp |void |set_numeric_underlying Xp |void |set_numeric_standard @@ -1408,7 +1408,7 @@ #define my_clearenv() Perl_my_clearenv(aTHX) #define my_lstat_flags(a) Perl_my_lstat_flags(aTHX_ a) #define my_stat_flags(a) Perl_my_stat_flags(aTHX_ a) -#define my_strerror(a) Perl_my_strerror(aTHX_ a) +#define my_strerror(a,b) Perl_my_strerror(aTHX_ a,b) #define my_unexec() Perl_my_unexec(aTHX) #define newATTRSUB_x(a,b,c,d,e,f) Perl_newATTRSUB_x(aTHX_ a,b,c,d,e,f) #define newSTUB(a,b) Perl_newSTUB(aTHX_ a,b) @@ -2081,6 +2081,9 @@ #define quadmath_format_needed Perl_quadmath_format_needed #define quadmath_format_valid Perl_quadmath_format_valid # endif +# if defined(WIN32) +#define get_win32_message_utf8ness(a) Perl_get_win32_message_utf8ness(aTHX_ a) +# endif # if defined(_MSC_VER) #define magic_regdatum_set(a,b) Perl_magic_regdatum_set(aTHX_ a,b) # endif @@ -2847,12 +2847,27 @@ S_get_locale_string_utf8ness_i(pTHX_ const char * locale, return UTF8NESS_YES; -#endif +# endif } +# ifdef WIN32 + +bool +Perl_get_win32_message_utf8ness(pTHX_ const char * string) +{ + /* NULL => locale irrelevant, 0 => category irrelevant + * so returns based on the UTF-8 legality of the input string, ignoring the + * locale and category completely. + * + * This is because Windows doesn't have LC_MESSAGES */ + return get_locale_string_utf8ness_i(NULL, 0, string, LOCALE_IS_UTF8); +} + +# endif #endif /* USE_LOCALE */ + int Perl_mbtowc_(pTHX_ const wchar_t * pwc, const char * s, const Size_t len) { @@ -6707,6 +6722,8 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) * LC_MESSAGES, it uses whatever strerror() returns. Otherwise the text is * derived from the locale, LC_MESSAGES if we have that; LC_CTYPE if not. * + * It returns in *utf8ness the result's UTF-8ness + * * The function just calls strerror(), but temporarily switches locales, if * needed. Many platforms require LC_CTYPE and LC_MESSAGES to be in the same * CODESET in order for the return from strerror() to not contain '?' symbols, @@ -6736,11 +6753,11 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) "my_strerror called with errnum %d;" \ " Within locale scope=%d\n", \ errnum, in_locale)) -#define DEBUG_STRERROR_RETURN(errstr) \ +#define DEBUG_STRERROR_RETURN(errstr, utf8ness) \ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ "Strerror returned; saving a copy: '"); \ - print_bytes_for_locale(errstr, errstr + strlen(errstr), 0); \ - PerlIO_printf(Perl_debug_log, "'\n");) + print_bytes_for_locale(errstr, errstr + strlen(errstr), 0); \ + PerlIO_printf(Perl_debug_log, "'; utf8ness=%d\n", (int) *utf8ness);) /* On platforms that have precisely one of these categories (Windows * qualifies), these yield the correct one */ @@ -6759,15 +6776,16 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category) /* Here, neither category is defined: use the C locale */ char * -Perl_my_strerror(pTHX_ const int errnum) +Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) { PERL_ARGS_ASSERT_MY_STRERROR; DEBUG_STRERROR_ENTER(errnum, 0); char *errstr = savepv(strerror_l(errnum, PL_C_locale_obj)); + *utf8ness = UTF8NESS_IMMATERIAL; - DEBUG_STRERROR_RETURN(errstr); + DEBUG_STRERROR_RETURN(errstr, utf8ness); SAVEFREEPV(errstr); return errstr; @@ -6782,7 +6800,7 @@ Perl_my_strerror(pTHX_ const int errnum) * locale; otherwise use the current locale object */ char * -Perl_my_strerror(pTHX_ const int errnum) +Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) { PERL_ARGS_ASSERT_MY_STRERROR; @@ -6794,20 +6812,20 @@ Perl_my_strerror(pTHX_ const int errnum) : use_curlocale_scratch(); char *errstr = savepv(strerror_l(errnum, which_obj)); - - DEBUG_STRERROR_RETURN(errstr); + *utf8ness = get_locale_string_utf8ness_i(NULL, WHICH_LC_INDEX, errstr, + LOCALE_UTF8NESS_UNKNOWN); + DEBUG_STRERROR_RETURN(errstr, utf8ness); SAVEFREEPV(errstr); return errstr; } /*--------------------------------------------------------------------------*/ - # else /* Are using both categories. Place them in the same CODESET, * either C or the LC_MESSAGES locale */ char * -Perl_my_strerror(pTHX_ const int errnum) +Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) { PERL_ARGS_ASSERT_MY_STRERROR; @@ -6816,6 +6834,7 @@ Perl_my_strerror(pTHX_ const int errnum) char *errstr; if (! IN_LC(LC_MESSAGES)) { /* Use C if not within locale scope */ errstr = savepv(strerror_l(errnum, PL_C_locale_obj)); + *utf8ness = UTF8NESS_IMMATERIAL; } else { /* Otherwise, use the LC_MESSAGES locale, making sure LC_CTYPE matches */ @@ -6823,16 +6842,17 @@ Perl_my_strerror(pTHX_ const int errnum) cur = newlocale(LC_CTYPE_MASK, querylocale_c(LC_MESSAGES), cur); errstr = savepv(strerror_l(errnum, cur)); + *utf8ness = get_locale_string_utf8ness_i(NULL, LC_MESSAGES_INDEX_, + errstr, LOCALE_UTF8NESS_UNKNOWN); freelocale(cur); } - DEBUG_STRERROR_RETURN(errstr); + DEBUG_STRERROR_RETURN(errstr, utf8ness); SAVEFREEPV(errstr); return errstr; } # endif /* Above is using strerror_l */ - /*==========================================================================*/ #else /* Below is not using strerror_l */ # if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES) @@ -6841,15 +6861,16 @@ Perl_my_strerror(pTHX_ const int errnum) * strerror */ char * -Perl_my_strerror(pTHX_ const int errnum) +Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) { PERL_ARGS_ASSERT_MY_STRERROR; DEBUG_STRERROR_ENTER(errnum, 0); char *errstr = savepv(Strerror(errnum)); + *utf8ness = UTF8NESS_IMMATERIAL; - DEBUG_STRERROR_RETURN(errstr); + DEBUG_STRERROR_RETURN(errstr, utf8ness); SAVEFREEPV(errstr); return errstr; @@ -6863,16 +6884,17 @@ Perl_my_strerror(pTHX_ const int errnum) * locale; otherwise use the current locale */ char * -Perl_my_strerror(pTHX_ const int errnum) +Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) { PERL_ARGS_ASSERT_MY_STRERROR; DEBUG_STRERROR_ENTER(errnum, IN_LC(categories[WHICH_LC_INDEX])); char *errstr; - if (IN_LC(categories[WHICH_LC_INDEX])) { errstr = savepv(Strerror(errnum)); + *utf8ness = get_locale_string_utf8ness_i(NULL, WHICH_LC_INDEX, errstr, + LOCALE_UTF8NESS_UNKNOWN); } else { @@ -6885,9 +6907,12 @@ Perl_my_strerror(pTHX_ const int errnum) restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale); SETLOCALE_UNLOCK; + + *utf8ness = UTF8NESS_IMMATERIAL; + } - DEBUG_STRERROR_RETURN(errstr); + DEBUG_STRERROR_RETURN(errstr, utf8ness); SAVEFREEPV(errstr); return errstr; @@ -6900,7 +6925,7 @@ Perl_my_strerror(pTHX_ const int errnum) * either C or the LC_MESSAGES locale */ char * -Perl_my_strerror(pTHX_ const int errnum) +Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) { PERL_ARGS_ASSERT_MY_STRERROR; @@ -6923,7 +6948,9 @@ Perl_my_strerror(pTHX_ const int errnum) SETLOCALE_UNLOCK; - DEBUG_STRERROR_RETURN(errstr); + *utf8ness = get_locale_string_utf8ness_i(NULL, LC_MESSAGES_INDEX_, errstr, + LOCALE_UTF8NESS_UNKNOWN); + DEBUG_STRERROR_RETURN(errstr, utf8ness); SAVEFREEPV(errstr); return errstr; diff --git a/makedef.pl b/makedef.pl index e1ccf3a733..ad2bd613ed 100644 --- a/makedef.pl +++ b/makedef.pl @@ -273,7 +273,7 @@ if ($ARGS{PLATFORM} ne 'vms') { if ($ARGS{PLATFORM} ne 'win32') { ++$skip{$_} foreach qw( - Perl_my_setlocale + Perl_get_win32_message_utf8ness ); } @@ -773,37 +773,6 @@ S_fixup_errno_string(pTHX_ SV* sv) if(strEQ(SvPVX(sv), "")) { sv_catpv(sv, UNKNOWN_ERRNO_MSG); } - else { - - /* In some locales the error string may come back as UTF-8, in which - * case we should turn on that flag. This didn't use to happen, and to - * avoid as many possible backward compatibility issues as possible, we - * don't turn on the flag unless we have to. So the flag stays off for - * an entirely invariant string. We assume that if the string looks - * like UTF-8 in a single script, it really is UTF-8: "text in any - * other encoding that uses bytes with the high bit set is extremely - * unlikely to pass a UTF-8 validity test" - * (http://en.wikipedia.org/wiki/Charset_detection). There is a - * potential that we will get it wrong however, especially on short - * error message text, so do an additional check. */ - if ( ! IN_BYTES /* respect 'use bytes' */ - && is_utf8_non_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv)) - -#ifdef USE_LOCALE_MESSAGES - - && _is_cur_LC_category_utf8(LC_MESSAGES) - -#else /* If can't check directly, at least can see if script is consistent, - under UTF-8, which gives us an extra measure of confidence. */ - - && isSCRIPT_RUN((const U8 *) SvPVX_const(sv), (U8 *) SvEND(sv), - TRUE) /* Means assume UTF-8 */ -#endif - - ) { - SvUTF8_on(sv); - } - } } /* @@ -841,11 +810,16 @@ SV * Perl_sv_string_from_errnum(pTHX_ int errnum, SV *tgtsv) { char const *errstr; + utf8ness_t utf8ness; + if(!tgtsv) tgtsv = newSV_type_mortal(SVt_PV); - errstr = my_strerror(errnum); + errstr = my_strerror(errnum, &utf8ness); if(errstr) { sv_setpv(tgtsv, errstr); + if (utf8ness == UTF8NESS_YES) { + SvUTF8_on(tgtsv); + } fixup_errno_string(tgtsv); } else { SvPVCLEAR(tgtsv); @@ -925,7 +899,19 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) #elif defined(OS2) if (!(_emx_env & 0x200)) { /* Under DOS */ sv_setnv(sv, (NV)errno); - sv_setpv(sv, errno ? my_strerror(errno) : ""); + if (errno) { + utf8ness_t utf8ness; + const char * errstr = my_strerror(errnum, &utf8ness); + + sv_setpv(sv, errstr); + + if (utf8ness == UTF8NESS_YES) { + SvUTF8_on(sv); + } + } + else { + SvPVCLEAR(sv); + } } else { if (errno != errno_isOS2) { const int tmp = _syserrno(); @@ -945,6 +931,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) if (dwErr) { PerlProc_GetOSError(sv, dwErr); fixup_errno_string(sv); + +# ifdef USE_LOCALE + if ( IN_LOCALE + && get_win32_message_utf8ness(SvPV_nomg_const_nolen(sv))) + { + SvUTF8_on(sv); + } +# endif } else SvPVCLEAR(sv); @@ -2545,9 +2545,10 @@ PERL_CALLCONV int Perl_my_socketpair(int family, int type, int protocol, int fd[ #define PERL_ARGS_ASSERT_MY_STAT PERL_CALLCONV I32 Perl_my_stat_flags(pTHX_ const U32 flags); #define PERL_ARGS_ASSERT_MY_STAT_FLAGS -PERL_CALLCONV char* Perl_my_strerror(pTHX_ const int errnum) +PERL_CALLCONV char* Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) __attribute__visibility__("hidden"); -#define PERL_ARGS_ASSERT_MY_STRERROR +#define PERL_ARGS_ASSERT_MY_STRERROR \ + assert(utf8ness) PERL_CALLCONV char * Perl_my_strftime(pTHX_ const char *fmt, int sec, int min, int hour, int mday, int mon, int year, int wday, int yday, int isdst) __attribute__format__(__strftime__,pTHX_1,0); @@ -7625,6 +7626,10 @@ PERL_CALLCONV bool Perl_quadmath_format_valid(const char* format) #endif #if defined(WIN32) +PERL_CALLCONV bool Perl_get_win32_message_utf8ness(pTHX_ const char * string) + __attribute__visibility__("hidden"); +#define PERL_ARGS_ASSERT_GET_WIN32_MESSAGE_UTF8NESS + PERL_CALLCONV_NO_RET void win32_croak_not_implemented(const char * fname) __attribute__noreturn__; #define PERL_ARGS_ASSERT_WIN32_CROAK_NOT_IMPLEMENTED \ |