diff options
author | Karl Williamson <khw@cpan.org> | 2017-09-07 15:21:56 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2017-09-09 21:27:45 -0600 |
commit | f741678155ebcc9639c420c23996e89e67bb0a4b (patch) | |
tree | c98c06d56883a0e9f9fece4dd2ee66a2a518b4d8 | |
parent | 97a3682bccec0fd02cc1de1c9897bf23545ccf9c (diff) | |
download | perl-f741678155ebcc9639c420c23996e89e67bb0a4b.tar.gz |
Add API function Perl_langinfo()
This is designed to generally replace nl_langinfo() in XS code. It is
thread-safer, hides the quirks of perl's LC_NUMERIC handling, and can be
used on systems lacking nl_langinfo.
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | embed.fnc | 21 | ||||
-rw-r--r-- | embed.h | 33 | ||||
-rw-r--r-- | embedvar.h | 2 | ||||
-rw-r--r-- | ext/XS-APItest/APItest.pm | 2 | ||||
-rw-r--r-- | ext/XS-APItest/APItest.xs | 7 | ||||
-rw-r--r-- | ext/XS-APItest/t/locale.t | 106 | ||||
-rw-r--r-- | intrpvar.h | 3 | ||||
-rw-r--r-- | locale.c | 599 | ||||
-rw-r--r-- | perl.c | 5 | ||||
-rw-r--r-- | perl_langinfo.h | 297 | ||||
-rw-r--r-- | pod/perldelta.pod | 8 | ||||
-rw-r--r-- | proto.h | 63 | ||||
-rw-r--r-- | sv.c | 3 |
14 files changed, 1096 insertions, 54 deletions
@@ -4896,6 +4896,7 @@ parser.h parser object header patchlevel.h The current patch level of perl perl.c main() perl.h Global declarations +perl_langinfo.h Perl's version of <langinfo.h> perlapi.c Perl API functions perlapi.h Perl API function declarations perldtrace.d D script for Perl probes @@ -1259,6 +1259,11 @@ ApdO |HV* |get_hv |NN const char *name|I32 flags ApdO |CV* |get_cv |NN const char* name|I32 flags Apd |CV* |get_cvn_flags |NN const char* name|STRLEN len|I32 flags EXnpo |char* |setlocale |int category|NULLOK const char* locale +#if defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H) +Ando |const char*|Perl_langinfo|const nl_item item +#else +Ando |const char*|Perl_langinfo|const int item +#endif ApOM |int |init_i18nl10n |int printwarn ApOM |int |init_i18nl14n |int printwarn p |char* |my_strerror |const int errnum @@ -2718,15 +2723,20 @@ s |bool |isa_lookup |NN HV *stash|NN const char * const name \ |STRLEN len|U32 flags #endif -#if defined(USE_LOCALE) && defined(PERL_IN_LOCALE_C) +#if defined(PERL_IN_LOCALE_C) +in |const char *|save_to_buffer|NN const char * string \ + |NULLOK char **buf \ + |NN Size_t *buf_size \ + |const Size_t offset +# if defined(USE_LOCALE) s |char* |stdize_locale |NN char* locs s |void |new_collate |NULLOK const char* newcoll s |void |new_ctype |NN const char* newctype s |void |set_numeric_radix -#ifdef WIN32 +# ifdef WIN32 s |char* |my_setlocale |int category|NULLOK const char* locale -#endif -# ifdef DEBUGGING +# endif +# ifdef DEBUGGING s |void |print_collxfrm_input_and_return \ |NN const char * const s \ |NN const char * const e \ @@ -2738,7 +2748,8 @@ s |void |print_bytes_for_locale |NN const char * const s \ snR |char * |setlocale_debug_string |const int category \ |NULLOK const char* const locale \ |NULLOK const char* const retval -# endif +# endif +# endif #endif #if defined(USE_LOCALE) \ @@ -1478,6 +1478,13 @@ # if defined(DEBUGGING) #define get_debug_opts(a,b) Perl_get_debug_opts(aTHX_ a,b) #define set_padlist Perl_set_padlist +# if defined(PERL_IN_LOCALE_C) +# if defined(USE_LOCALE) +#define print_bytes_for_locale(a,b,c) S_print_bytes_for_locale(aTHX_ a,b,c) +#define print_collxfrm_input_and_return(a,b,c,d) S_print_collxfrm_input_and_return(aTHX_ a,b,c,d) +#define setlocale_debug_string S_setlocale_debug_string +# endif +# endif # if defined(PERL_IN_PAD_C) #define cv_dump(a,b) S_cv_dump(aTHX_ a,b) # endif @@ -1488,11 +1495,6 @@ #define printbuf(a,b) S_printbuf(aTHX_ a,b) #define tokereport(a,b) S_tokereport(aTHX_ a,b) # endif -# if defined(USE_LOCALE) && defined(PERL_IN_LOCALE_C) -#define print_bytes_for_locale(a,b,c) S_print_bytes_for_locale(aTHX_ a,b,c) -#define print_collxfrm_input_and_return(a,b,c,d) S_print_collxfrm_input_and_return(aTHX_ a,b,c,d) -#define setlocale_debug_string S_setlocale_debug_string -# endif # endif # if defined(DEBUG_LEAKING_SCALARS_FORK_DUMP) #define dump_sv_child(a) Perl_dump_sv_child(aTHX_ a) @@ -1591,6 +1593,18 @@ #define share_hek_flags(a,b,c,d) S_share_hek_flags(aTHX_ a,b,c,d) #define unshare_hek_or_pvn(a,b,c,d) S_unshare_hek_or_pvn(aTHX_ a,b,c,d) # endif +# if defined(PERL_IN_LOCALE_C) +#define save_to_buffer S_save_to_buffer +# if defined(USE_LOCALE) +#define new_collate(a) S_new_collate(aTHX_ a) +#define new_ctype(a) S_new_ctype(aTHX_ a) +#define set_numeric_radix() S_set_numeric_radix(aTHX) +#define stdize_locale(a) S_stdize_locale(aTHX_ a) +# if defined(WIN32) +#define my_setlocale(a,b) S_my_setlocale(aTHX_ a,b) +# endif +# endif +# endif # if defined(PERL_IN_LOCALE_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_MATHOMS_C) # if defined(USE_LOCALE_COLLATE) #define _mem_collxfrm(a,b,c,d) Perl__mem_collxfrm(aTHX_ a,b,c,d) @@ -1882,15 +1896,6 @@ #define padname_dup(a,b) Perl_padname_dup(aTHX_ a,b) #define padnamelist_dup(a,b) Perl_padnamelist_dup(aTHX_ a,b) # endif -# if defined(USE_LOCALE) && defined(PERL_IN_LOCALE_C) -#define new_collate(a) S_new_collate(aTHX_ a) -#define new_ctype(a) S_new_ctype(aTHX_ a) -#define set_numeric_radix() S_set_numeric_radix(aTHX) -#define stdize_locale(a) S_stdize_locale(aTHX_ a) -# if defined(WIN32) -#define my_setlocale(a,b) S_my_setlocale(aTHX_ a,b) -# endif -# endif # if defined(USE_LOCALE_COLLATE) #define magic_setcollxfrm(a,b) Perl_magic_setcollxfrm(aTHX_ a,b) #ifndef NO_MATHOMS diff --git a/embedvar.h b/embedvar.h index 7d284b894b..8b9842f9cc 100644 --- a/embedvar.h +++ b/embedvar.h @@ -176,6 +176,8 @@ #define PL_inplace (vTHX->Iinplace) #define PL_isarev (vTHX->Iisarev) #define PL_known_layers (vTHX->Iknown_layers) +#define PL_langinfo_buf (vTHX->Ilanginfo_buf) +#define PL_langinfo_bufsize (vTHX->Ilanginfo_bufsize) #define PL_last_in_gv (vTHX->Ilast_in_gv) #define PL_last_swash_hv (vTHX->Ilast_swash_hv) #define PL_last_swash_key (vTHX->Ilast_swash_key) diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 796605f7c0..d4edcac51a 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -5,7 +5,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.91'; +our $VERSION = '0.92'; require XSLoader; diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 7a18bbf291..e9a55b4030 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -6544,6 +6544,13 @@ test_Gconvert(SV * number, SV * num_digits) OUTPUT: RETVAL +SV * +test_Perl_langinfo(SV * item) + CODE: + RETVAL = newSVpv(Perl_langinfo(SvIV(item)), 0); + OUTPUT: + RETVAL + MODULE = XS::APItest PACKAGE = XS::APItest::Backrefs void diff --git a/ext/XS-APItest/t/locale.t b/ext/XS-APItest/t/locale.t index be594b0804..08c16a8499 100644 --- a/ext/XS-APItest/t/locale.t +++ b/ext/XS-APItest/t/locale.t @@ -22,9 +22,6 @@ for (@locales) { } } -skip_all("no non-dot radix locales available") unless $non_dot_locale; - -plan tests => 2; SKIP: { if ($Config{usequadmath}) { @@ -34,3 +31,106 @@ SKIP: { use locale; is(test_Gconvert(4.179, 2), "4.2", "Gconvert doesn't recognize underlying locale inside 'use locale'"); } + +my %correct_C_responses = ( + # Commented out entries are ones which there is room for variation + ABDAY_1 => 'Sun', + ABDAY_2 => 'Mon', + ABDAY_3 => 'Tue', + ABDAY_4 => 'Wed', + ABDAY_5 => 'Thu', + ABDAY_6 => 'Fri', + ABDAY_7 => 'Sat', + ABMON_1 => 'Jan', + ABMON_10 => 'Oct', + ABMON_11 => 'Nov', + ABMON_12 => 'Dec', + ABMON_2 => 'Feb', + ABMON_3 => 'Mar', + ABMON_4 => 'Apr', + ABMON_5 => 'May', + ABMON_6 => 'Jun', + ABMON_7 => 'Jul', + ABMON_8 => 'Aug', + ABMON_9 => 'Sep', + ALT_DIGITS => '', + AM_STR => 'AM', + #CODESET => 'ANSI_X3.4-1968', + #CRNCYSTR => '-', + DAY_1 => 'Sunday', + DAY_2 => 'Monday', + DAY_3 => 'Tuesday', + DAY_4 => 'Wednesday', + DAY_5 => 'Thursday', + DAY_6 => 'Friday', + DAY_7 => 'Saturday', + #D_FMT => '%m/%d/%y', + #D_T_FMT => '%a %b %e %H:%M:%S %Y', + ERA => '', + #ERA_D_FMT => '', + #ERA_D_T_FMT => '', + #ERA_T_FMT => '', + MON_1 => 'January', + MON_10 => 'October', + MON_11 => 'November', + MON_12 => 'December', + MON_2 => 'February', + MON_3 => 'March', + MON_4 => 'April', + MON_5 => 'May', + MON_6 => 'June', + MON_7 => 'July', + MON_8 => 'August', + MON_9 => 'September', + #NOEXPR => '^[nN]', + PM_STR => 'PM', + RADIXCHAR => '.', + THOUSEP => '', + #T_FMT => '%H:%M:%S', + #T_FMT_AMPM => '%I:%M:%S %p', + #YESEXPR => '^[yY]', + ); + +my $hdr = "../../perl_langinfo.h"; +open my $fh, "<", $hdr; +$|=1; + +SKIP: { + skip "No LC_ALL", 1 unless find_locales( &LC_ALL ); + + use POSIX; + setlocale(LC_ALL, "C"); + eval "use I18N::Langinfo qw(langinfo RADIXCHAR); langinfo(RADIXCHAR)"; + my $has_nl_langinfo = $@ eq ""; + + skip "Can't open $hdr for reading: $!", 1 unless $fh; + + my %items; + + # Find all the current items from the header, and their values. + # For non-nl_langinfo systems, those values are arbitrary negative numbers + # set in the header. Otherwise they are the nl_langinfo approved values, + # which for the moment is the item name. + while (<$fh>) { + chomp; + next unless / - \d+ $ /x; + s/ ^ .* PERL_//x; + m/ (.*) \ (.*) /x; + $items{$1} = ($has_nl_langinfo) + ? $1 + : $2; + } + + # Get the translation from item name to numeric value. + I18N::Langinfo->import(keys %items) if $has_nl_langinfo; + + foreach my $item (sort keys %items) { + my $result = test_Perl_langinfo(eval $items{$item}); + if (exists $correct_C_responses{$item}) { + is ($result, $correct_C_responses{$item}, + "Returns expected value for $item"); + } + } +} + +done_testing(); diff --git a/intrpvar.h b/intrpvar.h index e2468bf3fe..b6b20bcad9 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -588,6 +588,9 @@ PERLVARI(I, collation_standard, bool, TRUE) /* Assume simple collation */ #endif /* USE_LOCALE_COLLATE */ +PERLVARI(I, langinfo_buf, char *, NULL) +PERLVARI(I, langinfo_bufsize, Size_t, 0) + #ifdef PERL_SAWAMPERSAND PERLVAR(I, sawampersand, U8) /* must save all match strings */ #endif @@ -36,12 +36,9 @@ #include "EXTERN.h" #define PERL_IN_LOCALE_C +#include "perl_langinfo.h" #include "perl.h" -#ifdef I_LANGINFO -# include <langinfo.h> -#endif - #include "reentr.h" /* If the environment says to, we can output debugging information during @@ -1022,6 +1019,598 @@ Perl_setlocale(int category, const char * locale) return retval; + +} + +PERL_STATIC_INLINE const char * +S_save_to_buffer(const char * string, char **buf, Size_t *buf_size, const Size_t offset) +{ + /* Copy the NUL-terminated 'string' to 'buf' + 'offset'. 'buf' has size 'buf_size', + * growing it if necessary */ + + const Size_t string_size = strlen(string) + offset + 1; + + PERL_ARGS_ASSERT_SAVE_TO_BUFFER; + + if (*buf_size == 0) { + Newx(*buf, string_size, char); + *buf_size = string_size; + } + else if (string_size > *buf_size) { + Renew(*buf, string_size, char); + *buf_size = string_size; + } + + Copy(string, *buf + offset, string_size - offset, char); + return *buf; +} + +/* + +=head1 Locale-related functions and macros + +=for apidoc Perl_langinfo + +This is an (almostÂȘ) drop-in replacement for the system C<L<nl_langinfo(3)>>, +taking the same C<item> parameter values, and returning the same information. +But it is more thread-safe than regular C<nl_langinfo()>, and hides the quirks +of Perl's locale handling from your code, and can be used on systems that lack +a native C<nl_langinfo>. + +Expanding on these: + +=over + +=item * + +It delivers the correct results for the C<RADIXCHAR> and C<THOUSESEP> items, +without you having to write extra code. The reason for the extra code would be +because these are from the C<LC_NUMERIC> locale category, which is normally +kept set to the C locale by Perl, no matter what the underlying locale is +supposed to be, and so to get the expected results, you have to temporarily +toggle into the underlying locale, and later toggle back. (You could use +plain C<nl_langinfo> and C<L</STORE_LC_NUMERIC_FORCE_TO_UNDERLYING>> for this +but then you wouldn't get the other advantages of C<Perl_langinfo()>; not +keeping C<LC_NUMERIC> in the C locale would break a lot of CPAN, which is +expecting the radix (decimal point) character to be a dot.) + +=item * + +Depending on C<item>, it works on systems that don't have C<nl_langinfo>, hence +makes your code more portable. Of the fifty-some possible items specified by +the POSIX 2008 standard, +L<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>, +only two are completely unimplemented. It uses various techniques to recover +the other items, including calling C<L<localeconv(3)>>, and C<L<strftime(3)>>, +both of which are specified in C89, so should be always be available. Later +C<strftime()> versions have additional capabilities; C<""> is returned for +those not available on your system. + +The details for those items which may differ from what this emulation returns +and what a native C<nl_langinfo()> would return are: + +=over + +=item C<CODESET> + +=item C<ERA> + +Unimplemented, so returns C<"">. + +=item C<YESEXPR> + +=item C<NOEXPR> + +Only the values for English are returned. Earlier POSIX standards also +specified C<YESSTR> and C<NOSTR>, but these have been removed from POSIX 2008, +and aren't supported by C<Perl_langinfo>. + +=item C<D_FMT> + +Always evaluates to C<%x>, the locale's appropriate date representation. + +=item C<T_FMT> + +Always evaluates to C<%X>, the locale's appropriate time representation. + +=item C<D_T_FMT> + +Always evaluates to C<%c>, the locale's appropriate date and time +representation. + +=item C<CRNCYSTR> + +The return may be incorrect for those rare locales where the currency symbol +replaces the radix character. +Send email to L<mailto:perlbug@perl.org> if you have examples of it needing +to work differently. + +=item C<ALT_DIGITS> + +Currently this gives the same results as Linux does. +Send email to L<mailto:perlbug@perl.org> if you have examples of it needing +to work differently. + +=item C<ERA_D_FMT> + +=item C<ERA_T_FMT> + +=item C<ERA_D_T_FMT> + +=item C<T_FMT_AMPM> + +These are derived by using C<strftime()>, and not all versions of that function +know about them. C<""> is returned for these on such systems. + +=back + +When using C<Perl_langinfo> on systems that don't have a native +C<nl_langinfo()>, you must + + #include "perl_langinfo.h" + +before the C<perl.h> C<#include>. You can replace your C<langinfo.h> +C<#include> with this one. (Doing it this way keeps out the symbols that plain +C<langinfo.h> imports into the namespace for code that doesn't need it.) + +You also should not use the bare C<langinfo.h> item names, but should preface +them with C<PERL_>, so use C<PERL_RADIXCHAR> instead of plain C<RADIXCHAR>. +The C<PERL_I<foo>> versions will also work for this function on systems that do +have a native C<nl_langinfo>. + +=item * + +It is thread-friendly, returning its result in a buffer that won't be +overwritten by another thread, so you don't have to code for that possibility. +The buffer can be overwritten by the next call to C<nl_langinfo> or +C<Perl_langinfo> in the same thread. + +=item * + +ÂȘIt returns S<C<const char *>>, whereas plain C<nl_langinfo()> returns S<C<char +*>>, but you are (only by documentation) forbidden to write into the buffer. +By declaring this C<const>, the compiler enforces this restriction. The extra +C<const> is why this isn't an unequivocal drop-in replacement for +C<nl_langinfo>. + +=back + +The original impetus for C<Perl_langinfo()> was so that code that needs to +find out the current currency symbol, floating point radix character, or digit +grouping separator can use, on all systems, the simpler and more +thread-friendly C<nl_langinfo> API instead of C<L<localeconv(3)>> which is a +pain to make thread-friendly. For other fields returned by C<localeconv>, it +is better to use the methods given in L<perlcall> to call +L<C<POSIX::localeconv()>|POSIX/localeconv>, which is thread-friendly. + +=cut + +*/ + +const char * +#ifdef HAS_NL_LANGINFO +Perl_langinfo(const nl_item item) +#else +Perl_langinfo(const int item) +#endif +{ + bool toggle = TRUE; + +#if defined(HAS_NL_LANGINFO) +# if ! defined(USE_ITHREADS) + + /* Single-thread, and nl_langinfo() is available. Call it, switching to + * underlying LC_NUMERIC for those items dependent on it */ + + const char * retval; + + if (toggle) { + if (item == PERL_RADIXCHAR || item == PERL_THOUSEP) { + setlocale(LC_NUMERIC, PL_numeric_name); + } + else { + toggle = FALSE; + } + } + + retval = nl_langinfo(item); + + if (toggle) { + setlocale(LC_NUMERIC, "C"); + } + + return retval; + + +# else + + /* Multi-threaded, with native nl_langinfo(). Use it, copying result to + * per-thread buffer, and toggling LC_NUMERIC if necessary, all within a + * crtical section */ + + dTHX; + + LOCALE_LOCK; + + if (toggle) { + if (item == PERL_RADIXCHAR || item == PERL_THOUSEP) { + setlocale(LC_NUMERIC, PL_numeric_name); + } + else { + toggle = FALSE; + } + } + + save_to_buffer(nl_langinfo(item), &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + + if (toggle) { + setlocale(LC_NUMERIC, "C"); + } + + LOCALE_UNLOCK; + + return PL_langinfo_buf; + +# endif +#else /* Below, emulate nl_langinfo as best we can */ + + dTHX; + +# ifdef HAS_LOCALECONV + + const struct lconv* lc; + +# endif +# ifdef HAS_STRFTIME + + struct tm tm; + bool return_format = FALSE; /* Return the %format, not the value */ + const char * format; + +# endif + + /* We copy the results to a per-thread buffer, even if not multi-threaded. + * This is in part to simplify this code, and partly because we need a + * buffer anyway for strftime(), and partly because a call of localeconv() + * could otherwise wipe out the buffer, and the programmer would not be + * expecting this, as this is a nl_langinfo() substitute after all, so s/he + * might be thinking their localeconv() is safe until another localeconv() + * call. */ + + switch (item) { + Size_t len; + const char * retval; + + /* These 2 are unimplemented */ + case PERL_CODESET: + case PERL_ERA: /* For use with strftime() %E modifier */ + + default: + return ""; + + /* We use only an English set, since we don't know any more */ + case PERL_YESEXPR: return "^[+1yY]"; + case PERL_NOEXPR: return "^[-0nN]"; + +# ifdef HAS_LOCALECONV + + case PERL_CRNCYSTR: + + LOCALE_LOCK; + + lc = localeconv(); + if (! lc || ! lc->currency_symbol || strEQ("", lc->currency_symbol)) + { + LOCALE_UNLOCK; + return ""; + } + + /* Leave the first spot empty to be filled in below */ + save_to_buffer(lc->currency_symbol, &PL_langinfo_buf, + &PL_langinfo_bufsize, 1); + if (lc->mon_decimal_point && strEQ(lc->mon_decimal_point, "")) + { /* khw couldn't figure out how the localedef specifications + would show that the $ should replace the radix; this is + just a guess as to how it might work.*/ + *PL_langinfo_buf = '.'; + } + else if (lc->p_cs_precedes) { + *PL_langinfo_buf = '-'; + } + else { + *PL_langinfo_buf = '+'; + } + + LOCALE_UNLOCK; + break; + + case PERL_RADIXCHAR: + case PERL_THOUSEP: + + LOCALE_LOCK; + + if (toggle) { + setlocale(LC_NUMERIC, PL_numeric_name); + } + + lc = localeconv(); + if (! lc) { + retval = ""; + } + else switch (item) { + case PERL_RADIXCHAR: + if (! lc->decimal_point) { + retval = ""; + } + else { + retval = lc->decimal_point; + } + break; + + case PERL_THOUSEP: + if (! lc->thousands_sep || strEQ("", lc->thousands_sep)) { + retval = ""; + } + else { + retval = lc->thousands_sep; + } + break; + + default: + LOCALE_UNLOCK; + Perl_croak(aTHX_ "panic: %s: %d: switch case: %d problem", + __FILE__, __LINE__, item); + } + + save_to_buffer(retval, &PL_langinfo_buf, &PL_langinfo_bufsize, 0); + + if (toggle) { + setlocale(LC_NUMERIC, "C"); + } + + LOCALE_UNLOCK; + + break; + +# endif +# ifdef HAS_STRFTIME + + /* These are defined by C89, so we assume that strftime supports them, + * and so are returned unconditionally; they may not be what the locale + * actually says, but should give good enough results for someone using + * them as formats (as opposed to trying to parse them to figure out + * what the locale says). The other format ones are actually tested to + * verify they work on the platform */ + case PERL_D_FMT: return "%x"; + case PERL_T_FMT: return "%X"; + case PERL_D_T_FMT: return "%c"; + + /* These formats are only available in later strfmtime's */ + case PERL_ERA_D_FMT: case PERL_ERA_T_FMT: case PERL_ERA_D_T_FMT: + case PERL_T_FMT_AMPM: + + /* The rest can be gotten from most versions of strftime(). */ + case PERL_ABDAY_1: case PERL_ABDAY_2: case PERL_ABDAY_3: + case PERL_ABDAY_4: case PERL_ABDAY_5: case PERL_ABDAY_6: + case PERL_ABDAY_7: + case PERL_ALT_DIGITS: + case PERL_AM_STR: case PERL_PM_STR: + case PERL_ABMON_1: case PERL_ABMON_2: case PERL_ABMON_3: + case PERL_ABMON_4: case PERL_ABMON_5: case PERL_ABMON_6: + case PERL_ABMON_7: case PERL_ABMON_8: case PERL_ABMON_9: + case PERL_ABMON_10: case PERL_ABMON_11: case PERL_ABMON_12: + case PERL_DAY_1: case PERL_DAY_2: case PERL_DAY_3: case PERL_DAY_4: + case PERL_DAY_5: case PERL_DAY_6: case PERL_DAY_7: + case PERL_MON_1: case PERL_MON_2: case PERL_MON_3: case PERL_MON_4: + case PERL_MON_5: case PERL_MON_6: case PERL_MON_7: case PERL_MON_8: + case PERL_MON_9: case PERL_MON_10: case PERL_MON_11: case PERL_MON_12: + + LOCALE_LOCK; + + init_tm(&tm); /* Precaution against core dumps */ + tm.tm_sec = 30; + tm.tm_min = 30; + tm.tm_hour = 6; + tm.tm_year = 2017 - 1900; + tm.tm_wday = 0; + tm.tm_mon = 0; + switch (item) { + default: + LOCALE_UNLOCK; + Perl_croak(aTHX_ "panic: %s: %d: switch case: %d problem", + __FILE__, __LINE__, item); + NOT_REACHED; /* NOTREACHED */ + + case PERL_PM_STR: tm.tm_hour = 18; + case PERL_AM_STR: + format = "%p"; + break; + + case PERL_ABDAY_7: tm.tm_wday++; + case PERL_ABDAY_6: tm.tm_wday++; + case PERL_ABDAY_5: tm.tm_wday++; + case PERL_ABDAY_4: tm.tm_wday++; + case PERL_ABDAY_3: tm.tm_wday++; + case PERL_ABDAY_2: tm.tm_wday++; + case PERL_ABDAY_1: + format = "%a"; + break; + + case PERL_DAY_7: tm.tm_wday++; + case PERL_DAY_6: tm.tm_wday++; + case PERL_DAY_5: tm.tm_wday++; + case PERL_DAY_4: tm.tm_wday++; + case PERL_DAY_3: tm.tm_wday++; + case PERL_DAY_2: tm.tm_wday++; + case PERL_DAY_1: + format = "%A"; + break; + + case PERL_ABMON_12: tm.tm_mon++; + case PERL_ABMON_11: tm.tm_mon++; + case PERL_ABMON_10: tm.tm_mon++; + case PERL_ABMON_9: tm.tm_mon++; + case PERL_ABMON_8: tm.tm_mon++; + case PERL_ABMON_7: tm.tm_mon++; + case PERL_ABMON_6: tm.tm_mon++; + case PERL_ABMON_5: tm.tm_mon++; + case PERL_ABMON_4: tm.tm_mon++; + case PERL_ABMON_3: tm.tm_mon++; + case PERL_ABMON_2: tm.tm_mon++; + case PERL_ABMON_1: + format = "%b"; + break; + + case PERL_MON_12: tm.tm_mon++; + case PERL_MON_11: tm.tm_mon++; + case PERL_MON_10: tm.tm_mon++; + case PERL_MON_9: tm.tm_mon++; + case PERL_MON_8: tm.tm_mon++; + case PERL_MON_7: tm.tm_mon++; + case PERL_MON_6: tm.tm_mon++; + case PERL_MON_5: tm.tm_mon++; + case PERL_MON_4: tm.tm_mon++; + case PERL_MON_3: tm.tm_mon++; + case PERL_MON_2: tm.tm_mon++; + case PERL_MON_1: + format = "%B"; + break; + + case PERL_T_FMT_AMPM: + format = "%r"; + return_format = TRUE; + break; + + case PERL_ERA_D_FMT: + format = "%Ex"; + return_format = TRUE; + break; + + case PERL_ERA_T_FMT: + format = "%EX"; + return_format = TRUE; + break; + + case PERL_ERA_D_T_FMT: + format = "%Ec"; + return_format = TRUE; + break; + + case PERL_ALT_DIGITS: + tm.tm_wday = 0; + format = "%Ow"; /* Find the alternate digit for 0 */ + break; + } + + /* We can't use my_strftime() because it doesn't look at tm_wday */ + while (0 == strftime(PL_langinfo_buf, PL_langinfo_bufsize, + format, &tm)) + { + /* A zero return means one of: + * a) there wasn't enough space in PL_langinfo_buf + * b) the format, like a plain %p, returns empty + * c) it was an illegal format, though some implementations of + * strftime will just return the illegal format as a plain + * character sequence. + * + * To quickly test for case 'b)', try again but precede the + * format with a plain character. If that result is still + * empty, the problem is either 'a)' or 'c)' */ + + Size_t format_size = strlen(format) + 1; + Size_t mod_size = format_size + 1; + char * mod_format; + char * temp_result; + + Newx(mod_format, mod_size, char); + Newx(temp_result, PL_langinfo_bufsize, char); + *mod_format = '\a'; + my_strlcpy(mod_format + 1, format, mod_size); + len = strftime(temp_result, + PL_langinfo_bufsize, + mod_format, &tm); + Safefree(mod_format); + Safefree(temp_result); + + /* If 'len' is non-zero, it means that we had a case like %p + * which means the current locale doesn't use a.m. or p.m., and + * that is valid */ + if (len == 0) { + + /* Here, still didn't work. If we get well beyond a + * reasonable size, bail out to prevent an infinite loop. */ + + if (PL_langinfo_bufsize > 100 * format_size) { + *PL_langinfo_buf = '\0'; + } + else { /* Double the buffer size to retry; Add 1 in case + original was 0, so we aren't stuck at 0. */ + PL_langinfo_bufsize *= 2; + PL_langinfo_bufsize++; + Renew(PL_langinfo_buf, PL_langinfo_bufsize, char); + continue; + } + } + + break; + } + + /* Here, we got a result. + * + * If the item is 'ALT_DIGITS', PL_langinfo_buf contains the + * alternate format for wday 0. If the value is the same as the + * normal 0, there isn't an alternate, so clear the buffer. */ + if ( item == PERL_ALT_DIGITS + && strEQ(PL_langinfo_buf, "0")) + { + *PL_langinfo_buf = '\0'; + } + + /* ALT_DIGITS is problematic. Experiments on it showed that + * strftime() did not always work properly when going from alt-9 to + * alt-10. Only a few locales have this item defined, and in all + * of them on Linux that khw was able to find, nl_langinfo() merely + * returned the alt-0 character, possibly doubled. Most Unicode + * digits are in blocks of 10 consecutive code points, so that is + * sufficient information for those scripts, as we can infer alt-1, + * alt-2, .... But for a Japanese locale, a CJK ideographic 0 is + * returned, and the CJK digits are not in code point order, so you + * can't really infer anything. The localedef for this locale did + * specify the succeeding digits, so that strftime() works properly + * on them, without needing to infer anything. But the + * nl_langinfo() return did not give sufficient information for the + * caller to understand what's going on. So until there is + * evidence that it should work differently, this returns the alt-0 + * string for ALT_DIGITS. + * + * wday was chosen because its range is all a single digit. Things + * like tm_sec have two digits as the minimum: '00' */ + + LOCALE_UNLOCK; + + /* If to return the format, not the value, overwrite the buffer + * with it. But some strftime()s will keep the original format if + * illegal, so change those to "" */ + if (return_format) { + if (strEQ(PL_langinfo_buf, format)) { + *PL_langinfo_buf = '\0'; + } + else { + save_to_buffer(format, &PL_langinfo_buf, + &PL_langinfo_bufsize, 0); + } + } + + break; + +# endif + + } + + return PL_langinfo_buf; + +#endif + } /* @@ -2858,8 +3447,6 @@ Perl_my_strerror(pTHX_ const int errnum) /* -=head1 Locale-related functions and macros - =for apidoc sync_locale Changing the program's locale should be avoided by XS code. Nevertheless, @@ -1115,6 +1115,11 @@ perl_destruct(pTHXx) PL_numeric_radix_sv = NULL; #endif + if (PL_langinfo_buf) { + Safefree(PL_langinfo_buf); + PL_langinfo_buf = NULL; + } + /* clear character classes */ for (i = 0; i < POSIX_SWASH_COUNT; i++) { SvREFCNT_dec(PL_utf8_swash_ptrs[i]); diff --git a/perl_langinfo.h b/perl_langinfo.h new file mode 100644 index 0000000000..a93874f8ab --- /dev/null +++ b/perl_langinfo.h @@ -0,0 +1,297 @@ +/* Replaces <langinfo.h>, and allows our code to work on systems that don't + * have that. */ + +#ifndef PERL_LANGINFO_H +#define PERL_LANGINFO_H 1 + +#include "config.h" + +#if defined(HAS_NL_LANGINFO) && defined(I_LANGINFO) +# include <langinfo.h> +#endif + +/* NOTE that this file is parsed by ext/XS-APItest/t/locale.t, so be careful + * with changes */ + +/* Define PERL_foo to 'foo' if it exists; a negative number otherwise. The + * negatives are to minimize the possibility of collisions on platforms that + * define some but not all of these item names (though each name is required by + * the 2008 POSIX specification) */ + +#ifdef CODESET +# define PERL_CODESET CODESET +#else +# define PERL_CODESET -1 +#endif +#ifdef D_T_FMT +# define PERL_D_T_FMT D_T_FMT +#else +# define PERL_D_T_FMT -2 +#endif +#ifdef D_FMT +# define PERL_D_FMT D_FMT +#else +# define PERL_D_FMT -3 +#endif +#ifdef T_FMT +# define PERL_T_FMT T_FMT +#else +# define PERL_T_FMT -4 +#endif +#ifdef T_FMT_AMPM +# define PERL_T_FMT_AMPM T_FMT_AMPM +#else +# define PERL_T_FMT_AMPM -5 +#endif +#ifdef AM_STR +# define PERL_AM_STR AM_STR +#else +# define PERL_AM_STR -6 +#endif +#ifdef PM_STR +# define PERL_PM_STR PM_STR +#else +# define PERL_PM_STR -7 +#endif +#ifdef DAY_1 +# define PERL_DAY_1 DAY_1 +#else +# define PERL_DAY_1 -8 +#endif +#ifdef DAY_2 +# define PERL_DAY_2 DAY_2 +#else +# define PERL_DAY_2 -9 +#endif +#ifdef DAY_3 +# define PERL_DAY_3 DAY_3 +#else +# define PERL_DAY_3 -10 +#endif +#ifdef DAY_4 +# define PERL_DAY_4 DAY_4 +#else +# define PERL_DAY_4 -11 +#endif +#ifdef DAY_5 +# define PERL_DAY_5 DAY_5 +#else +# define PERL_DAY_5 -12 +#endif +#ifdef DAY_6 +# define PERL_DAY_6 DAY_6 +#else +# define PERL_DAY_6 -13 +#endif +#ifdef DAY_7 +# define PERL_DAY_7 DAY_7 +#else +# define PERL_DAY_7 -14 +#endif +#ifdef ABDAY_1 +# define PERL_ABDAY_1 ABDAY_1 +#else +# define PERL_ABDAY_1 -15 +#endif +#ifdef ABDAY_2 +# define PERL_ABDAY_2 ABDAY_2 +#else +# define PERL_ABDAY_2 -16 +#endif +#ifdef ABDAY_3 +# define PERL_ABDAY_3 ABDAY_3 +#else +# define PERL_ABDAY_3 -17 +#endif +#ifdef ABDAY_4 +# define PERL_ABDAY_4 ABDAY_4 +#else +# define PERL_ABDAY_4 -18 +#endif +#ifdef ABDAY_5 +# define PERL_ABDAY_5 ABDAY_5 +#else +# define PERL_ABDAY_5 -19 +#endif +#ifdef ABDAY_6 +# define PERL_ABDAY_6 ABDAY_6 +#else +# define PERL_ABDAY_6 -20 +#endif +#ifdef ABDAY_7 +# define PERL_ABDAY_7 ABDAY_7 +#else +# define PERL_ABDAY_7 -21 +#endif +#ifdef MON_1 +# define PERL_MON_1 MON_1 +#else +# define PERL_MON_1 -22 +#endif +#ifdef MON_2 +# define PERL_MON_2 MON_2 +#else +# define PERL_MON_2 -23 +#endif +#ifdef MON_3 +# define PERL_MON_3 MON_3 +#else +# define PERL_MON_3 -24 +#endif +#ifdef MON_4 +# define PERL_MON_4 MON_4 +#else +# define PERL_MON_4 -25 +#endif +#ifdef MON_5 +# define PERL_MON_5 MON_5 +#else +# define PERL_MON_5 -26 +#endif +#ifdef MON_6 +# define PERL_MON_6 MON_6 +#else +# define PERL_MON_6 -27 +#endif +#ifdef MON_7 +# define PERL_MON_7 MON_7 +#else +# define PERL_MON_7 -28 +#endif +#ifdef MON_8 +# define PERL_MON_8 MON_8 +#else +# define PERL_MON_8 -29 +#endif +#ifdef MON_9 +# define PERL_MON_9 MON_9 +#else +# define PERL_MON_9 -30 +#endif +#ifdef MON_10 +# define PERL_MON_10 MON_10 +#else +# define PERL_MON_10 -31 +#endif +#ifdef MON_11 +# define PERL_MON_11 MON_11 +#else +# define PERL_MON_11 -32 +#endif +#ifdef MON_12 +# define PERL_MON_12 MON_12 +#else +# define PERL_MON_12 -33 +#endif +#ifdef ABMON_1 +# define PERL_ABMON_1 ABMON_1 +#else +# define PERL_ABMON_1 -34 +#endif +#ifdef ABMON_2 +# define PERL_ABMON_2 ABMON_2 +#else +# define PERL_ABMON_2 -35 +#endif +#ifdef ABMON_3 +# define PERL_ABMON_3 ABMON_3 +#else +# define PERL_ABMON_3 -36 +#endif +#ifdef ABMON_4 +# define PERL_ABMON_4 ABMON_4 +#else +# define PERL_ABMON_4 -37 +#endif +#ifdef ABMON_5 +# define PERL_ABMON_5 ABMON_5 +#else +# define PERL_ABMON_5 -38 +#endif +#ifdef ABMON_6 +# define PERL_ABMON_6 ABMON_6 +#else +# define PERL_ABMON_6 -39 +#endif +#ifdef ABMON_7 +# define PERL_ABMON_7 ABMON_7 +#else +# define PERL_ABMON_7 -40 +#endif +#ifdef ABMON_8 +# define PERL_ABMON_8 ABMON_8 +#else +# define PERL_ABMON_8 -41 +#endif +#ifdef ABMON_9 +# define PERL_ABMON_9 ABMON_9 +#else +# define PERL_ABMON_9 -42 +#endif +#ifdef ABMON_10 +# define PERL_ABMON_10 ABMON_10 +#else +# define PERL_ABMON_10 -43 +#endif +#ifdef ABMON_11 +# define PERL_ABMON_11 ABMON_11 +#else +# define PERL_ABMON_11 -44 +#endif +#ifdef ABMON_12 +# define PERL_ABMON_12 ABMON_12 +#else +# define PERL_ABMON_12 -45 +#endif +#ifdef ERA +# define PERL_ERA ERA +#else +# define PERL_ERA -46 +#endif +#ifdef ERA_D_FMT +# define PERL_ERA_D_FMT ERA_D_FMT +#else +# define PERL_ERA_D_FMT -47 +#endif +#ifdef ERA_D_T_FMT +# define PERL_ERA_D_T_FMT ERA_D_T_FMT +#else +# define PERL_ERA_D_T_FMT -48 +#endif +#ifdef ERA_T_FMT +# define PERL_ERA_T_FMT ERA_T_FMT +#else +# define PERL_ERA_T_FMT -49 +#endif +#ifdef ALT_DIGITS +# define PERL_ALT_DIGITS ALT_DIGITS +#else +# define PERL_ALT_DIGITS -50 +#endif +#ifdef RADIXCHAR +# define PERL_RADIXCHAR RADIXCHAR +#else +# define PERL_RADIXCHAR -51 +#endif +#ifdef THOUSEP +# define PERL_THOUSEP THOUSEP +#else +# define PERL_THOUSEP -52 +#endif +#ifdef YESEXPR +# define PERL_YESEXPR YESEXPR +#else +# define PERL_YESEXPR -53 +#endif +#ifdef NOEXPR +# define PERL_NOEXPR NOEXPR +#else +# define PERL_NOEXPR -54 +#endif +#ifdef CRNCYSTR +# define PERL_CRNCYSTR CRNCYSTR +#else +# define PERL_CRNCYSTR -55 +#endif + +#endif diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 0ee5556286..0db7df45a2 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -346,7 +346,13 @@ well. =item * -XXX +A new function, L<C<Perl_langinfo()>|perlapi/Perl_langinfo> has been +added. It is an (almost) drop-in replacement for the system +C<nl_langinfo(3)>, but works on platforms that lack that; as well as +being more thread-safe, and hiding some gotchas with locale handling +from the caller. Code that uses this, needn't use L<C<localeconv(3)>> +(and be affected by the gotchas) to find the decimal point, thousands +separator, or currency symbol. See L<perlapi/Perl_langinfo>. =back @@ -3793,6 +3793,9 @@ PERL_CALLCONV char* Perl_ninstr(const char* big, const char* bigend, const char* assert(big); assert(bigend); assert(little); assert(lend) #endif +#if !(defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H)) +PERL_CALLCONV const char* Perl_langinfo(const int item); +#endif #if !(defined(HAS_SIGACTION) && defined(SA_SIGINFO)) PERL_CALLCONV Signal_t Perl_csighandler(int sig); PERL_CALLCONV Signal_t Perl_sighandler(int sig); @@ -4111,6 +4114,19 @@ PERL_CALLCONV SV* Perl_pad_sv(pTHX_ PADOFFSET po); PERL_CALLCONV void Perl_set_padlist(CV * cv, PADLIST * padlist); #define PERL_ARGS_ASSERT_SET_PADLIST \ assert(cv) +# if defined(PERL_IN_LOCALE_C) +# if defined(USE_LOCALE) +STATIC void S_print_bytes_for_locale(pTHX_ const char * const s, const char * const e, const bool is_utf8); +#define PERL_ARGS_ASSERT_PRINT_BYTES_FOR_LOCALE \ + assert(s); assert(e) +STATIC void S_print_collxfrm_input_and_return(pTHX_ const char * const s, const char * const e, const STRLEN * const xlen, const bool is_utf8); +#define PERL_ARGS_ASSERT_PRINT_COLLXFRM_INPUT_AND_RETURN \ + assert(s); assert(e) +STATIC char * S_setlocale_debug_string(const int category, const char* const locale, const char* const retval) + __attribute__warn_unused_result__; + +# endif +# endif # if defined(PERL_IN_PAD_C) STATIC void S_cv_dump(pTHX_ const CV *cv, const char *title); #define PERL_ARGS_ASSERT_CV_DUMP \ @@ -4179,17 +4195,6 @@ STATIC int S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp); #define PERL_ARGS_ASSERT_TOKEREPORT \ assert(lvalp) # endif -# if defined(USE_LOCALE) && defined(PERL_IN_LOCALE_C) -STATIC void S_print_bytes_for_locale(pTHX_ const char * const s, const char * const e, const bool is_utf8); -#define PERL_ARGS_ASSERT_PRINT_BYTES_FOR_LOCALE \ - assert(s); assert(e) -STATIC void S_print_collxfrm_input_and_return(pTHX_ const char * const s, const char * const e, const STRLEN * const xlen, const bool is_utf8); -#define PERL_ARGS_ASSERT_PRINT_COLLXFRM_INPUT_AND_RETURN \ - assert(s); assert(e) -STATIC char * S_setlocale_debug_string(const int category, const char* const locale, const char* const retval) - __attribute__warn_unused_result__; - -# endif #endif #if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING) # if defined(PERL_IN_REGCOMP_C) @@ -4231,6 +4236,9 @@ PERL_CALLCONV I32 Perl_do_shmio(pTHX_ I32 optype, SV** mark, SV** sp); #define PERL_ARGS_ASSERT_DO_SHMIO \ assert(mark); assert(sp) #endif +#if defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H) +PERL_CALLCONV const char* Perl_langinfo(const nl_item item); +#endif #if defined(HAS_SIGACTION) && defined(SA_SIGINFO) PERL_CALLCONV Signal_t Perl_csighandler(int sig, siginfo_t *info, void *uap); PERL_CALLCONV Signal_t Perl_sighandler(int sig, siginfo_t *info, void *uap); @@ -4606,6 +4614,26 @@ PERL_CALLCONV SV* Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp); #define PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY \ assert(hv); assert(indexp) #endif +#if defined(PERL_IN_LOCALE_C) +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE const char * S_save_to_buffer(const char * string, char **buf, Size_t *buf_size, const Size_t offset); +#define PERL_ARGS_ASSERT_SAVE_TO_BUFFER \ + assert(string); assert(buf_size) +#endif +# if defined(USE_LOCALE) +STATIC void S_new_collate(pTHX_ const char* newcoll); +STATIC void S_new_ctype(pTHX_ const char* newctype); +#define PERL_ARGS_ASSERT_NEW_CTYPE \ + assert(newctype) +STATIC void S_set_numeric_radix(pTHX); +STATIC char* S_stdize_locale(pTHX_ char* locs); +#define PERL_ARGS_ASSERT_STDIZE_LOCALE \ + assert(locs) +# if defined(WIN32) +STATIC char* S_my_setlocale(pTHX_ int category, const char* locale); +# endif +# endif +#endif #if defined(PERL_IN_LOCALE_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_MATHOMS_C) # if defined(USE_LOCALE_COLLATE) PERL_CALLCONV char* Perl__mem_collxfrm(pTHX_ const char* input_string, STRLEN len, STRLEN* xlen, bool utf8); @@ -6058,19 +6086,6 @@ PERL_CALLCONV SV* Perl_sv_dup_inc(pTHX_ const SV *const sstr, CLONE_PARAMS *cons #if defined(USE_LOCALE) && ( defined(PERL_IN_LOCALE_C) || defined(PERL_IN_MG_C) || defined (PERL_EXT_POSIX)) PERL_CALLCONV bool Perl__is_cur_LC_category_utf8(pTHX_ int category); #endif -#if defined(USE_LOCALE) && defined(PERL_IN_LOCALE_C) -STATIC void S_new_collate(pTHX_ const char* newcoll); -STATIC void S_new_ctype(pTHX_ const char* newctype); -#define PERL_ARGS_ASSERT_NEW_CTYPE \ - assert(newctype) -STATIC void S_set_numeric_radix(pTHX); -STATIC char* S_stdize_locale(pTHX_ char* locs); -#define PERL_ARGS_ASSERT_STDIZE_LOCALE \ - assert(locs) -# if defined(WIN32) -STATIC char* S_my_setlocale(pTHX_ int category, const char* locale); -# endif -#endif #if defined(USE_LOCALE_COLLATE) PERL_CALLCONV int Perl_magic_setcollxfrm(pTHX_ SV* sv, MAGIC* mg); #define PERL_ARGS_ASSERT_MAGIC_SETCOLLXFRM \ @@ -15660,6 +15660,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv, param); #endif /* !USE_LOCALE_NUMERIC */ + PL_langinfo_buf = NULL; + PL_langinfo_bufsize = 0; + /* Unicode inversion lists */ PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param); |