summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-02-18 15:45:08 -0700
committerKarl Williamson <khw@cpan.org>2018-02-18 15:45:08 -0700
commit6de18f4499a45f0d6876f2aca180b8a9f06e9240 (patch)
tree0434de0a4716d75d3c39d85601ca69347ba670e8
parent9aac5db886d0626569524a0be2a769ebb8078307 (diff)
parent3a01ed59fb40348bc99b7f6b26d1f06dbd909223 (diff)
downloadperl-6de18f4499a45f0d6876f2aca180b8a9f06e9240.tar.gz
Merge branch 'add thread-safe locales' into blead
-rw-r--r--dist/ExtUtils-ParseXS/lib/perlxs.pod120
-rw-r--r--dist/threads/lib/threads.pm29
-rw-r--r--dist/threads/threads.xs4
-rw-r--r--embed.fnc13
-rw-r--r--embed.h10
-rw-r--r--embedvar.h4
-rw-r--r--ext/POSIX/lib/POSIX.pod1
-rw-r--r--intrpvar.h9
-rw-r--r--lib/locale_threads.t73
-rw-r--r--locale.c1193
-rw-r--r--makedef.pl2
-rw-r--r--perl.c9
-rw-r--r--perl.h229
-rw-r--r--perlapi.h2
-rw-r--r--perlvars.h1
-rw-r--r--pod/perldelta.pod27
-rw-r--r--pod/perldiag.pod8
-rw-r--r--pod/perlembed.pod5
-rw-r--r--pod/perllocale.pod138
-rw-r--r--pod/perlvar.pod6
-rw-r--r--proto.h10
-rw-r--r--sv.c8
-rw-r--r--t/porting/known_pod_issues.dat1
-rw-r--r--vutil.c140
-rw-r--r--vutil.h29
25 files changed, 1850 insertions, 221 deletions
diff --git a/dist/ExtUtils-ParseXS/lib/perlxs.pod b/dist/ExtUtils-ParseXS/lib/perlxs.pod
index 78297c276c..1419ee0ddf 100644
--- a/dist/ExtUtils-ParseXS/lib/perlxs.pod
+++ b/dist/ExtUtils-ParseXS/lib/perlxs.pod
@@ -2195,7 +2195,7 @@ To summarize, here's what to expect and how to handle locales in XS code:
=item Non-locale-aware XS code
Keep in mind that even if you think your code is not locale-aware, it
-may call a C library function that is. Hopefully the man page for such
+may call a library function that is. Hopefully the man page for such
a function will indicate that dependency, but the documentation is
imperfect.
@@ -2223,20 +2223,122 @@ handled.
If the locale from the user's environment is desired, there should be no
need for XS code to set the locale except for C<LC_NUMERIC>, as perl has
-already set it up. XS code should avoid changing the locale, as it can
-adversely affect other, unrelated, code and may not be thread safe.
-However, some alien libraries that may be called do set it, such as
-C<Gtk>. This can cause problems for the perl core and other modules.
-Starting in v5.20.1, calling the function
+already set the others up. XS code should avoid changing the locale, as
+it can adversely affect other, unrelated, code and may not be
+thread-safe. To minimize problems, the macros
+L<perlapi/STORE_LC_NUMERIC_SET_TO_NEEDED>,
+L<perlapi/STORE_LC_NUMERIC_FORCE_TO_UNDERLYING>, and
+L<perlapi/RESTORE_LC_NUMERIC> should be used to affect any needed
+change.
+
+But, starting with Perl v5.28, locales are thread-safe on platforms that
+support this functionality. Windows has this starting with Visual
+Studio 2005. Many other modern platforms support the thread-safe POSIX
+2008 functions. The C C<#define> C<USE_THREAD_SAFE_LOCALE> will be
+defined iff this build is using these. From Perl-space, the read-only
+variable C<${SAFE_LOCALES}> is 1 if either the build is not threaded, or
+if C<USE_THREAD_SAFE_LOCALE> is defined; otherwise it is 0.
+
+The way this works under-the-hood is that every thread has a choice of
+using a locale specific to it (this is the Windows and POSIX 2008
+functionality), or the global locale that is accessible to all threads
+(this is the functionality that has always been there). The
+implementations for Windows and POSIX are completely different. On
+Windows, the runtime can be set up so that the standard
+L<C<setlocale(3)>> function either only knows about the global locale or
+the locale for this thread. On POSIX, C<setlocale> always deals with
+the global locale, and other functions have been created to handle
+per-thread locales. Perl makes this transparent to perl-space code. It
+continues to use C<POSIX::setlocale()>, and the interpreter translates
+that into the per-thread functions.
+
+All other locale-senstive functions automatically use the per-thread
+locale, if that is turned on, and failing that, the global locale. Thus
+calls to C<setlocale> are ineffective on POSIX systems for the current
+thread if that thread is using a per-thread locale. If perl is compiled
+for single-thread operation, it does not use the per-thread functions,
+so C<setlocale> does work as expected.
+
+If you have loaded the L<C<POSIX>> module you can use the methods given
+in L<perlcall> to call L<C<POSIX::setlocale>|POSIX/setlocale> to safely
+change or query the locale (on systems where it is safe to do so), or
+you can use the new 5.28 function L<perlapi/Perl_setlocale> instead,
+which is a drop-in replacement for the system L<C<setlocale(3)>>, and
+handles single-threaded and multi-threaded applications transparently.
+
+There are some locale-related library calls that still aren't
+thread-safe because they return data in a buffer global to all threads.
+In the past, these didn't matter as locales weren't thread-safe at all.
+But now you have to be aware of them in case your module is called in a
+multi-threaded application. The known ones are
+
+ asctime()
+ ctime()
+ gcvt() [POSIX.1-2001 only (function removed in POSIX.1-2008)]
+ getdate()
+ wcrtomb() if its final argument is NULL
+ wcsrtombs() if its final argument is NULL
+ wcstombs()
+ wctomb()
+
+Some of these shouldn't really be called in a Perl application, and for
+others there are thread-safe versions of these already implemented:
+
+ asctime_r()
+ ctime_r()
+ Perl_langinfo()
+
+The C<_r> forms are automatically used, starting in Perl 5.28, if you
+compile your code, with
+
+ #define PERL_REENTRANT
+
+See also L<perlapi/Perl_langinfo>.
+You can use the methods given in L<perlcall>, to get the best available
+locale-safe versions of these
+
+ POSIX::localeconv()
+ POSIX::wcstombs()
+ POSIX::wctomb()
+
+And note, that some items returned by C<Localeconv> are available
+through L<perlapi/Perl_langinfo>.
+
+The others shouldn't be used in a threaded application.
+
+Some modules may call a non-perl library that is locale-aware. This is
+fine as long as it doesn't try to query or change the locale using the
+system C<setlocale>. But if these do call the system C<setlocale>,
+those calls may be ineffective. Instead,
+L<C<Perl_setlocale>|perlapi/Perl_setlocale> works in all circumstances.
+Plain setlocale is ineffective on multi-threaded POSIX 2008 systems. It
+operates only on the global locale, whereas each thread has its own
+locale, paying no attention to the global one. Since converting
+these non-Perl libraries to C<Perl_setlocale> is out of the question,
+there is a new function in v5.28
+L<C<switch_to_global_locale>|perlapi/switch_to_global_locale> that will
+switch the thread it is called from so that any system C<setlocale>
+calls will have their desired effect. The function
+L<C<sync_locale>|perlapi/sync_locale> must be called before returning to
+perl.
+
+This thread can change the locale all it wants and it won't affect any
+other thread, except any that also have been switched to the global
+locale. This means that a multi-threaded application can have a single
+thread using an alien library without a problem; but no more than a
+single thread can be so-occupied. Bad results likely will happen.
+
+In perls without multi-thread locale support, some alien libraries,
+such as C<Gtk> change locales. This can cause problems for the Perl
+core and other modules. For these, before control is returned to
+perl, starting in v5.20.1, calling the function
L<sync_locale()|perlapi/sync_locale> from XS should be sufficient to
avoid most of these problems. Prior to this, you need a pure Perl
statement that does this:
POSIX::setlocale(LC_ALL, POSIX::setlocale(LC_ALL));
-In the event that your XS code may need the underlying C<LC_NUMERIC>
-locale, there are macros available to access this; see
-L<perlapi/Locale-related functions and macros>.
+or use the methods given in L<perlcall>.
=back
diff --git a/dist/threads/lib/threads.pm b/dist/threads/lib/threads.pm
index 2eb926a071..1b99567ef2 100644
--- a/dist/threads/lib/threads.pm
+++ b/dist/threads/lib/threads.pm
@@ -5,7 +5,7 @@ use 5.008;
use strict;
use warnings;
-our $VERSION = '2.21'; # remember to update version in POD!
+our $VERSION = '2.22'; # remember to update version in POD!
my $XS_VERSION = $VERSION;
$VERSION = eval $VERSION;
@@ -937,6 +937,33 @@ C<chdir()>) will affect all the threads in the application.
On MSWin32, each thread maintains its own the current working directory
setting.
+=item Locales
+
+Prior to Perl 5.28, locales could not be used with threads, due to various
+race conditions. Starting in that release, on systems that implement
+thread-safe locale functions, threads can be used, with some caveats.
+This includes Windows starting with Visual Studio 2005, and systems compatible
+with POSIX 2008. See L<perllocale/Multi-threaded operation>.
+
+Each thread (except the main thread) is started using the C locale. The main
+thread is started like all other Perl programs; see L<perllocale/ENVIRONMENT>.
+You can switch locales in any thread as often as you like.
+
+If you want to inherit the parent thread's locale, you can, in the parent, set
+a variable like so:
+
+ $foo = POSIX::setlocale(LC_ALL, NULL);
+
+and then pass to threads->create() a sub that closes over C<$foo>. Then, in
+the child, you say
+
+ POSIX::setlocale(LC_ALL, $foo);
+
+Or you can use the facilities in L<threads::shared> to pass C<$foo>;
+or if the environment hasn't changed, in the child, do
+
+ POSIX::setlocale(LC_ALL, "");
+
=item Environment variables
Currently, on all platforms except MSWin32, all I<system> calls (e.g., using
diff --git a/dist/threads/threads.xs b/dist/threads/threads.xs
index 4e9e31fdeb..3da9165c27 100644
--- a/dist/threads/threads.xs
+++ b/dist/threads/threads.xs
@@ -580,6 +580,8 @@ S_ithread_run(void * arg)
S_set_sigmask(&thread->initial_sigmask);
#endif
+ thread_locale_init();
+
PL_perl_destruct_level = 2;
{
@@ -665,6 +667,8 @@ S_ithread_run(void * arg)
MUTEX_UNLOCK(&thread->mutex);
MUTEX_UNLOCK(&MY_POOL.create_destruct_mutex);
+ thread_locale_term();
+
/* Exit application if required */
if (exit_app) {
(void)S_jmpenv_run(aTHX_ 2, thread, NULL, &exit_app, &exit_code);
diff --git a/embed.fnc b/embed.fnc
index 80035a8fdc..e748639e1a 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1306,11 +1306,13 @@ ApOM |int |init_i18nl10n |int printwarn
ApOM |int |init_i18nl14n |int printwarn
p |char* |my_strerror |const int errnum
Xpn |void |_warn_problematic_locale
-p |void |new_numeric |NULLOK const char* newcoll
Xp |void |set_numeric_underlying
Xp |void |set_numeric_standard
Xp |bool |_is_in_locale_category|const bool compiling|const int category
-Apd |void |sync_locale
+Apdn |void |switch_to_global_locale
+Apdn |bool |sync_locale
+ApMn |void |thread_locale_init
+ApMn |void |thread_locale_term
ApdO |void |require_pv |NN const char* pv
Apd |void |pack_cat |NN SV *cat|NN const char *pat|NN const char *patend \
|NN SV **beglist|NN SV **endlist|NN SV ***next_in_list|U32 flags
@@ -2796,6 +2798,13 @@ s |char* |stdize_locale |NN char* locs
s |void |new_collate |NULLOK const char* newcoll
s |void |new_ctype |NN const char* newctype
s |void |set_numeric_radix|const bool use_locale
+s |void |new_numeric |NULLOK const char* newnum
+# ifdef USE_POSIX_2008_LOCALE
+sn |const char*|emulate_setlocale|const int category \
+ |NULLOK const char* locale \
+ |unsigned int index \
+ |const bool is_index_valid
+# endif
# ifdef WIN32
s |char* |win32_setlocale|int category|NULLOK const char* locale
# endif
diff --git a/embed.h b/embed.h
index 1405176255..b417aaf083 100644
--- a/embed.h
+++ b/embed.h
@@ -714,9 +714,12 @@
#define sv_vsetpvf(a,b,c) Perl_sv_vsetpvf(aTHX_ a,b,c)
#define sv_vsetpvf_mg(a,b,c) Perl_sv_vsetpvf_mg(aTHX_ a,b,c)
#define sv_vsetpvfn(a,b,c,d,e,f,g) Perl_sv_vsetpvfn(aTHX_ a,b,c,d,e,f,g)
-#define sync_locale() Perl_sync_locale(aTHX)
+#define switch_to_global_locale Perl_switch_to_global_locale
+#define sync_locale Perl_sync_locale
#define taint_env() Perl_taint_env(aTHX)
#define taint_proper(a,b) Perl_taint_proper(aTHX_ a,b)
+#define thread_locale_init Perl_thread_locale_init
+#define thread_locale_term Perl_thread_locale_term
#define to_uni_lower(a,b,c) Perl_to_uni_lower(aTHX_ a,b,c)
#define to_uni_lower_lc(a) Perl_to_uni_lower_lc(aTHX_ a)
#define to_uni_title(a,b,c) Perl_to_uni_title(aTHX_ a,b,c)
@@ -1347,7 +1350,6 @@
#define newSVavdefelem(a,b,c) Perl_newSVavdefelem(aTHX_ a,b,c)
#define newXS_deffile(a,b) Perl_newXS_deffile(aTHX_ a,b)
#define newXS_len_flags(a,b,c,d,e,f,g) Perl_newXS_len_flags(aTHX_ a,b,c,d,e,f,g)
-#define new_numeric(a) Perl_new_numeric(aTHX_ a)
#define nextargv(a,b) Perl_nextargv(aTHX_ a,b)
#define noperl_die Perl_noperl_die
#define notify_parser_that_changed_to_utf8() Perl_notify_parser_that_changed_to_utf8(aTHX)
@@ -1633,8 +1635,12 @@
# if defined(USE_LOCALE)
#define new_collate(a) S_new_collate(aTHX_ a)
#define new_ctype(a) S_new_ctype(aTHX_ a)
+#define new_numeric(a) S_new_numeric(aTHX_ a)
#define set_numeric_radix(a) S_set_numeric_radix(aTHX_ a)
#define stdize_locale(a) S_stdize_locale(aTHX_ a)
+# if defined(USE_POSIX_2008_LOCALE)
+#define emulate_setlocale S_emulate_setlocale
+# endif
# if defined(WIN32)
#define win32_setlocale(a,b) S_win32_setlocale(aTHX_ a,b)
# endif
diff --git a/embedvar.h b/embedvar.h
index d7eb929aa5..73c20e7f1b 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -106,6 +106,7 @@
#define PL_cryptseen (vTHX->Icryptseen)
#define PL_curcop (vTHX->Icurcop)
#define PL_curcopdb (vTHX->Icurcopdb)
+#define PL_curlocales (vTHX->Icurlocales)
#define PL_curpad (vTHX->Icurpad)
#define PL_curpm (vTHX->Icurpm)
#define PL_curpm_under (vTHX->Icurpm_under)
@@ -187,6 +188,7 @@
#define PL_lastgotoprobe (vTHX->Ilastgotoprobe)
#define PL_laststatval (vTHX->Ilaststatval)
#define PL_laststype (vTHX->Ilaststype)
+#define PL_lc_numeric_mutex_depth (vTHX->Ilc_numeric_mutex_depth)
#define PL_locale_utf8ness (vTHX->Ilocale_utf8ness)
#define PL_localizing (vTHX->Ilocalizing)
#define PL_localpatches (vTHX->Ilocalpatches)
@@ -406,6 +408,8 @@
#define PL_Gkeyword_plugin (my_vars->Gkeyword_plugin)
#define PL_keyword_plugin_mutex (my_vars->Gkeyword_plugin_mutex)
#define PL_Gkeyword_plugin_mutex (my_vars->Gkeyword_plugin_mutex)
+#define PL_lc_numeric_mutex (my_vars->Glc_numeric_mutex)
+#define PL_Glc_numeric_mutex (my_vars->Glc_numeric_mutex)
#define PL_locale_mutex (my_vars->Glocale_mutex)
#define PL_Glocale_mutex (my_vars->Glocale_mutex)
#define PL_malloc_mutex (my_vars->Gmalloc_mutex)
diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod
index 712132bd57..c12aaefa63 100644
--- a/ext/POSIX/lib/POSIX.pod
+++ b/ext/POSIX/lib/POSIX.pod
@@ -939,6 +939,7 @@ containing the current underlying locale's formatting values. Users of this fun
should also read L<perllocale>, which provides a comprehensive
discussion of Perl locale handling, including
L<a section devoted to this function|perllocale/The localeconv function>.
+Prior to Perl 5.28, or when operating in a non thread-safe environment,
It should not be used in a threaded application unless it's certain that
the underlying locale is C or POSIX. This is because it otherwise
changes the locale, which globally affects all threads simultaneously.
diff --git a/intrpvar.h b/intrpvar.h
index 884fa87dc6..466785b882 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -262,6 +262,7 @@ PERLVAR(I, exit_flags, U8) /* was exit() unexpected, etc. */
PERLVAR(I, utf8locale, bool) /* utf8 locale detected */
PERLVAR(I, in_utf8_CTYPE_locale, bool)
PERLVAR(I, in_utf8_COLLATE_locale, bool)
+PERLVARI(I, lc_numeric_mutex_depth, int, 0) /* Emulate general semaphore */
PERLVARA(I, locale_utf8ness, 256, char)
#ifdef USE_LOCALE_CTYPE
@@ -575,7 +576,15 @@ PERLVAR(I, constpadix, PADOFFSET) /* lowest unused for constants */
PERLVAR(I, padix_floor, PADOFFSET) /* how low may inner block reset padix */
+#if defined(USE_POSIX_2008_LOCALE) \
+ && defined(USE_THREAD_SAFE_LOCALE) \
+ && ! defined(HAS_QUERYLOCALE)
+
+PERLVARA(I, curlocales, 12, char *)
+
+#endif
#ifdef USE_LOCALE_COLLATE
+
PERLVAR(I, collation_name, char *) /* Name of current collation */
PERLVAR(I, collxfrm_base, Size_t) /* Basic overhead in *xfrm() */
PERLVARI(I, collxfrm_mult,Size_t, 2) /* Expansion factor in *xfrm() */
diff --git a/lib/locale_threads.t b/lib/locale_threads.t
index 72d322ee37..cda570be3a 100644
--- a/lib/locale_threads.t
+++ b/lib/locale_threads.t
@@ -11,14 +11,19 @@ BEGIN {
skip_all("No locales") unless locales_enabled();
skip_all_without_config('useithreads');
$| = 1;
+ eval { require POSIX; POSIX->import(qw(locale_h unistd_h)) };
+ if ($@) {
+ skip_all("could not load the POSIX module"); # running minitest?
+ }
}
+# reset the locale environment
+local @ENV{'LANG', (grep /^LC_/, keys %ENV)};
+
SKIP: { # perl #127708
my @locales = grep { $_ !~ / ^ C \b | POSIX /x } find_locales('LC_MESSAGES');
skip("No valid locale to test with", 1) unless @locales;
- # reset the locale environment
- local @ENV{'LANG', (grep /^LC_/, keys %ENV)};
local $ENV{LC_MESSAGES} = $locales[0];
# We're going to try with all possible error numbers on this platform
@@ -49,4 +54,66 @@ SKIP: { # perl #127708
pass("Didn't segfault");
}
-done_testing;
+SKIP: {
+ skip("POSIX version doesn't support thread-safe locale operations", 1)
+ unless ${^SAFE_LOCALES};
+
+ my @locales = find_locales( 'LC_NUMERIC' );
+ skip("No LC_NUMERIC locales available", 1) unless @locales;
+
+ my $dot = "";
+ my $comma = "";
+ for (@locales) { # prefer C for the base if available
+ use locale;
+ setlocale(LC_NUMERIC, $_) or next;
+ my $in = 4.2; # avoid any constant folding bugs
+ if ((my $s = sprintf("%g", $in)) eq "4.2") {
+ $dot ||= $_;
+ } else {
+ my $radix = localeconv()->{decimal_point};
+ $comma ||= $_ if $radix eq ',';
+ }
+
+ last if $dot && $comma;
+ }
+
+ # See if multiple threads can simultaneously change the locale, and give
+ # the expected radix results. On systems without a comma radix locale,
+ # run this anyway skipping the use of that, to verify that we don't
+ # segfault
+ fresh_perl_is("
+ use threads;
+ use strict;
+ use warnings;
+ use POSIX qw(locale_h);
+
+ my \$result = 1;
+
+ my \@threads = map +threads->create(sub {
+ sleep 0.1;
+ for (1..5_000) {
+ my \$s;
+ my \$in = 4.2; # avoid any constant folding bugs
+
+ if ('$comma') {
+ setlocale(&LC_NUMERIC, '$comma');
+ use locale;
+ \$s = sprintf('%g', \$in);
+ return 0 if (\$s ne '4,2');
+ }
+
+ setlocale(&LC_NUMERIC, '$dot');
+ \$s = sprintf('%g', \$in);
+ return 0 if (\$s ne '4.2');
+ }
+
+ return 1;
+
+ }), (0..3);
+ \$result &= \$_->join for splice \@threads;
+ print \$result",
+ 1, {}, "Verify there were no failures with simultaneous running threads"
+ );
+}
+
+done_testing();
diff --git a/locale.c b/locale.c
index 357f9d4863..9f5f8426f2 100644
--- a/locale.c
+++ b/locale.c
@@ -32,6 +32,15 @@
* the desired behavior of those functions at the moment. And, LC_MESSAGES is
* switched to the C locale for outputting the message unless within the scope
* of 'use locale'.
+ *
+ * This code now has multi-thread-safe locale handling on systems that support
+ * that. This is completely transparent to most XS code. On earlier systems,
+ * it would be possible to emulate thread-safe locales, but this likely would
+ * involve a lot of locale switching, and would require XS code changes.
+ * Macros could be written so that the code wouldn't have to know which type of
+ * system is being used. It's unlikely that we would ever do that, since most
+ * modern systems support thread-safe locales, but there was code written to
+ * this end, and is retained, #ifdef'd out.
*/
#include "EXTERN.h"
@@ -378,16 +387,838 @@ S_category_name(const int category)
#endif /* ifdef USE_LOCALE */
/* Windows requres a customized base-level setlocale() */
-# ifdef WIN32
-# define my_setlocale(cat, locale) win32_setlocale(cat, locale)
+#ifdef WIN32
+# define my_setlocale(cat, locale) win32_setlocale(cat, locale)
+#else
+# define my_setlocale(cat, locale) setlocale(cat, locale)
+#endif
+
+#ifndef USE_POSIX_2008_LOCALE
+
+/* "do_setlocale_c" is intended to be called when the category is a constant
+ * known at compile time; "do_setlocale_r", not known until run time */
+# define do_setlocale_c(cat, locale) my_setlocale(cat, locale)
+# define do_setlocale_r(cat, locale) my_setlocale(cat, locale)
+
+#else /* Below uses POSIX 2008 */
+
+/* We emulate setlocale with our own function. LC_foo is not valid for the
+ * POSIX 2008 functions. Instead LC_foo_MASK is used, which we use an array
+ * lookup to convert to. At compile time we have defined LC_foo_INDEX as the
+ * proper offset into the array 'category_masks[]'. At runtime, we have to
+ * search through the array (as the actual numbers may not be small contiguous
+ * positive integers which would lend themselves to array lookup). */
+# define do_setlocale_c(cat, locale) \
+ emulate_setlocale(cat, locale, cat ## _INDEX, TRUE)
+# define do_setlocale_r(cat, locale) emulate_setlocale(cat, locale, 0, FALSE)
+
+/* A third array, parallel to the ones above to map from category to its
+ * equivalent mask */
+const int category_masks[] = {
+# ifdef USE_LOCALE_NUMERIC
+ LC_NUMERIC_MASK,
+# endif
+# ifdef USE_LOCALE_CTYPE
+ LC_CTYPE_MASK,
+# endif
+# ifdef USE_LOCALE_COLLATE
+ LC_COLLATE_MASK,
+# endif
+# ifdef USE_LOCALE_TIME
+ LC_TIME_MASK,
+# endif
+# ifdef USE_LOCALE_MESSAGES
+ LC_MESSAGES_MASK,
+# endif
+# ifdef USE_LOCALE_MONETARY
+ LC_MONETARY_MASK,
+# endif
+# ifdef USE_LOCALE_ADDRESS
+ LC_ADDRESS_MASK,
+# endif
+# ifdef USE_LOCALE_IDENTIFICATION
+ LC_IDENTIFICATION_MASK,
+# endif
+# ifdef USE_LOCALE_MEASUREMENT
+ LC_MEASUREMENT_MASK,
+# endif
+# ifdef USE_LOCALE_PAPER
+ LC_PAPER_MASK,
+# endif
+# ifdef USE_LOCALE_TELEPHONE
+ LC_TELEPHONE_MASK,
+# endif
+ /* LC_ALL can't be turned off by a Configure
+ * option, and in Posix 2008, should always be
+ * here, so compile it in unconditionally.
+ * This could catch some glitches at compile
+ * time */
+ LC_ALL_MASK
+ };
+
+STATIC const char *
+S_emulate_setlocale(const int category,
+ const char * locale,
+ unsigned int index,
+ const bool is_index_valid
+ )
+{
+ /* This function effectively performs a setlocale() on just the current
+ * thread; thus it is thread-safe. It does this by using the POSIX 2008
+ * locale functions to emulate the behavior of setlocale(). Similar to
+ * regular setlocale(), the return from this function points to memory that
+ * can be overwritten by other system calls, so needs to be copied
+ * immediately if you need to retain it. The difference here is that
+ * system calls besides another setlocale() can overwrite it.
+ *
+ * By doing this, most locale-sensitive functions become thread-safe. The
+ * exceptions are mostly those that return a pointer to static memory.
+ *
+ * This function takes the same parameters, 'category' and 'locale', that
+ * the regular setlocale() function does, but it also takes two additional
+ * ones. This is because the 2008 functions don't use a category; instead
+ * they use a corresponding mask. Because this function operates in both
+ * worlds, it may need one or the other or both. This function can
+ * calculate the mask from the input category, but to avoid this
+ * calculation, if the caller knows at compile time what the mask is, it
+ * can pass it, setting 'is_index_valid' to TRUE; otherwise the mask
+ * parameter is ignored.
+ *
+ * POSIX 2008, for some sick reason, chose not to provide a method to find
+ * the category name of a locale. Some vendors have created a
+ * querylocale() function to do just that. This function is a lot simpler
+ * to implement on systems that have this. Otherwise, we have to keep
+ * track of what the locale has been set to, so that we can return its
+ * name to emulate setlocale(). It's also possible for C code in some
+ * library to change the locale without us knowing it, though as of
+ * September 2017, there are no occurrences in CPAN of uselocale(). Some
+ * libraries do use setlocale(), but that changes the global locale, and
+ * threads using per-thread locales will just ignore those changes.
+ * Another problem is that without querylocale(), we have to guess at what
+ * was meant by setting a locale of "". We handle this by not actually
+ * ever setting to "" (unless querylocale exists), but to emulate what we
+ * think should happen for "".
+ */
+
+ int mask;
+ locale_t old_obj;
+ locale_t new_obj;
+ dTHX;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale input=%d (%s), \"%s\", %d, %d\n", __FILE__, __LINE__, category, category_name(category), locale, index, is_index_valid);
+ }
+
+# endif
+
+ /* If the input mask might be incorrect, calculate the correct one */
+ if (! is_index_valid) {
+ unsigned int i;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: finding index of category %d (%s)\n", __FILE__, __LINE__, category, category_name(category));
+ }
+
+# endif
+
+ for (i = 0; i <= LC_ALL_INDEX; i++) {
+ if (category == categories[i]) {
+ index = i;
+ goto found_index;
+ }
+ }
+
+ /* Here, we don't know about this category, so can't handle it.
+ * Fallback to the early POSIX usages */
+ Perl_warner(aTHX_ packWARN(WARN_LOCALE),
+ "Unknown locale category %d; can't set it to %s\n",
+ category, locale);
+ return NULL;
+
+ found_index: ;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: index is %d for %s\n", __FILE__, __LINE__, index, category_name(category));
+ }
+
+# endif
+
+ }
+
+ mask = category_masks[index];
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: category name is %s; mask is 0x%x\n", __FILE__, __LINE__, category_names[index], mask);
+ }
+
+# endif
+
+ /* If just querying what the existing locale is ... */
+ if (locale == NULL) {
+ locale_t cur_obj = uselocale((locale_t) 0);
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale querying %p\n", __FILE__, __LINE__, cur_obj);
+ }
+
+# endif
+
+ if (cur_obj == LC_GLOBAL_LOCALE) {
+ return my_setlocale(category, NULL);
+ }
+
+# ifdef HAS_QUERYLOCALE
+
+ return (char *) querylocale(mask, cur_obj);
+
+# else
+# if defined(_NL_LOCALE_NAME) && defined(DEBUGGING)
+
+ {
+ /* Internal glibc for querylocale(), but doesn't handle
+ * empty-string ("") locale properly; who knows what other
+ * glitches. Check it for now, under debug. */
+
+ char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category),
+ uselocale((locale_t) 0));
+ /*
+ PerlIO_printf(Perl_debug_log, "%s:%d: temp_name=%s\n", __FILE__, __LINE__, temp_name ? temp_name : "NULL");
+ PerlIO_printf(Perl_debug_log, "%s:%d: index=%d\n", __FILE__, __LINE__, index);
+ PerlIO_printf(Perl_debug_log, "%s:%d: PL_curlocales[index]=%s\n", __FILE__, __LINE__, PL_curlocales[index]);
+ */
+ if (temp_name && PL_curlocales[index] && strNE(temp_name, "")) {
+ if ( strNE(PL_curlocales[index], temp_name)
+ && ! ( isNAME_C_OR_POSIX(temp_name)
+ && isNAME_C_OR_POSIX(PL_curlocales[index]))) {
+
+# ifdef USE_C_BACKTRACE
+
+ dump_c_backtrace(Perl_debug_log, 20, 1);
+
+# endif
+
+ Perl_croak(aTHX_ "panic: Mismatch between what Perl thinks %s is"
+ " (%s) and what internal glibc thinks"
+ " (%s)\n", category_names[index],
+ PL_curlocales[index], temp_name);
+ }
+
+ return temp_name;
+ }
+ }
+
+# endif
+
+ /* Without querylocale(), we have to use our record-keeping we've
+ * done. */
+
+ if (category != LC_ALL) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[index]);
+ }
+
+# endif
+
+ return PL_curlocales[index];
+ }
+ else { /* For LC_ALL */
+ unsigned int i;
+ Size_t names_len = 0;
+ char * all_string;
+
+ /* If we have a valid LC_ALL value, just return it */
+ if (PL_curlocales[LC_ALL_INDEX]) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, PL_curlocales[LC_ALL_INDEX]);
+ }
+
+# endif
+
+ return PL_curlocales[LC_ALL_INDEX];
+ }
+
+ /* Otherwise, we need to construct a string of name=value pairs.
+ * We use the glibc syntax, like
+ * LC_NUMERIC=C;LC_TIME=en_US.UTF-8;...
+ * First calculate the needed size. */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
+ }
+
+# endif
+
+ names_len += strlen(category_names[i])
+ + 1 /* '=' */
+ + strlen(PL_curlocales[i])
+ + 1; /* ';' */
+ }
+ names_len++; /* Trailing '\0' */
+ SAVEFREEPV(Newx(all_string, names_len, char));
+ *all_string = '\0';
+
+ /* Then fill in the string */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale i=%d, name=%s, locale=%s\n", __FILE__, __LINE__, i, category_names[i], PL_curlocales[i]);
+ }
+
+# endif
+
+ my_strlcat(all_string, category_names[i], names_len);
+ my_strlcat(all_string, "=", names_len);
+ my_strlcat(all_string, PL_curlocales[i], names_len);
+ my_strlcat(all_string, ";", names_len);
+ }
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale returning %s\n", __FILE__, __LINE__, all_string);
+ }
+
+ #endif
+
+ return all_string;
+ }
+
+# ifdef EINVAL
+
+ SETERRNO(EINVAL, LIB_INVARG);
+
+# endif
+
+ return NULL;
+
+# endif
+
+ }
+
+ assert(PL_C_locale_obj);
+
+ /* Otherwise, we are switching locales. This will generally entail freeing
+ * the current one's space (at the C library's discretion). We need to
+ * stop using that locale before the switch. So switch to a known locale
+ * object that we don't otherwise mess with. This returns the locale
+ * object in effect at the time of the switch. */
+ old_obj = uselocale(PL_C_locale_obj);
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale was using %p\n", __FILE__, __LINE__, old_obj);
+ }
+
+# endif
+
+ if (! old_obj) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ dSAVE_ERRNO;
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to C failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
+ RESTORE_ERRNO;
+ }
+
+# endif
+
+ return NULL;
+ }
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, PL_C_locale_obj);
+ }
+
+# endif
+
+ /* If we weren't in a thread safe locale, set so that newlocale() below
+ which uses 'old_obj', uses an empty one. Same for our reserved C object.
+ The latter is defensive coding, so that, even if there is some bug, we
+ will never end up trying to modify either of these, as if passed to
+ newlocale(), they can be. */
+ if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) {
+ old_obj = (locale_t) 0;
+ }
+
+ /* Create the new locale (it may actually modify the current one). */
+
+# ifndef HAS_QUERYLOCALE
+
+ if (strEQ(locale, "")) {
+
+ /* For non-querylocale() systems, we do the setting of "" ourselves to
+ * be sure that we really know what's going on. We follow the Linux
+ * documented behavior (but if that differs from the actual behavior,
+ * this won't work exactly as the OS implements). We go out and
+ * examine the environment based on our understanding of how the system
+ * works, and use that to figure things out */
+
+ const char * const lc_all = PerlEnv_getenv("LC_ALL");
+
+ /* Use any "LC_ALL" environment variable, as it overrides everything
+ * else. */
+ if (lc_all && strNE(lc_all, "")) {
+ locale = lc_all;
+ }
+ else {
+
+ /* Otherwise, we need to dig deeper. Unless overridden, the
+ * default is the LANG environment variable; if it doesn't exist,
+ * then "C" */
+
+ const char * default_name;
+
+ /* To minimize other threads messing with the environment, we copy
+ * the variable, making it a temporary. But this doesn't work upon
+ * program initialization before any scopes are created, and at
+ * this time, there's nothing else going on that would interfere.
+ * So skip the copy in that case */
+ if (PL_scopestack_ix == 0) {
+ default_name = PerlEnv_getenv("LANG");
+ }
+ else {
+ default_name = savepv(PerlEnv_getenv("LANG"));
+ }
+
+ if (! default_name || strEQ(default_name, "")) {
+ default_name = "C";
+ }
+ else if (PL_scopestack_ix != 0) {
+ SAVEFREEPV(default_name);
+ }
+
+ if (category != LC_ALL) {
+ const char * const name = PerlEnv_getenv(category_names[index]);
+
+ /* Here we are setting a single category. Assume will have the
+ * default name */
+ locale = default_name;
+
+ /* But then look for an overriding environment variable */
+ if (name && strNE(name, "")) {
+ locale = name;
+ }
+ }
+ else {
+ bool did_override = FALSE;
+ unsigned int i;
+
+ /* Here, we are getting LC_ALL. Any categories that don't have
+ * a corresponding environment variable set should be set to
+ * LANG, or to "C" if there is no LANG. If no individual
+ * categories differ from this, we can just set LC_ALL. This
+ * is buggy on systems that have extra categories that we don't
+ * know about. If there is an environment variable that sets
+ * that category, we won't know to look for it, and so our use
+ * of LANG or "C" improperly overrides it. On the other hand,
+ * if we don't do what is done here, and there is no
+ * environment variable, the category's locale should be set to
+ * LANG or "C". So there is no good solution. khw thinks the
+ * best is to look at systems to see what categories they have,
+ * and include them, and then to assume that we know the
+ * complete set */
+
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+ const char * const env_override
+ = savepv(PerlEnv_getenv(category_names[i]));
+ const char * this_locale = ( env_override
+ && strNE(env_override, ""))
+ ? env_override
+ : default_name;
+ emulate_setlocale(categories[i], this_locale, i, TRUE);
+
+ if (strNE(this_locale, default_name)) {
+ did_override = TRUE;
+ }
+
+ Safefree(env_override);
+ }
+
+ /* If all the categories are the same, we can set LC_ALL to
+ * that */
+ if (! did_override) {
+ locale = default_name;
+ }
+ else {
+
+ /* Here, LC_ALL is no longer valid, as some individual
+ * categories don't match it. We call ourselves
+ * recursively, as that will execute the code that
+ * generates the proper locale string for this situation.
+ * We don't do the remainder of this function, as that is
+ * to update our records, and we've just done that for the
+ * individual categories in the loop above, and doing so
+ * would cause LC_ALL to be done as well */
+ return emulate_setlocale(LC_ALL, NULL, LC_ALL_INDEX, TRUE);
+ }
+ }
+ }
+ }
+ else if (strchr(locale, ';')) {
+
+ /* LC_ALL may actually incude a conglomeration of various categories.
+ * Without querylocale, this code uses the glibc (as of this writing)
+ * syntax for representing that, but that is not a stable API, and
+ * other platforms do it differently, so we have to handle all cases
+ * ourselves */
+
+ const char * s = locale;
+ const char * e = locale + strlen(locale);
+ const char * p = s;
+ const char * category_end;
+ const char * name_start;
+ const char * name_end;
+
+ while (s < e) {
+ unsigned int i;
+
+ /* Parse through the category */
+ while (isWORDCHAR(*p)) {
+ p++;
+ }
+ category_end = p;
+
+ if (*p++ != '=') {
+ Perl_croak(aTHX_
+ "panic: %s: %d: Unexpected character in locale name '%02X",
+ __FILE__, __LINE__, *(p-1));
+ }
+
+ /* Parse through the locale name */
+ name_start = p;
+ while (isGRAPH(*p) && *p != ';') {
+ p++;
+ }
+ name_end = p;
+
+ if (*p++ != ';') {
+ Perl_croak(aTHX_
+ "panic: %s: %d: Unexpected character in locale name '%02X",
+ __FILE__, __LINE__, *(p-1));
+ }
+
+ /* Find the index of the category name in our lists */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+
+ /* Keep going if this isn't the index. The strnNE() avoids a
+ * Perl_form(), but would fail if ever a category name could be
+ * a substring of another one, like if there were a
+ * "LC_TIME_DATE" */
+ if strnNE(s, category_names[i], category_end - s) {
+ continue;
+ }
+
+ /* If this index is for the single category we're changing, we
+ * have found the locale to set it to. */
+ if (category == categories[i]) {
+ locale = Perl_form(aTHX_ "%.*s",
+ (int) (name_end - name_start),
+ name_start);
+ goto ready_to_set;
+ }
+
+ if (category == LC_ALL) {
+ char * individ_locale = Perl_form(aTHX_ "%.*s", (int) (p - s), s);
+ emulate_setlocale(categories[i], individ_locale, i, TRUE);
+ Safefree(individ_locale);
+ }
+ }
+
+ s = p;
+ }
+
+ /* Here we have set all the individual categories by recursive calls.
+ * These collectively should have fixed up LC_ALL, so can just query
+ * what that now is */
+ assert(category == LC_ALL);
+
+ return do_setlocale_c(LC_ALL, NULL);
+ }
+
+ ready_to_set: ;
+
+# endif /* end of ! querylocale */
+
+ /* Ready to create a new locale by modification of the exising one */
+ new_obj = newlocale(mask, locale, old_obj);
+
+ if (! new_obj) {
+ dSAVE_ERRNO;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale creating new object failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
+ }
+
+# endif
+
+ if (! uselocale(old_obj)) {
+ SAVE_ERRNO;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
+ }
+
+# endif
+
+ }
+ RESTORE_ERRNO;
+ return NULL;
+ }
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale created %p\n", __FILE__, __LINE__, new_obj);
+ }
+
+# endif
+
+ /* And switch into it */
+ if (! uselocale(new_obj)) {
+ dSAVE_ERRNO;
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale switching to new object failed\n", __FILE__, __LINE__);
+ }
+
+# endif
+
+ if (! uselocale(old_obj)) {
+
+# ifdef DEBUGGING
+
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: switching back failed: %d\n", __FILE__, __LINE__, GET_ERRNO);
+ }
+
+# endif
+
+ }
+ freelocale(new_obj);
+ RESTORE_ERRNO;
+ return NULL;
+ }
+
+# ifdef DEBUGGING
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: emulate_setlocale now using %p\n", __FILE__, __LINE__, new_obj);
+ }
+
+# endif
+
+ /* We are done, except for updating our records (if the system doesn't keep
+ * them) and in the case of locale "", we don't actually know what the
+ * locale that got switched to is, as it came from the environment. So
+ * have to find it */
+
+# ifdef HAS_QUERYLOCALE
+
+ if (strEQ(locale, "")) {
+ locale = querylocale(mask, new_obj);
+ }
+
# else
-# define my_setlocale(cat, locale) setlocale(cat, locale)
+
+ /* Here, 'locale' is the return value */
+
+ /* Without querylocale(), we have to update our records */
+
+ if (category == LC_ALL) {
+ unsigned int i;
+
+ /* For LC_ALL, we change all individual categories to correspond */
+ /* PL_curlocales is a parallel array, so has same
+ * length as 'categories' */
+ for (i = 0; i <= LC_ALL_INDEX; i++) {
+ Safefree(PL_curlocales[i]);
+ PL_curlocales[i] = savepv(locale);
+ }
+ }
+ else {
+
+ /* For a single category, if it's not the same as the one in LC_ALL, we
+ * nullify LC_ALL */
+
+ if (PL_curlocales[LC_ALL_INDEX] && strNE(PL_curlocales[LC_ALL_INDEX], locale)) {
+ Safefree(PL_curlocales[LC_ALL_INDEX]);
+ PL_curlocales[LC_ALL_INDEX] = NULL;
+ }
+
+ /* Then update the category's record */
+ Safefree(PL_curlocales[index]);
+ PL_curlocales[index] = savepv(locale);
+ }
+
# endif
-/* Just placeholders for now. "_c" is intended to be called when the category
- * is a constant known at compile time; "_r", not known until run time */
-# define do_setlocale_c(category, locale) my_setlocale(category, locale)
-# define do_setlocale_r(category, locale) my_setlocale(category, locale)
+ return locale;
+}
+
+#endif /* USE_POSIX_2008_LOCALE */
+
+#if 0 /* Code that was to emulate thread-safe locales on platforms that
+ didn't natively support them */
+
+/* The way this would work is that we would keep a per-thread list of the
+ * correct locale for that thread. Any operation that was locale-sensitive
+ * would have to be changed so that it would look like this:
+ *
+ * LOCALE_LOCK;
+ * setlocale to the correct locale for this operation
+ * do operation
+ * LOCALE_UNLOCK
+ *
+ * This leaves the global locale in the most recently used operation's, but it
+ * was locked long enough to get the result. If that result is static, it
+ * needs to be copied before the unlock.
+ *
+ * Macros could be written like SETUP_LOCALE_DEPENDENT_OP(category) that did
+ * the setup, but are no-ops when not needed, and similarly,
+ * END_LOCALE_DEPENDENT_OP for the tear-down
+ *
+ * But every call to a locale-sensitive function would have to be changed, and
+ * if a module didn't cooperate by using the mutex, things would break.
+ *
+ * This code was abandoned before being completed or tested, and is left as-is
+*/
+
+# define do_setlocale_c(cat, locale) locking_setlocale(cat, locale, cat ## _INDEX, TRUE)
+# define do_setlocale_r(cat, locale) locking_setlocale(cat, locale, 0, FALSE)
+
+STATIC char *
+S_locking_setlocale(pTHX_
+ const int category,
+ const char * locale,
+ int index,
+ const bool is_index_valid
+ )
+{
+ /* This function kind of performs a setlocale() on just the current thread;
+ * thus it is kind of thread-safe. It does this by keeping a thread-level
+ * array of the current locales for each category. Every time a locale is
+ * switched to, it does the switch globally, but updates the thread's
+ * array. A query as to what the current locale is just returns the
+ * appropriate element from the array, and doesn't actually call the system
+ * setlocale(). The saving into the array is done in an uninterruptible
+ * section of code, so is unaffected by whatever any other threads might be
+ * doing.
+ *
+ * All locale-sensitive operations must work by first starting a critical
+ * section, then switching to the thread's locale as kept by this function,
+ * and then doing the operation, then ending the critical section. Thus,
+ * each gets done in the appropriate locale. simulating thread-safety.
+ *
+ * This function takes the same parameters, 'category' and 'locale', that
+ * the regular setlocale() function does, but it also takes two additional
+ * ones. This is because as described earlier. If we know on input the
+ * index corresponding to the category into the array where we store the
+ * current locales, we don't have to calculate it. If the caller knows at
+ * compile time what the index is, it it can pass it, setting
+ * 'is_index_valid' to TRUE; otherwise the index parameter is ignored.
+ *
+ */
+
+ /* If the input index might be incorrect, calculate the correct one */
+ if (! is_index_valid) {
+ unsigned int i;
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: converting category %d to index\n", __FILE__, __LINE__, category);
+ }
+
+ for (i = 0; i <= LC_ALL_INDEX; i++) {
+ if (category == categories[i]) {
+ index = i;
+ goto found_index;
+ }
+ }
+
+ /* Here, we don't know about this category, so can't handle it.
+ * XXX best we can do is to unsafely set this
+ * XXX warning */
+
+ return my_setlocale(category, locale);
+
+ found_index: ;
+
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: index is 0x%x\n", __FILE__, __LINE__, index);
+ }
+ }
+
+ /* For a query, just return what's in our records */
+ if (new_locale == NULL) {
+ return curlocales[index];
+ }
+
+
+ /* Otherwise, we need to do the switch, and save the result, all in a
+ * critical section */
+
+ Safefree(curlocales[[index]]);
+
+ /* It might be that this is called from an already-locked section of code.
+ * We would have to detect and skip the LOCK/UNLOCK if so */
+ LOCALE_LOCK;
+
+ curlocales[index] = savepv(my_setlocale(category, new_locale));
+
+ if (strEQ(new_locale, "")) {
+
+#ifdef LC_ALL
+
+ /* The locale values come from the environment, and may not all be the
+ * same, so for LC_ALL, we have to update all the others, while the
+ * mutex is still locked */
+
+ if (category == LC_ALL) {
+ unsigned int i;
+ for (i = 0; i < LC_ALL_INDEX) {
+ curlocales[i] = my_setlocale(categories[i], NULL);
+ }
+ }
+ }
+
+#endif
+
+ LOCALE_UNLOCK;
+
+ return curlocales[index];
+}
+
+#endif
STATIC void
S_set_numeric_radix(pTHX_ const bool use_locale)
@@ -427,9 +1258,8 @@ S_set_numeric_radix(pTHX_ const bool use_locale)
}
-
-void
-Perl_new_numeric(pTHX_ const char *newnum)
+STATIC void
+S_new_numeric(pTHX_ const char *newnum)
{
#ifndef USE_LOCALE_NUMERIC
@@ -468,10 +1298,7 @@ Perl_new_numeric(pTHX_ const char *newnum)
* variables are true at the same time. (Toggling is a
* no-op under these circumstances.) This variable is
* used to avoid having to recalculate.
- * Any code changing the locale (outside this file) should use
- * POSIX::setlocale, which calls this function. Therefore this function
- * should be called directly only from this file and from
- * POSIX::setlocale() */
+ */
char *save_newnum;
@@ -1220,14 +2047,21 @@ returns the correct underlying C<LC_NUMERIC> locale, instead of C<C> always, as
perl keeps that locale category as C<C>, changing it briefly during the
operations where the underlying one is required.
-The other reason it isn't completely a drop-in replacement is that it is
+Another reason it isn't completely a drop-in replacement is that it is
declared to return S<C<const char *>>, whereas the system setlocale omits the
C<const>. (If it were being written today, plain setlocale would be declared
const, since it is illegal to change the information it returns; doing so leads
to segfaults.)
+Finally, C<Perl_setlocale> works under all circumstances, whereas plain
+C<setlocale> can be completely ineffective on some platforms under some
+configurations.
+
C<Perl_setlocale> should not be used to change the locale except on systems
-where the predefined variable C<${^SAFE_LOCALES}> is 1.
+where the predefined variable C<${^SAFE_LOCALES}> is 1. On some such systems,
+the system C<setlocale()> is ineffective, returning the wrong information, and
+failing to actually change the locale. C<Perl_setlocale>, however works
+properly in all circumstances.
The return points to a per-thread static buffer, which is overwritten the next
time C<Perl_setlocale> is called from the same thread.
@@ -1554,6 +2388,7 @@ S_my_nl_langinfo(const nl_item item, bool toggle)
S_my_nl_langinfo(const int item, bool toggle)
#endif
{
+ const char * retval;
dTHX;
/* We only need to toggle into the underlying LC_NUMERIC locale for these
@@ -1565,7 +2400,8 @@ S_my_nl_langinfo(const int item, bool toggle)
}
#if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available. */
-#if ! defined(HAS_POSIX_2008_LOCALE)
+# if ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L) \
+ || ! defined(HAS_POSIX_2008_LOCALE)
/* Here, use plain nl_langinfo(), switching to the underlying LC_NUMERIC
* for those items dependent on it. This must be copied to a buffer before
@@ -1583,8 +2419,15 @@ S_my_nl_langinfo(const int item, bool toggle)
this code section (the only call to nl_langinfo in
the core) */
- save_to_buffer(nl_langinfo(item), &PL_langinfo_buf,
- &PL_langinfo_bufsize, 0);
+ retval = nl_langinfo(item);
+
+# ifdef USE_ITHREADS
+
+ /* Copy to a per-thread buffer */
+ save_to_buffer(retval, &PL_langinfo_buf, &PL_langinfo_bufsize, 0);
+ retval = PL_langinfo_buf;
+
+# endif
LOCALE_UNLOCK;
@@ -1614,8 +2457,10 @@ S_my_nl_langinfo(const int item, bool toggle)
}
}
- save_to_buffer(nl_langinfo_l(item, cur),
- &PL_langinfo_buf, &PL_langinfo_bufsize, 0);
+ /* We don't have to copy it to a buffer, as this is a thread-safe
+ * function which Configure has made sure of */
+ retval = nl_langinfo_l(item, cur);
+
if (do_free) {
freelocale(cur);
}
@@ -1623,7 +2468,7 @@ S_my_nl_langinfo(const int item, bool toggle)
# endif
- if (strEQ(PL_langinfo_buf, "")) {
+ if (strEQ(retval, "")) {
if (item == PERL_YESSTR) {
return "yes";
}
@@ -1632,7 +2477,7 @@ S_my_nl_langinfo(const int item, bool toggle)
}
}
- return PL_langinfo_buf;
+ return retval;
#else /* Below, emulate nl_langinfo as best we can */
@@ -1662,7 +2507,6 @@ S_my_nl_langinfo(const int item, bool toggle)
switch (item) {
Size_t len;
- const char * retval;
/* These 2 are unimplemented */
case PERL_CODESET:
@@ -2128,51 +2972,87 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
# ifdef USE_LOCALE_NUMERIC
assert(categories[LC_NUMERIC_INDEX] == LC_NUMERIC);
assert(strEQ(category_names[LC_NUMERIC_INDEX], "LC_NUMERIC"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_NUMERIC_INDEX] == LC_NUMERIC_MASK);
+# endif
# endif
# ifdef USE_LOCALE_CTYPE
assert(categories[LC_CTYPE_INDEX] == LC_CTYPE);
assert(strEQ(category_names[LC_CTYPE_INDEX], "LC_CTYPE"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_CTYPE_INDEX] == LC_CTYPE_MASK);
+# endif
# endif
# ifdef USE_LOCALE_COLLATE
assert(categories[LC_COLLATE_INDEX] == LC_COLLATE);
assert(strEQ(category_names[LC_COLLATE_INDEX], "LC_COLLATE"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_COLLATE_INDEX] == LC_COLLATE_MASK);
+# endif
# endif
# ifdef USE_LOCALE_TIME
assert(categories[LC_TIME_INDEX] == LC_TIME);
assert(strEQ(category_names[LC_TIME_INDEX], "LC_TIME"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_TIME_INDEX] == LC_TIME_MASK);
+# endif
# endif
# ifdef USE_LOCALE_MESSAGES
assert(categories[LC_MESSAGES_INDEX] == LC_MESSAGES);
assert(strEQ(category_names[LC_MESSAGES_INDEX], "LC_MESSAGES"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_MESSAGES_INDEX] == LC_MESSAGES_MASK);
+# endif
# endif
# ifdef USE_LOCALE_MONETARY
assert(categories[LC_MONETARY_INDEX] == LC_MONETARY);
assert(strEQ(category_names[LC_MONETARY_INDEX], "LC_MONETARY"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_MONETARY_INDEX] == LC_MONETARY_MASK);
+# endif
# endif
# ifdef USE_LOCALE_ADDRESS
assert(categories[LC_ADDRESS_INDEX] == LC_ADDRESS);
assert(strEQ(category_names[LC_ADDRESS_INDEX], "LC_ADDRESS"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_ADDRESS_INDEX] == LC_ADDRESS_MASK);
+# endif
# endif
# ifdef USE_LOCALE_IDENTIFICATION
assert(categories[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION);
assert(strEQ(category_names[LC_IDENTIFICATION_INDEX], "LC_IDENTIFICATION"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_IDENTIFICATION_INDEX] == LC_IDENTIFICATION_MASK);
+# endif
# endif
# ifdef USE_LOCALE_MEASUREMENT
assert(categories[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT);
assert(strEQ(category_names[LC_MEASUREMENT_INDEX], "LC_MEASUREMENT"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_MEASUREMENT_INDEX] == LC_MEASUREMENT_MASK);
+# endif
# endif
# ifdef USE_LOCALE_PAPER
assert(categories[LC_PAPER_INDEX] == LC_PAPER);
assert(strEQ(category_names[LC_PAPER_INDEX], "LC_PAPER"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_PAPER_INDEX] == LC_PAPER_MASK);
+# endif
# endif
# ifdef USE_LOCALE_TELEPHONE
assert(categories[LC_TELEPHONE_INDEX] == LC_TELEPHONE);
assert(strEQ(category_names[LC_TELEPHONE_INDEX], "LC_TELEPHONE"));
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_TELEPHONE_INDEX] == LC_TELEPHONE_MASK);
+# endif
# endif
# ifdef LC_ALL
assert(categories[LC_ALL_INDEX] == LC_ALL);
assert(strEQ(category_names[LC_ALL_INDEX], "LC_ALL"));
assert(NOMINAL_LC_ALL_INDEX == LC_ALL_INDEX);
+# ifdef USE_POSIX_2008_LOCALE
+ assert(category_masks[LC_ALL_INDEX] == LC_ALL_MASK);
+# endif
# endif
# endif /* DEBUGGING */
@@ -2181,8 +3061,34 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
my_strlcpy(PL_locale_utf8ness, C_and_POSIX_utf8ness,
sizeof(PL_locale_utf8ness));
+# ifdef USE_THREAD_SAFE_LOCALE
+# ifdef WIN32
+
+ _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
+
+# endif
+# endif
+# if defined(LC_ALL_MASK) && defined(HAS_POSIX_2008_LOCALE)
+
+ PL_C_locale_obj = newlocale(LC_ALL_MASK, "C", (locale_t) 0);
+ if (! PL_C_locale_obj) {
+ Perl_croak_nocontext(
+ "panic: Cannot create POSIX 2008 C locale object; errno=%d", errno);
+ }
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "%s:%d: created C object %p\n", __FILE__, __LINE__, PL_C_locale_obj);
+ }
+
+# endif
+
PL_numeric_radix_sv = newSVpvs(".");
+# if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE)
+
+ /* Initialize our records. If we have POSIX 2008, we have LC_ALL */
+ do_setlocale_c(LC_ALL, my_setlocale(LC_ALL, NULL));
+
+# endif
# ifdef LOCALE_ENVIRON_REQUIRED
/*
@@ -2541,11 +3447,12 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
for (i = 0; i < NOMINAL_LC_ALL_INDEX; i++) {
-# if defined(USE_ITHREADS)
+# if defined(USE_ITHREADS) && ! defined(USE_THREAD_SAFE_LOCALE)
/* This caches whether each category's locale is UTF-8 or not. This
* may involve changing the locale. It is ok to do this at
- * initialization time before any threads have started, but not later.
+ * initialization time before any threads have started, but not later
+ * unless thread-safe operations are used.
* Caching means that if the program heeds our dictate not to change
* locales in threaded applications, this data will remain valid, and
* it may get queried without having to change locales. If the
@@ -3976,45 +4883,18 @@ Perl_my_strerror(pTHX_ const int errnum)
# endif
# else /* Doesn't have strerror_l() */
-# ifdef USE_POSIX_2008_LOCALE
-
- locale_t save_locale = NULL;
-
-# else
-
const char * save_locale = NULL;
bool locale_is_C = FALSE;
/* We have a critical section to prevent another thread from executing this
- * same code at the same time. (On unthreaded perls, the LOCK is a
+ * same code at the same time. (On thread-safe perls, the LOCK is a
* no-op.) Since this is the only place in core that changes LC_MESSAGES
* (unless the user has called setlocale(), this works to prevent races. */
LOCALE_LOCK;
-# endif
-
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
"my_strerror called with errnum %d\n", errnum));
if (! within_locale_scope) {
- errno = 0;
-
-# ifdef USE_POSIX_2008_LOCALE /* Use the thread-safe locale functions */
-
- DEBUG_Lv(PerlIO_printf(Perl_debug_log,
- "Not within locale scope, about to call"
- " uselocale(0x%p)\n", PL_C_locale_obj));
- save_locale = uselocale(PL_C_locale_obj);
- if (! save_locale) {
- DEBUG_L(PerlIO_printf(Perl_debug_log,
- "uselocale failed, errno=%d\n", errno));
- }
- else {
- DEBUG_Lv(PerlIO_printf(Perl_debug_log,
- "uselocale returned 0x%p\n", save_locale));
- }
-
-# else /* Not thread-safe build */
-
save_locale = do_setlocale_c(LC_MESSAGES, NULL);
if (! save_locale) {
Perl_croak(aTHX_
@@ -4033,9 +4913,6 @@ Perl_my_strerror(pTHX_ const int errnum)
do_setlocale_c(LC_MESSAGES, "C");
}
}
-
-# endif
-
} /* end of ! within_locale_scope */
else {
DEBUG_Lv(PerlIO_printf(Perl_debug_log, "%s: %d: WITHIN locale scope\n",
@@ -4047,21 +4924,6 @@ Perl_my_strerror(pTHX_ const int errnum)
errstr = savepv(Strerror(errnum));
if (! within_locale_scope) {
- errno = 0;
-
-# ifdef USE_POSIX_2008_LOCALE
-
- DEBUG_Lv(PerlIO_printf(Perl_debug_log,
- "%s: %d: not within locale scope, restoring the locale\n",
- __FILE__, __LINE__));
- if (save_locale && ! uselocale(save_locale)) {
- DEBUG_L(PerlIO_printf(Perl_debug_log,
- "uselocale restore failed, errno=%d\n", errno));
- }
- }
-
-# else
-
if (save_locale && ! locale_is_C) {
if (! do_setlocale_c(LC_MESSAGES, save_locale)) {
Perl_croak(aTHX_
@@ -4074,7 +4936,6 @@ Perl_my_strerror(pTHX_ const int errnum)
LOCALE_UNLOCK;
-# endif
# endif /* End of doesn't have strerror_l */
#endif /* End of does have locale messages */
@@ -4094,21 +4955,140 @@ Perl_my_strerror(pTHX_ const int errnum)
/*
-=for apidoc sync_locale
+=for apidoc switch_to_global_locale
+
+On systems without locale support, or on single-threaded builds, or on
+platforms that do not support per-thread locale operations, this function does
+nothing. On such systems that do have locale support, only a locale global to
+the whole program is available.
+
+On multi-threaded builds on systems that do have per-thread locale operations,
+this function converts the thread it is running in to use the global locale.
+This is for code that has not yet or cannot be updated to handle multi-threaded
+locale operation. As long as only a single thread is so-converted, everything
+works fine, as all the other threads continue to ignore the global one, so only
+this thread looks at it.
+
+Without this function call, threads that use the L<C<setlocale(3)>> system
+function will not work properly, as all the locale-sensitive functions will
+look at the per-thread locale, and C<setlocale> will have no effect on this
+thread.
+
+Perl code should convert to either call
+L<C<Perl_setlocale>|perlapi/Perl_setlocale> (which is a drop-in for the system
+C<setlocale>) or use the methods given in L<perlcall> to call
+L<C<POSIX::setlocale>|POSIX/setlocale>. Either one will transparently properly
+handle all cases of single- vs multi-thread, POSIX 2008-supported or not.
+
+Non-Perl libraries, such as C<gtk>, that call the system C<setlocale> can
+continue to work if this function is called before transferring control to the
+library.
-Changing the program's locale should be avoided by XS code. Nevertheless,
-certain non-Perl libraries called from XS, such as C<Gtk> do so. When this
-happens, Perl needs to be told that the locale has changed. Use this function
-to do so, before returning to Perl.
+Upon return from the code that needs to use the global locale,
+L<C<sync_locale()>|perlapi/sync_locale> should be called to restore the safe
+multi-thread operation.
=cut
*/
void
-Perl_sync_locale(pTHX)
+Perl_switch_to_global_locale()
+{
+
+#ifdef USE_THREAD_SAFE_LOCALE
+# ifdef WIN32
+
+ _configthreadlocale(_DISABLE_PER_THREAD_LOCALE);
+
+# else
+# ifdef HAS_QUERYLOCALE
+
+ setlocale(LC_ALL, querylocale(LC_ALL_MASK, uselocale((locale_t) 0)));
+
+# else
+
+ {
+ unsigned int i;
+
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+ setlocale(categories[i], do_setlocale_r(categories[i], NULL));
+ }
+ }
+
+# endif
+
+ uselocale(LC_GLOBAL_LOCALE);
+
+# endif
+#endif
+
+}
+
+/*
+
+=for apidoc sync_locale
+
+L<C<Perl_setlocale>|perlapi/Perl_setlocale> can be used at any time to query or
+change the locale (though changing the locale is antisocial and dangerous on
+multi-threaded systems that don't have multi-thread safe locale operations.
+(See L<perllocale/Multi-threaded operation>). Using the system
+L<C<setlocale(3)>> should be avoided. Nevertheless, certain non-Perl libraries
+called from XS, such as C<Gtk> do so, and this can't be changed. When the
+locale is changed by XS code that didn't use
+L<C<Perl_setlocale>|perlapi/Perl_setlocale>, Perl needs to be told that the
+locale has changed. Use this function to do so, before returning to Perl.
+
+The return value is a boolean: TRUE if the global locale at the time of call
+was in effect; and FALSE if a per-thread locale was in effect. This can be
+used by the caller that needs to restore things as-they-were to decide whether
+or not to call
+L<C<Perl_switch_to_global_locale>|perlapi/switch_to_global_locale>.
+
+=cut
+*/
+
+bool
+Perl_sync_locale()
{
- char * newlocale;
+ const char * newlocale;
+ dTHX;
+
+#ifdef USE_POSIX_2008_LOCALE
+
+ bool was_in_global_locale = FALSE;
+ locale_t cur_obj = uselocale((locale_t) 0);
+
+ /* On Windows, unless the foreign code has turned off the thread-safe
+ * locale setting, any plain setlocale() will have affected what we see, so
+ * no need to worry. Otherwise, If the foreign code has done a plain
+ * setlocale(), it will only affect the global locale on POSIX systems, but
+ * will affect the */
+ if (cur_obj == LC_GLOBAL_LOCALE) {
+
+# ifdef HAS_QUERY_LOCALE
+
+ do_setlocale_c(LC_ALL, setlocale(LC_ALL, NULL));
+
+# else
+
+ unsigned int i;
+
+ /* We can't trust that we can read the LC_ALL format on the
+ * platform, so do them individually */
+ for (i = 0; i < LC_ALL_INDEX; i++) {
+ do_setlocale_r(categories[i], setlocale(categories[i], NULL));
+ }
+
+# endif
+
+ was_in_global_locale = TRUE;
+ }
+
+#else
+ bool was_in_global_locale = TRUE;
+
+#endif
#ifdef USE_LOCALE_CTYPE
newlocale = do_setlocale_c(LC_CTYPE, NULL);
@@ -4137,6 +5117,7 @@ Perl_sync_locale(pTHX)
#endif /* USE_LOCALE_NUMERIC */
+ return was_in_global_locale;
}
#if defined(DEBUGGING) && defined(USE_LOCALE)
@@ -4192,6 +5173,58 @@ S_setlocale_debug_string(const int category, /* category number,
#endif
+void
+Perl_thread_locale_init()
+{
+ /* Called from a thread on startup*/
+
+#ifdef USE_THREAD_SAFE_LOCALE
+
+ dTHX_DEBUGGING;
+
+ /* C starts the new thread in the global C locale. If we are thread-safe,
+ * we want to not be in the global locale */
+
+ DEBUG_L(PerlIO_printf(Perl_debug_log,
+ "%s:%d: new thread, initial locale is %s; calling setlocale\n",
+ __FILE__, __LINE__, setlocale(LC_ALL, NULL)));
+
+# ifdef WIN32
+
+ _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
+
+# else
+
+ Perl_setlocale(LC_ALL, "C");
+
+# endif
+#endif
+
+}
+
+void
+Perl_thread_locale_term()
+{
+ /* Called from a thread as it gets ready to terminate */
+
+#ifdef USE_THREAD_SAFE_LOCALE
+
+ /* C starts the new thread in the global C locale. If we are thread-safe,
+ * we want to not be in the global locale */
+
+# ifndef WIN32
+
+ { /* Free up */
+ locale_t cur_obj = uselocale(LC_GLOBAL_LOCALE);
+ if (cur_obj != LC_GLOBAL_LOCALE) {
+ freelocale(cur_obj);
+ }
+ }
+
+# endif
+#endif
+
+}
/*
* ex: set ts=8 sts=4 sw=4 et:
diff --git a/makedef.pl b/makedef.pl
index aabdaa78ee..06c647fc2c 100644
--- a/makedef.pl
+++ b/makedef.pl
@@ -360,12 +360,14 @@ unless ($define{'USE_ITHREADS'}) {
++$skip{$_} foreach qw(
PL_keyword_plugin_mutex
PL_check_mutex
+ PL_curlocales
PL_op_mutex
PL_regex_pad
PL_regex_padav
PL_dollarzero_mutex
PL_hints_mutex
PL_locale_mutex
+ PL_lc_numeric_mutex
PL_my_ctx_mutex
PL_perlio_mutex
PL_stashpad
diff --git a/perl.c b/perl.c
index a638baeeb7..70dc823040 100644
--- a/perl.c
+++ b/perl.c
@@ -1146,7 +1146,14 @@ perl_destruct(pTHXx)
Safefree(PL_collation_name);
PL_collation_name = NULL;
#endif
-
+#if defined(USE_POSIX_2008_LOCALE) \
+ && defined(USE_THREAD_SAFE_LOCALE) \
+ && ! defined(HAS_QUERYLOCALE)
+ for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) {
+ Safefree(PL_curlocales[i]);
+ PL_curlocales[i] = NULL;
+ }
+#endif
#ifdef USE_LOCALE_NUMERIC
Safefree(PL_numeric_name);
PL_numeric_name = NULL;
diff --git a/perl.h b/perl.h
index 05ceff4ce5..1473721e68 100644
--- a/perl.h
+++ b/perl.h
@@ -759,7 +759,6 @@
#endif /* !NO_LOCALE && HAS_SETLOCALE */
#ifdef USE_LOCALE /* These locale things are all subject to change */
-
# if defined(HAS_NEWLOCALE) \
&& defined(LC_ALL_MASK) \
&& defined(HAS_FREELOCALE) \
@@ -772,6 +771,15 @@
# define HAS_POSIX_2008_LOCALE
# endif
+# if defined(USE_ITHREADS) \
+ && ( defined(HAS_POSIX_2008_LOCALE) \
+ || (defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400)) \
+ && ! defined(NO_THREAD_SAFE_LOCALE)
+# define USE_THREAD_SAFE_LOCALE
+# ifdef HAS_POSIX_2008_LOCALE
+# define USE_POSIX_2008_LOCALE
+# endif
+# endif
#endif
#include <setjmp.h>
@@ -5556,20 +5564,101 @@ typedef struct am_table_short AMTS;
# define LOCALE_INIT
# define LOCALE_LOCK
# define LOCALE_UNLOCK
+# define LC_NUMERIC_LOCK(cond)
+# define LC_NUMERIC_UNLOCK
# define LOCALE_TERM STMT_START { _LOCALE_TERM_POSIX_2008; } STMT_END
-# else /* Below is do use threads */
-# define LOCALE_INIT MUTEX_INIT(&PL_locale_mutex)
-# define LOCALE_LOCK MUTEX_LOCK(&PL_locale_mutex)
-# define LOCALE_UNLOCK MUTEX_UNLOCK(&PL_locale_mutex)
+# else
+# define LOCALE_INIT STMT_START { \
+ MUTEX_INIT(&PL_locale_mutex); \
+ MUTEX_INIT(&PL_lc_numeric_mutex); \
+ } STMT_END
+
+/* This mutex is used to create critical sections where we want the LC_NUMERIC
+ * locale to be locked into either the C (standard) locale, or the underlying
+ * locale, so that other threads interrupting this one don't change it to the
+ * wrong state before we've had a chance to complete our operation. It can
+ * stay locked over an entire printf operation, for example. And so is made
+ * distinct from the LOCALE_LOCK mutex.
+ *
+ * This simulates kind of a general semaphore. The current thread will lock
+ * the mutex if the per-thread variable is zero, and then increments that
+ * variable. Each corresponding UNLOCK decrements the variable until it is 0,
+ * at which point it actually unlocks the mutex. Since the variable is
+ * per-thread, there is no race with other threads.
+ *
+ * The single argument is a condition to test for, and if true, to panic, as
+ * this would be an attempt to complement the LC_NUMERIC state, and we're not
+ * supposed to because it's locked */
+# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \
+ STMT_START { \
+ if (PL_lc_numeric_mutex_depth <= 0) { \
+ MUTEX_LOCK(&PL_lc_numeric_mutex); \
+ PL_lc_numeric_mutex_depth = 1; \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: locking lc_numeric; depth=1\n", \
+ __FILE__, __LINE__)); \
+ } \
+ else { \
+ PL_lc_numeric_mutex_depth++; \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: avoided lc_numeric_lock; depth=%d\n", \
+ __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \
+ if (cond_to_panic_if_already_locked) { \
+ Perl_croak_nocontext("panic: %s: %d: Trying to change" \
+ " LC_NUMERIC incompatibly", \
+ __FILE__, __LINE__); \
+ } \
+ } \
+ } STMT_END
+
+# define LC_NUMERIC_UNLOCK \
+ STMT_START { \
+ if (PL_lc_numeric_mutex_depth <= 1) { \
+ MUTEX_UNLOCK(&PL_lc_numeric_mutex); \
+ PL_lc_numeric_mutex_depth = 0; \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: unlocking lc_numeric; depth=0\n", \
+ __FILE__, __LINE__)); \
+ } \
+ else { \
+ PL_lc_numeric_mutex_depth--; \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: avoided lc_numeric_unlock; depth=%d\n", \
+ __FILE__, __LINE__, PL_lc_numeric_mutex_depth)); \
+ } \
+ } STMT_END
+
+/* This is used as a generic lock for locale operations. For example this is
+ * used when calling nl_langinfo() so that another thread won't zap the
+ * contents of its buffer before it gets saved; and it's called when changing
+ * the locale of LC_MESSAGES. On some systems the latter can cause the
+ * nl_langinfo buffer to be zapped under a race condition.
+ *
+ * If combined with LC_NUMERIC_LOCK, calls to this and its corresponding unlock
+ * should be contained entirely within the locked portion of LC_NUMERIC. This
+ * mutex should be used only in very short sections of code, while
+ * LC_NUMERIC_LOCK may span more operations. By always following this
+ * convention, deadlock should be impossible. But if necessary, the two
+ * mutexes could be combined */
+# define LOCALE_LOCK \
+ STMT_START { \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: locking locale\n", __FILE__, __LINE__)); \
+ MUTEX_LOCK(&PL_locale_mutex); \
+ } STMT_END
+# define LOCALE_UNLOCK \
+ STMT_START { \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: unlocking locale\n", __FILE__, __LINE__)); \
+ MUTEX_UNLOCK(&PL_locale_mutex); \
+ } STMT_END
+
# define LOCALE_TERM \
STMT_START { \
MUTEX_DESTROY(&PL_locale_mutex); \
+ MUTEX_DESTROY(&PL_lc_numeric_mutex); \
_LOCALE_TERM_POSIX_2008; \
} STMT_END
-# ifdef HAS_POSIX_2008_LOCALE
-# define USE_POSIX_2008_LOCALE
-# define USE_THREAD_SAFE_LOCALE
-# endif
# endif
/* Returns TRUE if the plain locale pragma without a parameter is in effect
@@ -5719,6 +5808,10 @@ argument list, like this:
The private variable is used to save the current locale state, so
that the requisite matching call to L</RESTORE_LC_NUMERIC> can restore it.
+On threaded perls not operating with thread-safe functionality, this macro uses
+a mutex to force a critical section. Therefore the matching RESTORE should be
+close by, and guaranteed to be called.
+
=for apidoc Am|void|STORE_LC_NUMERIC_SET_TO_NEEDED
This is used to help wrap XS or C code that that is C<LC_NUMERIC> locale-aware.
@@ -5749,6 +5842,10 @@ argument list, like this:
...
}
+On threaded perls not operating with thread-safe functionality, this macro uses
+a mutex to force a critical section. Therefore the matching RESTORE should be
+close by, and guaranteed to be called.
+
=for apidoc Am|void|RESTORE_LC_NUMERIC
This is used in conjunction with one of the macros
@@ -5798,65 +5895,99 @@ expression, but with an empty argument list, like this:
void (*_restore_LC_NUMERIC_function)(pTHX) = NULL
# define STORE_LC_NUMERIC_SET_TO_NEEDED() \
- if (IN_LC(LC_NUMERIC)) { \
- if (_NOT_IN_NUMERIC_UNDERLYING) { \
- Perl_set_numeric_underlying(aTHX); \
- _restore_LC_NUMERIC_function = &Perl_set_numeric_standard; \
- } \
- } \
- else { \
- if (_NOT_IN_NUMERIC_STANDARD) { \
- SET_NUMERIC_STANDARD(); \
- _restore_LC_NUMERIC_function = &Perl_set_numeric_underlying; \
- } \
- }
+ STMT_START { \
+ LC_NUMERIC_LOCK( \
+ (IN_LC(LC_NUMERIC) && _NOT_IN_NUMERIC_UNDERLYING) \
+ || _NOT_IN_NUMERIC_STANDARD); \
+ if (IN_LC(LC_NUMERIC)) { \
+ if (_NOT_IN_NUMERIC_UNDERLYING) { \
+ Perl_set_numeric_underlying(aTHX); \
+ _restore_LC_NUMERIC_function \
+ = &Perl_set_numeric_standard; \
+ } \
+ } \
+ else { \
+ if (_NOT_IN_NUMERIC_STANDARD) { \
+ Perl_set_numeric_standard(aTHX); \
+ _restore_LC_NUMERIC_function \
+ = &Perl_set_numeric_underlying; \
+ } \
+ } \
+ } STMT_END
# define RESTORE_LC_NUMERIC() \
- if (_restore_LC_NUMERIC_function) { \
- _restore_LC_NUMERIC_function(aTHX); \
- }
+ STMT_START { \
+ if (_restore_LC_NUMERIC_function) { \
+ _restore_LC_NUMERIC_function(aTHX); \
+ } \
+ LC_NUMERIC_UNLOCK; \
+ } STMT_END
/* The next two macros set unconditionally. These should be rarely used, and
* only after being sure that this is what is needed */
# define SET_NUMERIC_STANDARD() \
- STMT_START { if (_NOT_IN_NUMERIC_STANDARD) \
- Perl_set_numeric_standard(aTHX); \
- } STMT_END
+ STMT_START { \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: lc_numeric standard=%d\n", \
+ __FILE__, __LINE__, PL_numeric_standard)); \
+ Perl_set_numeric_standard(aTHX); \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: lc_numeric standard=%d\n", \
+ __FILE__, __LINE__, PL_numeric_standard)); \
+ } STMT_END
# define SET_NUMERIC_UNDERLYING() \
- STMT_START { if (_NOT_IN_NUMERIC_UNDERLYING) \
- Perl_set_numeric_underlying(aTHX); } STMT_END
+ STMT_START { \
+ if (_NOT_IN_NUMERIC_UNDERLYING) { \
+ Perl_set_numeric_underlying(aTHX); \
+ } \
+ } STMT_END
/* The rest of these LC_NUMERIC macros toggle to one or the other state, with
* the RESTORE_foo ones called to switch back, but only if need be */
# define STORE_LC_NUMERIC_SET_STANDARD() \
- if (_NOT_IN_NUMERIC_STANDARD) { \
- _restore_LC_NUMERIC_function = &Perl_set_numeric_underlying; \
- Perl_set_numeric_standard(aTHX); \
- }
+ STMT_START { \
+ LC_NUMERIC_LOCK(_NOT_IN_NUMERIC_STANDARD); \
+ if (_NOT_IN_NUMERIC_STANDARD) { \
+ _restore_LC_NUMERIC_function = &Perl_set_numeric_underlying;\
+ Perl_set_numeric_standard(aTHX); \
+ } \
+ } STMT_END
/* Rarely, we want to change to the underlying locale even outside of 'use
* locale'. This is principally in the POSIX:: functions */
# define STORE_LC_NUMERIC_FORCE_TO_UNDERLYING() \
- if (_NOT_IN_NUMERIC_UNDERLYING) { \
- Perl_set_numeric_underlying(aTHX); \
- _restore_LC_NUMERIC_function = &Perl_set_numeric_standard; \
- }
+ STMT_START { \
+ LC_NUMERIC_LOCK(_NOT_IN_NUMERIC_UNDERLYING); \
+ if (_NOT_IN_NUMERIC_UNDERLYING) { \
+ Perl_set_numeric_underlying(aTHX); \
+ _restore_LC_NUMERIC_function = &Perl_set_numeric_standard; \
+ } \
+ } STMT_END
/* Lock/unlock to the C locale until unlock is called. This needs to be
* recursively callable. [perl #128207] */
-# define LOCK_LC_NUMERIC_STANDARD() \
- (__ASSERT_(PL_numeric_standard) \
- PL_numeric_standard++)
-# define UNLOCK_LC_NUMERIC_STANDARD() \
- STMT_START { \
- if (PL_numeric_standard > 1) { \
- PL_numeric_standard--; \
- } \
- else { \
- assert(0); \
- } \
- } STMT_END
+# define LOCK_LC_NUMERIC_STANDARD() \
+ STMT_START { \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: lock lc_numeric_standard: new depth=%d\n", \
+ __FILE__, __LINE__, PL_numeric_standard + 1)); \
+ __ASSERT_(PL_numeric_standard) \
+ PL_numeric_standard++; \
+ } STMT_END
+
+# define UNLOCK_LC_NUMERIC_STANDARD() \
+ STMT_START { \
+ if (PL_numeric_standard > 1) { \
+ PL_numeric_standard--; \
+ } \
+ else { \
+ assert(0); \
+ } \
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, \
+ "%s: %d: lc_numeric_standard decrement lock, new depth=%d\n", \
+ __FILE__, __LINE__, PL_numeric_standard)); \
+ } STMT_END
#else /* !USE_LOCALE_NUMERIC */
diff --git a/perlapi.h b/perlapi.h
index c461593dae..b39c8ccd04 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -131,6 +131,8 @@ END_EXTERN_C
#define PL_keyword_plugin (*Perl_Gkeyword_plugin_ptr(NULL))
#undef PL_keyword_plugin_mutex
#define PL_keyword_plugin_mutex (*Perl_Gkeyword_plugin_mutex_ptr(NULL))
+#undef PL_lc_numeric_mutex
+#define PL_lc_numeric_mutex (*Perl_Glc_numeric_mutex_ptr(NULL))
#undef PL_locale_mutex
#define PL_locale_mutex (*Perl_Glocale_mutex_ptr(NULL))
#undef PL_malloc_mutex
diff --git a/perlvars.h b/perlvars.h
index 708badef07..be67a59988 100644
--- a/perlvars.h
+++ b/perlvars.h
@@ -100,6 +100,7 @@ PERLVARI(G, mmap_page_size, IV, 0)
#if defined(USE_ITHREADS)
PERLVAR(G, hints_mutex, perl_mutex) /* Mutex for refcounted he refcounting */
PERLVAR(G, locale_mutex, perl_mutex) /* Mutex for setlocale() changing */
+PERLVAR(G, lc_numeric_mutex, perl_mutex) /* Mutex for switching LC_NUMERIC */
#endif
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index b4e10fe113..00a33b60c2 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -31,8 +31,18 @@ here, but most should go in the L</Performance Enhancements> section.
This variable is 1 if the Perl interpreter is operating in an
environment where it is safe to use and change locales (see
-L<perllocale>.) Currently this is true only when the perl is
-unthreaded.
+L<perllocale>.) This variable is true when the perl is
+unthreaded, or compiled in a platform that supports thread-safe locale
+operation (see next item).
+
+=head2 Locales are now thread-safe on systems that support them
+
+These systems include Windows starting with Visual Studio 2005, and in
+POSIX 2008 systems.
+
+The implication is that you are now free to use locales and changes them
+in a threaded environment. Your changes affect only your thread.
+See L<perllocale/Multi-threaded operation>
=head1 Security
@@ -375,6 +385,11 @@ one.
A new API function L<perlapi/Perl_setlocale> has been added.
+=item *
+
+L<perlapi/sync_locale> has been revised to return a boolean as to
+whether the system was using the global locale or not.
+
=back
=head1 Selected Bug Fixes
@@ -397,6 +412,14 @@ but the failed sub-parse could leave partly parsed constructs on the
parser shift-reduce stack, confusing the parser, leading to perl
crashes. [perl #125351]
+=item *
+
+On threaded perls where the decimal point (radix) character is not a
+dot, it has been possible for a race to occur between threads when one
+needs to use the real radix character. This has now been fixed by use
+of a mutex on systems without thread-safe locales, and the problem just
+doesn't come up on those with thread-safe locales.
+
=back
=head1 Known Problems
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index b610dcf2a8..c726a631a6 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -7291,6 +7291,14 @@ a range. For these, what should happen isn't clear at all. In
these circumstances, Perl discards all but the first character
of the returned sequence, which is not likely what you want.
+=item Unknown locale category %d; can't set it to %s
+
+(W locale) You used a locale category that perl doesn't recognize, so it
+cannot carry out your request. Check that you are using a valid
+category. If so, see L<perllocale/Multi-threaded> for advice on
+reporting this as a bug, and for modifying perl locally to accommodate
+your needs.
+
=item Using /u for '%s' instead of /%s in regex; marked by S<<-- HERE> in m/%s/
(W regexp) You used a Unicode boundary (C<\b{...}> or C<\B{...}>) in a
diff --git a/pod/perlembed.pod b/pod/perlembed.pod
index 2196ad8094..d6391f7a26 100644
--- a/pod/perlembed.pod
+++ b/pod/perlembed.pod
@@ -1108,6 +1108,11 @@ C pre-processor symbol C<HAS_SKIP_LOCALE_INIT>. This allows code that
has to work with multiple Perl versions to do some sort of work-around
when confronted with an earlier Perl.
+If your program is using the POSIX 2008 multi-thread locale
+functionality, you should switch into the global locale and set that up
+properly before starting the Perl interpreter. It will then properly
+switch back to using the thread-safe functions.
+
=head1 Hiding Perl_
If you completely hide the short forms of the Perl public API,
diff --git a/pod/perllocale.pod b/pod/perllocale.pod
index 233aaeba1a..52266a114a 100644
--- a/pod/perllocale.pod
+++ b/pod/perllocale.pod
@@ -171,14 +171,17 @@ L</The setlocale function>.
=head2 The C<"use locale"> pragma
-WARNING! Do NOT use this pragma in scripts that have multiple
-L<threads|threads> active. The locale is not local to a single thread.
-Another thread may change the locale at any time, which could cause at a
-minimum that a given thread is operating in a locale it isn't expecting
-to be in. On some platforms, segfaults can also occur. The locale
-change need not be explicit; some operations cause perl to change the
-locale itself. You are vulnerable simply by having done a S<C<"use
-locale">>.
+Starting in Perl 5.28, this pragma may be used in
+L<multi-threaded|threads> applications on systems that have thread-safe
+locale ability. Some caveats apply, see L</Multi-threaded> below. On
+systems without this capability, or in earlier Perls, do NOT use this
+pragma in scripts that have multiple L<threads|threads> active. The
+locale in these cases is not local to a single thread. Another thread
+may change the locale at any time, which could cause at a minimum that a
+given thread is operating in a locale it isn't expecting to be in. On
+some platforms, segfaults can also occur. The locale change need not be
+explicit; some operations cause perl to change the locale itself. You
+are vulnerable simply by having done a S<C<"use locale">>.
By default, Perl itself (outside the L<POSIX> module)
ignores the current locale. The S<C<use locale>>
@@ -369,7 +372,7 @@ will be locale aware. Everything else is unaffected.
Since Perl doesn't currently do anything with the C<LC_MONETARY>
category, specifying C<:monetary> does effectively nothing. Some
systems have other categories, such as C<LC_PAPER>, but Perl
-also doesn't know anything about them, and there is no way to specify
+also doesn't do anything with them, and there is no way to specify
them in this pragma's arguments.
You can also easily say to use all categories but one, by either, for
@@ -407,12 +410,13 @@ this, as described in L</Unicode and UTF-8>.
=head2 The setlocale function
-WARNING! Do NOT use this function in a L<thread|threads>. The locale
-will change in all other threads at the same time, and should your
-thread get paused by the operating system, and another started, that
-thread will not have the locale it is expecting. On some platforms,
-there can be a race leading to segfaults if two threads call this
-function nearly simultaneously.
+WARNING! Prior to Perl 5.28 or on a system that does not support
+thread-safe locale operations, do NOT use this function in a
+L<thread|threads>. The locale will change in all other threads at the
+same time, and should your thread get paused by the operating system,
+and another started, that thread will not have the locale it is
+expecting. On some platforms, there can be a race leading to segfaults
+if two threads call this function nearly simultaneously.
You can switch locales as often as you wish at run time with the
C<POSIX::setlocale()> function:
@@ -485,9 +489,59 @@ If C<set_locale()> fails for some reason (for example, an attempt to set
to a locale unknown to the system), the locale for the category is not
changed, and the function returns C<undef>.
+Starting in Perl 5.28, on multi-threaded perls compiled on systems that
+implement POSIX 2008 thread-safe locale operations, this function
+doesn't actually call the system C<setlocale>. Instead those
+thread-safe operations are used to emulate the C<setlocale> function,
+but in a thread-safe manner.
For further information about the categories, consult L<setlocale(3)>.
+=head2 Multi-threaded operation
+
+Beginning in Perl 5.28, multi-threaded locale operation is supported on
+systems that implement either the POSIX 2008 or Windows-specific
+thread-safe locale operations. Many modern systems, such as various
+Unix variants and Darwin do have this.
+
+You can tell if using locales is safe on your system by looking at the
+read-only boolean variable C<${^SAFE_LOCALES}>. The value is 1 if the
+perl is not threaded, or if it is using thread-safe locale operations.
+
+Thread-safe operations are supported in Windows starting in Visual Studio
+2005, and in systems compatible with POSIX 2008. Some platforms claim
+to support POSIX 2008, but have buggy implementations, so that the hints
+files for compiling to run on them turn off attempting to use
+thread-safety. C<${^SAFE_LOCALES}> will be 0 on them.
+
+Be aware that writing a multi-threaded application will not be portable
+to a platform which lacks the native thread-safe locale support. On
+systems that do have it, you automatically get this behavior for
+threaded perls, without having to do anything. If for some reason, you
+don't want to use this capability (perhaps the POSIX 2008 support is
+buggy on your system), you can manually compile Perl to use the old
+non-thread-safe implementation by passing the argument
+C<-Accflags='-DNO_THREAD_SAFE_LOCALE'> to F<Configure>.
+Except on Windows, this will continue to use certain of the POSIX 2008
+functions in some situations. If these are buggy, you can pass the
+following to F<Configure> instead or additionally:
+C<-Accflags='-DNO_POSIX_2008_LOCALE'>. This will also keep the code
+from using thread-safe locales.
+C<${^SAFE_LOCALES}> will be 0 on systems that turn off the thread-safe
+operations.
+
+The initial program is started up using the locale specified from the
+environment, as currently, described in L</ENVIRONMENT>. All newly
+created threads start with C<LC_ALL> set to C<"C">>. Each thread may
+use C<POSIX::setlocale()> to query or switch its locale at any time,
+without affecting any other thread. All locale-dependent operations
+automatically use their thread's locale.
+
+This should be completely transparent to any applications written
+entirely in Perl (minus a few rarely encountered caveats given in the
+L</Multi-threaded> section). Information for XS module writers is given
+in L<perlxs/Locale-aware XS code>.
+
=head2 Finding locales
For locales available in your system, consult also L<setlocale(3)> to
@@ -1433,12 +1487,10 @@ the same way, "localization" is often abbreviated to B<l10n>.
=head2 An imperfect standard
Internationalization, as defined in the C and POSIX standards, can be
-criticized as incomplete, ungainly, and having too large a granularity.
-(Locales apply to a whole process, when it would arguably be more useful
-to have them apply to a single thread, window group, or whatever.) They
-also have a tendency, like standards groups, to divide the world into
-nations, when we all know that the world can equally well be divided
-into bankers, bikers, gamers, and so on.
+criticized as incomplete and ungainly. They also have a tendency, like
+standards groups, to divide the world into nations, when we all know
+that the world can equally well be divided into bankers, bikers, gamers,
+and so on.
=head1 Unicode and UTF-8
@@ -1609,6 +1661,50 @@ control, but doesn't. If two strings do collate identically, the one
containing the C<NUL> will sort to earlier. Prior to 5.26, there were
more bugs.
+=head2 Multi-threaded
+
+XS code or C-language libraries called from it that use the system
+L<C<setlocale(3)>> function (except on Windows) likely will not work
+from a multi-threaded application without changes. See
+L<perlxs/Locale-aware XS code>.
+
+An XS module that is locale-dependent could have been written under the
+assumption that it will never be called in a multi-threaded environment,
+and so uses other non-locale constructs that aren't multi-thread-safe.
+See L<perlxs/Thread-aware system interfaces>.
+
+POSIX does not define a way to get the name of the current per-thread
+locale. Some systems, such as Darwin and NetBSD do implement a
+function, L<querylocale(3)> to do this. On non-Windows systems without
+it, such as Linux, there are some additional caveats:
+
+=over
+
+=item *
+
+An embedded perl needs to be started up while the global locale is in
+effect. See L<perlembed/Using embedded Perl with POSIX locales>.
+
+=item *
+
+It becomes more important for perl to know about all the possible
+locale categories on the platform, even if they aren't apparently used
+in your program. Perl knows all of the Linux ones. If your platform
+has others, you can send email to L<mailto:perlbug@perl.org> for
+inclusion of it in the next release. In the meantime, it is possible to
+edit the Perl source to teach it about the category, and then recompile.
+Search for instances of, say, C<LC_PAPER> in the source, and use that as
+a template to add the omitted one.
+
+=item *
+
+It is possible, though hard to do, to call C<POSIX::setlocale> with a
+locale that it doesn't recognize as syntactically legal, but actually is
+legal on that system. This should happen only with embedded perls, or
+if you hand-craft a locale name yourself.
+
+=back
+
=head2 Broken systems
In certain systems, the operating system's locale support
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index 8f99c406fc..ba23771c8d 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -2264,7 +2264,11 @@ X<${^SAFE_LOCALES}>
Reflects if safe locale operations are available to this perl (when the
value is 1) or not (the value is 0). This variable is always 1 if the
-perl has been compiled without threads, and currently 0 otherwise.
+perl has been compiled without threads. It is also 1 if this perl is
+using thread-safe locale operations. Note that an individual thread may
+choose to use the global locale (generally unsafe) by calling
+L<perlapi/switch_to_global_locale>. This variable currently is still
+set to 1 in such threads.
This variable is read-only.
diff --git a/proto.h b/proto.h
index d35e295fba..80b9e240b5 100644
--- a/proto.h
+++ b/proto.h
@@ -2387,7 +2387,6 @@ PERL_CALLCONV CV * Perl_newXS_flags(pTHX_ const char *name, XSUBADDR_t subaddr,
PERL_CALLCONV CV * Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, XSUBADDR_t subaddr, const char *const filename, const char *const proto, SV **const_svp, U32 flags);
#define PERL_ARGS_ASSERT_NEWXS_LEN_FLAGS \
assert(subaddr)
-PERL_CALLCONV void Perl_new_numeric(pTHX_ const char* newcoll);
PERL_CALLCONV PERL_SI* Perl_new_stackinfo(pTHX_ I32 stitems, I32 cxitems)
__attribute__warn_unused_result__;
@@ -3514,7 +3513,8 @@ PERL_CALLCONV SV* Perl_swash_init(pTHX_ const char* pkg, const char* name, SV* l
#define PERL_ARGS_ASSERT_SWASH_INIT \
assert(pkg); assert(name); assert(listsv)
-PERL_CALLCONV void Perl_sync_locale(pTHX);
+PERL_CALLCONV void Perl_switch_to_global_locale(void);
+PERL_CALLCONV bool Perl_sync_locale(void);
PERL_CALLCONV void Perl_sys_init(int* argc, char*** argv);
#define PERL_ARGS_ASSERT_SYS_INIT \
assert(argc); assert(argv)
@@ -3526,6 +3526,8 @@ PERL_CALLCONV void Perl_taint_env(pTHX);
PERL_CALLCONV void Perl_taint_proper(pTHX_ const char* f, const char *const s);
#define PERL_ARGS_ASSERT_TAINT_PROPER \
assert(s)
+PERL_CALLCONV void Perl_thread_locale_init(void);
+PERL_CALLCONV void Perl_thread_locale_term(void);
PERL_CALLCONV OP * Perl_tied_method(pTHX_ SV *methname, SV **sp, SV *const sv, const MAGIC *const mg, const U32 flags, U32 argc, ...);
#define PERL_ARGS_ASSERT_TIED_METHOD \
assert(methname); assert(sp); assert(sv); assert(mg)
@@ -4709,10 +4711,14 @@ STATIC void S_new_collate(pTHX_ const char* newcoll);
STATIC void S_new_ctype(pTHX_ const char* newctype);
#define PERL_ARGS_ASSERT_NEW_CTYPE \
assert(newctype)
+STATIC void S_new_numeric(pTHX_ const char* newnum);
STATIC void S_set_numeric_radix(pTHX_ const bool use_locale);
STATIC char* S_stdize_locale(pTHX_ char* locs);
#define PERL_ARGS_ASSERT_STDIZE_LOCALE \
assert(locs)
+# if defined(USE_POSIX_2008_LOCALE)
+STATIC const char* S_emulate_setlocale(const int category, const char* locale, unsigned int index, const bool is_index_valid);
+# endif
# if defined(WIN32)
STATIC char* S_win32_setlocale(pTHX_ int category, const char* locale);
# endif
diff --git a/sv.c b/sv.c
index 4377e95411..1aa5966489 100644
--- a/sv.c
+++ b/sv.c
@@ -15235,6 +15235,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_in_utf8_CTYPE_locale = proto_perl->Iin_utf8_CTYPE_locale;
PL_in_utf8_COLLATE_locale = proto_perl->Iin_utf8_COLLATE_locale;
my_strlcpy(PL_locale_utf8ness, proto_perl->Ilocale_utf8ness, sizeof(PL_locale_utf8ness));
+ PL_lc_numeric_mutex_depth = 0;
/* Unicode features (see perlrun/-C) */
PL_unicode = proto_perl->Iunicode;
@@ -15548,6 +15549,13 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_subname = sv_dup_inc(proto_perl->Isubname, param);
+#if defined(USE_POSIX_2008_LOCALE) \
+ && defined(USE_THREAD_SAFE_LOCALE) \
+ && ! defined(HAS_QUERYLOCALE)
+ for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) {
+ PL_curlocales[i] = savepv("."); /* An illegal value */
+ }
+#endif
#ifdef USE_LOCALE_CTYPE
/* Should we warn if uses locale? */
PL_warn_locale = sv_dup_inc(proto_perl->Iwarn_locale, param);
diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat
index 7545ccf888..78e0ec659d 100644
--- a/t/porting/known_pod_issues.dat
+++ b/t/porting/known_pod_issues.dat
@@ -240,6 +240,7 @@ provide
ptar(1)
ptargrep(1)
pwd_mkdb(8)
+querylocale(3)
RDF::Trine
read(2)
Readonly
diff --git a/vutil.c b/vutil.c
index 655fdc5047..a66d6ef2f4 100644
--- a/vutil.c
+++ b/vutil.c
@@ -618,43 +618,80 @@ VER_NV:
goto VER_PV;
}
#endif
-
#ifdef USE_LOCALE_NUMERIC
- {
- const char * const cur_numeric = setlocale(LC_NUMERIC, NULL);
- assert(cur_numeric);
-
- /* XS code can set the locale without us knowing. To protect the
- * version number parsing, which requires the radix character to be a
- * dot, update our records as to what the locale is, so that our
- * existing macro mechanism can correctly change it to a dot and back
- * if necessary. This code is extremely unlikely to be in a loop, so
- * the extra work will have a negligible performance impact. See [perl
- * #121930].
- *
- * If the current locale is a standard one, but we are expecting it to
- * be a different, underlying locale, update our records to make the
- * underlying locale this (standard) one. If the current locale is not
- * a standard one, we should be expecting a non-standard one, the same
- * one that we have recorded as the underlying locale. If not, update
- * our records. */
- if (strEQ(cur_numeric, "C") || strEQ(cur_numeric, "POSIX")) {
- if (! PL_numeric_standard) {
- new_numeric(cur_numeric);
- }
- }
- else if (PL_numeric_standard
- || ! PL_numeric_name
- || strNE(PL_numeric_name, cur_numeric))
- {
- new_numeric(cur_numeric);
- }
- }
-#endif
- { /* Braces needed because macro just below declares a variable */
+
+ {
+ /* This may or may not be called from code that has switched
+ * locales without letting perl know, therefore we have to find it
+ * from first principals. See [perl #121930]. */
+
+ /* In windows, or not threaded, or not thread-safe, if it isn't C,
+ * set it to C. */
+
+# ifndef USE_POSIX_2008_LOCALE
+
+ const char * locale_name_on_entry;
+
+ LC_NUMERIC_LOCK(0); /* Start critical section */
+
+ locale_name_on_entry = setlocale(LC_NUMERIC, NULL);
+ if ( strNE(locale_name_on_entry, "C")
+ && strNE(locale_name_on_entry, "POSIX"))
+ {
+ setlocale(LC_NUMERIC, "C");
+ }
+ else { /* This value indicates to the restore code that we didn't
+ change the locale */
+ locale_name_on_entry = NULL;
+ }
+
+# else
+
+ const locale_t locale_obj_on_entry = uselocale((locale_t) 0);
+ const char * locale_name_on_entry = NULL;
DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
- STORE_LC_NUMERIC_SET_STANDARD();
- LOCK_NUMERIC_STANDARD();
+
+ if (locale_obj_on_entry == LC_GLOBAL_LOCALE) {
+
+ /* in the global locale, we can call system setlocale and if it
+ * isn't C, set it to C. */
+ LC_NUMERIC_LOCK(0);
+
+ locale_name_on_entry = setlocale(LC_NUMERIC, NULL);
+ if ( strNE(locale_name_on_entry, "C")
+ && strNE(locale_name_on_entry, "POSIX"))
+ {
+ setlocale(LC_NUMERIC, "C");
+ }
+ else { /* This value indicates to the restore code that we
+ didn't change the locale */
+ locale_name_on_entry = NULL;
+ }
+ }
+ else if (locale_obj_on_entry == PL_underlying_numeric_obj) {
+ /* Here, the locale appears to have been changed to use the
+ * program's underlying locale. Just use our mechanisms to
+ * switch back to C. It might be possible for this pointer to
+ * actually refer to something else if it got released and
+ * reused somehow. But it doesn't matter, our mechanisms will
+ * work even so */
+ STORE_LC_NUMERIC_SET_STANDARD();
+ }
+ else if (locale_obj_on_entry != PL_C_locale_obj) {
+ /* The C object should be unchanged during a program's
+ * execution, so it should be safe to assume it means what it
+ * says, so if we are in it, no locale change is required.
+ * Otherwise, simply use the thread-safe operation. */
+ uselocale(PL_C_locale_obj);
+ }
+
+# endif
+
+ /* Prevent recursed calls from trying to change back */
+ LOCK_LC_NUMERIC_STANDARD();
+
+#endif
+
if (sv) {
Perl_sv_catpvf(aTHX_ sv, "%.9" NVff, SvNVX(ver));
len = SvCUR(sv);
@@ -664,9 +701,38 @@ VER_NV:
len = my_snprintf(tbuf, sizeof(tbuf), "%.9" NVff, SvNVX(ver));
buf = tbuf;
}
- UNLOCK_NUMERIC_STANDARD();
- RESTORE_LC_NUMERIC();
+
+#ifdef USE_LOCALE_NUMERIC
+
+ UNLOCK_LC_NUMERIC_STANDARD();
+
+# ifndef USE_POSIX_2008_LOCALE
+
+ if (locale_name_on_entry) {
+ setlocale(LC_NUMERIC, locale_name_on_entry);
+ }
+
+ LC_NUMERIC_UNLOCK; /* End critical section */
+
+# else
+
+ if (locale_name_on_entry) {
+ setlocale(LC_NUMERIC, locale_name_on_entry);
+ LC_NUMERIC_UNLOCK;
+ }
+ else if (locale_obj_on_entry == PL_underlying_numeric_obj) {
+ RESTORE_LC_NUMERIC();
+ }
+ else if (locale_obj_on_entry != PL_C_locale_obj) {
+ uselocale(locale_obj_on_entry);
+ }
+
+# endif
+
}
+
+#endif /* USE_LOCALE_NUMERIC */
+
while (buf[len-1] == '0' && len > 0) len--;
if ( buf[len-1] == '.' ) len--; /* eat the trailing decimal */
version = savepvn(buf, len);
diff --git a/vutil.h b/vutil.h
index e291408db6..193c66d561 100644
--- a/vutil.h
+++ b/vutil.h
@@ -223,21 +223,28 @@ const char * Perl_prescan_version(pTHX_ const char *s, bool strict, const char**
#endif
-#if PERL_VERSION_LT(5,19,0)
-# undef STORE_NUMERIC_LOCAL_SET_STANDARD
-# undef RESTORE_NUMERIC_LOCAL
-# ifdef USE_LOCALE
-# define STORE_NUMERIC_LOCAL_SET_STANDARD()\
- char *loc = savepv(setlocale(LC_NUMERIC, NULL)); \
+#if PERL_VERSION_LT(5,27,9)
+# define LC_NUMERIC_LOCK
+# define LC_NUMERIC_UNLOCK
+# if PERL_VERSION_LT(5,19,0)
+# undef STORE_LC_NUMERIC_SET_STANDARD
+# undef RESTORE_LC_NUMERIC
+# undef DECLARATION_FOR_LC_NUMERIC_MANIPULATION
+# ifdef USE_LOCALE
+# define DECLARATION_FOR_LC_NUMERIC_MANIPULATION char *loc
+# define STORE_NUMERIC_SET_STANDARD()\
+ loc = savepv(setlocale(LC_NUMERIC, NULL)); \
SAVEFREEPV(loc); \
setlocale(LC_NUMERIC, "C");
-# define RESTORE_NUMERIC_LOCAL()\
+# define RESTORE_LC_NUMERIC()\
setlocale(LC_NUMERIC, loc);
-# else
-# define STORE_NUMERIC_LOCAL_SET_STANDARD()
-# define RESTORE_NUMERIC_LOCAL()
-# endif
+# else
+# define DECLARATION_FOR_LC_NUMERIC_MANIPULATION
+# define STORE_LC_NUMERIC_SET_STANDARD()
+# define RESTORE_LC_NUMERIC()
+# endif
+# endif
#endif
#ifndef LOCK_NUMERIC_STANDARD