locale.c: Comment clarifications, white space

Some of these are to make future difference listings shorter Some of the changes look like incorrect indentation here, but anticipate future commits.
author: Karl Williamson <khw@cpan.org> 2021-03-17 11:13:56 -0600
committer: Karl Williamson <khw@cpan.org> 2022-06-19 13:29:35 -0600
commit: fe1c14942b81da8e421933b78e87d61c3cfbaafb (patch)
tree: 044fa23f55d96cd6f84c5afb6c8d0882510c00ba
parent: 5bdad801a7181c1f85b383d092faea9838f0cf64 (diff)
download: perl-fe1c14942b81da8e421933b78e87d61c3cfbaafb.tar.gz
1 files changed, 335 insertions, 208 deletions
diff --git a/locale.c b/locale.c
index 75b592abb2..8a4b28800b 100644
--- a/locale.c
+++ b/locale.c
@@ -38,9 +38,54 @@
  * it would be possible to emulate thread-safe locales, but this likely would
  * involve a lot of locale switching, and would require XS code changes.
  * Macros could be written so that the code wouldn't have to know which type of
- * system is being used.  It's unlikely that we would ever do that, since most
- * modern systems support thread-safe locales, but there was code written to
- * this end, and is retained, #ifdef'd out.
+ * system is being used.
+ *
+ * Table-driven code is used for simplicity and clarity, as many operations
+ * differ only in which category is being worked on.  However the system
+ * categories need not be small contiguous integers, so do not lend themselves
+ * to table lookup.  Instead we have created our own equivalent values which
+ * are all small contiguous non-negative integers, and translation functions
+ * between the two sets.  For category 'LC_foo', the name of our index is
+ * LC_foo_INDEX_.  Various parallel tables, indexed by these, are used.
+ *
+ * Many of the macros and functions in this file have one of the suffixes '_c',
+ * '_r', or '_i'.  khw found these useful in remembering what type of locale
+ * category to use as their parameter.  '_r' takes an int category number as
+ * passed to setlocale(), like LC_ALL, LC_CTYPE, etc.  The 'r' indicates that
+ * the value isn't known until runtime.  '_c' also indicates such a category
+ * number, but its value is known at compile time.  These are both converted
+ * into unsigned indexes into various tables of category information, where the
+ * real work is generally done.  The tables are generated at compile-time based
+ * on platform characteristics and Configure options.  They hide from the code
+ * many of the vagaries of the different locale implementations out there.  You
+ * may have already guessed that '_i' indicates the parameter is such an
+ * unsigned index.  Converting from '_r' to '_i' requires run-time lookup.
+ * '_c' is used to get cpp to do this at compile time.  To avoid the runtime
+ * expense, the code is structured to use '_r' at the API level, and once
+ * converted, everything possible is done using the table indexes.
+ *
+ * On unthreaded perls, most operations expand out to just the basic
+ * setlocale() calls.  The same is true on threaded perls on modern Windows
+ * systems where the same API, after set up, is used for thread-safe locale
+ * handling.  On other systems, there is a completely different API, specified
+ * in POSIX 2008, to do thread-safe locales.  On these systems, our
+ * emulate_setlocale_i() function is used to hide the different API from the
+ * outside.  This makes it completely transparent to most XS code.
+ *
+ * A huge complicating factor is that the LC_NUMERIC category is normally held
+ * in the C locale, except during those relatively rare times when it needs to
+ * be in the underlying locale.  There is a bunch of code to accomplish this,
+ * and to allow easy switches from one state to the other.
+ *
+ * z/OS (os390) is an outlier.  Locales really don't work under threads when
+ * either the radix character isn't a dot, or attempts are made to change
+ * locales after the first thread is created.  The reason is that IBM has made
+ * it thread-safe by refusing to change locales (returning failure if
+ * attempted) any time after an application has called pthread_create() to
+ * create another thread.  The expectation is that an application will set up
+ * its locale information before the first fork, and be stable thereafter.  But
+ * perl toggles LC_NUMERIC if the locale's radix character isn't a dot, as do
+ * the other toggles, which are less common.
  */
 
 /* If the environment says to, we can output debugging information during
@@ -96,8 +141,8 @@ static int debug_initialization = 0;
 /* This code keeps a LRU cache of the UTF-8ness of the locales it has so-far
  * looked up.  This is in the form of a C string:  */
 
-#define UTF8NESS_SEP     "\v"
-#define UTF8NESS_PREFIX  "\f"
+#  define UTF8NESS_SEP     "\v"
+#  define UTF8NESS_PREFIX  "\f"
 
 /* So, the string looks like:
  *
@@ -109,7 +154,7 @@ static int debug_initialization = 0;
 STATIC_ASSERT_DECL(STRLENs(UTF8NESS_SEP) == 1);
 STATIC_ASSERT_DECL(STRLENs(UTF8NESS_PREFIX) == 1);
 
-#define C_and_POSIX_utf8ness    UTF8NESS_SEP "C"     UTF8NESS_PREFIX "0"    \
+#  define C_and_POSIX_utf8ness    UTF8NESS_SEP "C"     UTF8NESS_PREFIX "0"    \
                                 UTF8NESS_SEP "POSIX" UTF8NESS_PREFIX "0"
 
 /* The cache is initialized to C_and_POSIX_utf8ness at start up.  These are
@@ -166,9 +211,10 @@ S_stdize_locale(pTHX_ char *locs)
     return locs;
 }
 
-/* Two parallel arrays; first the locale categories Perl uses on this system;
- * the second array is their names.  These arrays are in mostly arbitrary
- * order. */
+/* Two parallel arrays indexed by our mapping of category numbers into small
+ * non-negative indexes; first the locale categories Perl uses on this system,
+ * used to do the inverse mapping.  The second array is their names.  These
+ * arrays are in mostly arbitrary order. */
 
 STATIC const int categories[] = {
 
@@ -277,13 +323,11 @@ STATIC const char * const category_names[] = {
     /* On systems with LC_ALL, it is kept in the highest index position.  (-2
      * to account for the final unused placeholder element.) */
 #    define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 2)
-
 #  else
 
     /* On systems without LC_ALL, we pretend it is there, one beyond the real
      * top element, hence in the unused placeholder element. */
 #    define NOMINAL_LC_ALL_INDEX (C_ARRAY_LENGTH(categories) - 1)
-
 #  endif
 
 /* Pretending there is an LC_ALL element just above allows us to avoid most
@@ -363,7 +407,8 @@ S_category_name(const int category)
 
 #endif /* ifdef USE_LOCALE */
 
-/* Windows requres a customized base-level setlocale() */
+/* porcelain_setlocale() presents a consistent POSIX-compliant interface to
+ * setlocale().   Windows requres a customized base-level setlocale() */
 #ifdef WIN32
 #  define porcelain_setlocale(cat, locale) win32_setlocale(cat, locale)
 #else
@@ -371,22 +416,29 @@ S_category_name(const int category)
                                 ((const char *) setlocale(cat, locale))
 #endif
 
+/* In contrast, the do_setlocale() macros are our added layers upon the base
+ * setlocale.  These are used to present a uniform API to the rest of the code
+ * in this file in spite of the disparate underlying implementations. */
+
 #ifndef USE_POSIX_2008_LOCALE
 
-/* "do_setlocale_c" is intended to be called when the category is a constant
- * known at compile time; "do_setlocale_r", not known until run time  */
+/* For non-threaded perls (which we are not to use the POSIX 2008 API on), or a
+ * thread-safe Windows one in which threading is invisible to us, the added
+ * layer just calls the base-level functions.  See the introductory comments in
+ * this file for the meaning of the suffixes '_c', '_r', '_i'. */
+
 #  define do_setlocale_c(cat, locale)  porcelain_setlocale(cat, locale)
 #  define do_setlocale_r(cat, locale)  porcelain_setlocale(cat, locale)
 #  define FIX_GLIBC_LC_MESSAGES_BUG(i)
 
 #else   /* Below uses POSIX 2008 */
 
-/* We emulate setlocale with our own function.  LC_foo is not valid for the
- * POSIX 2008 functions.  Instead LC_foo_MASK is used, which we use an array
- * lookup to convert to.  At compile time we have defined LC_foo_INDEX_ as the
- * proper offset into the array 'category_masks[]'.  At runtime, we have to
- * search through the array (as the actual numbers may not be small contiguous
- * positive integers which would lend themselves to array lookup). */
+/* Here, there is a completely different API to get thread-safe locales.  We
+ * emulate the setlocale() API with our own function(s).  setlocale categories,
+ * like LC_NUMERIC, are not valid here for the POSIX 2008 API.  Instead, there
+ * are equivalents, like LC_NUMERIC_MASK, which we use instead, converting to
+ * by using get_category_index() followed by table lookup. */
+
 #  define do_setlocale_c(cat, locale)                                       \
                         emulate_setlocale_i(cat ## _INDEX_, locale)
 #  define do_setlocale_r(cat, locale)                                       \
@@ -460,7 +512,7 @@ STATIC const int category_masks[] = {
    /* Placeholder as a precaution if code fails to check the return of
     * get_category_index(), which returns this element to indicate an error */
                                 0
-                            };
+};
 
 STATIC const char *
 S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
@@ -552,9 +604,12 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
             char * temp_name = nl_langinfo_l(_NL_LOCALE_NAME(category),
                                              uselocale((locale_t) 0));
             /*
-            PerlIO_printf(Perl_debug_log, "%s:%d: temp_name=%s\n", __FILE__, __LINE__, temp_name ? temp_name : "NULL");
-            PerlIO_printf(Perl_debug_log, "%s:%d: index=%d\n", __FILE__, __LINE__, index);
-            PerlIO_printf(Perl_debug_log, "%s:%d: PL_curlocales[index]=%s\n", __FILE__, __LINE__, PL_curlocales[index]);
+        PerlIO_printf(Perl_debug_log, "%s:%d: temp_name=%s\n",
+                      __FILE__, __LINE__, temp_name ? temp_name : "NULL");
+        PerlIO_printf(Perl_debug_log, "%s:%d: index=%d\n",
+                      __FILE__, __LINE__, index);
+        PerlIO_printf(Perl_debug_log, "%s:%d: PL_curlocales[index]=%s\n",
+                        __FILE__, __LINE__, PL_curlocales[index]);
             */
             if (temp_name && PL_curlocales[index] && strNE(temp_name, "")) {
                 if (         strNE(PL_curlocales[index], temp_name)
@@ -579,9 +634,7 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
 
 #    endif
 
-        /* Without querylocale(), we have to use our record-keeping we've
-         *  done. */
-
+    /* Without querylocale(), we have to use our record-keeping we've done. */
         if (category != LC_ALL) {
 
             DEBUG_Lv(PerlIO_printf(Perl_debug_log,
@@ -629,8 +682,8 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
             }
 
             /* If they are the same, we don't actually have to construct the
-             * string; we just make the entry in LC_ALL_INDEX_ valid, and be
-             * that single name */
+         * string; we just make the entry in LC_ALL_INDEX_ valid, and be that
+         * single name */
             if (are_all_categories_the_same_locale) {
                 PL_curlocales[LC_ALL_INDEX_] = savepv(PL_curlocales[0]);
                 return PL_curlocales[LC_ALL_INDEX_];
@@ -676,7 +729,15 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
          * documented behavior (but if that differs from the actual behavior,
          * this won't work exactly as the OS implements).  We go out and
          * examine the environment based on our understanding of how the system
-         * works, and use that to figure things out */
+         * works, and use that to figure things out.
+         *
+         * Another option would be to toggle to the global locale, and do a
+         * straight setlocale(LC_ALL, "").  But that could cause races with any
+         * other thread that has also switched.  That's probably a rare event,
+         * and we could have a global boolean that indicates if any thread has
+         * switched, but we'd still need the following backup code anyway.  The
+         * only real reason to make the switch is because some alien library
+         * that can't be changed, like GTk, is doing its own setlocales, */
 
         const char * const lc_all = PerlEnv_getenv("LC_ALL");
 
@@ -879,10 +940,10 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
     assert(PL_C_locale_obj);
 
     /* Switching locales generally entails freeing the current one's space (at
-     * the C library's discretion).  We need to stop using that locale before
-     * the switch.  So switch to a known locale object that we don't otherwise
-     * mess with.  This returns the locale object in effect at the time of the
-     * switch. */
+     * the C library's discretion), hence we can't be using that locale at the
+     * time of the switch (this wasn't obvious to khw from the man pages).  So
+     * switch to a known locale object that we don't otherwise mess with; the
+     * function returns the locale object in effect prior to the switch. */
     old_obj = uselocale(PL_C_locale_obj);
 
     DEBUG_Lv(PerlIO_printf(Perl_debug_log,
@@ -903,8 +964,8 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
              "%s:%d: emulate_setlocale_i now using %p\n",
              __FILE__, __LINE__, PL_C_locale_obj));
 
-    /* If this call is to switch to the LC_ALL C locale, it already exists, and
-     * in fact, we already have switched to it (in preparation for what
+    /* If this call is to switch LC_ALL to the 'C' locale, it already exists,
+     * and in fact, we already have switched to it (in preparation for what
      * normally is to come).  But since we're already there, continue to use
      * it instead of trying to create a new locale */
     if (mask == LC_ALL_MASK && isNAME_C_OR_POSIX(locale)) {
@@ -926,7 +987,7 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
          * which uses 'old_obj', uses an empty one.  Same for our reserved C
          * object.  The latter is defensive coding, so that, even if there is
          * some bug, we will never end up trying to modify either of these, as
-         * if passed to newlocale(), they can be. */
+         * newlocale() just below would otherwise do. */
         if (old_obj == LC_GLOBAL_LOCALE || old_obj == PL_C_locale_obj) {
             old_obj = (locale_t) 0;
         }
@@ -942,11 +1003,9 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
                     " failed: %d\n", __FILE__, __LINE__, GET_ERRNO));
 
             if (! uselocale(old_obj)) {
-
                 DEBUG_L(PerlIO_printf(Perl_debug_log,
                                   "%s:%d: switching back failed: %d\n",
                         __FILE__, __LINE__, GET_ERRNO));
-
             }
             RESTORE_ERRNO;
             return NULL;
@@ -982,6 +1041,7 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
         }
     }
 
+    /* Here, we are using 'new_obj' which matches the input 'locale'. */
     DEBUG_Lv(PerlIO_printf(Perl_debug_log,
              "%s:%d: emulate_setlocale_i now using %p\n",
              __FILE__, __LINE__, new_obj));
@@ -1021,7 +1081,9 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
         /* For a single category, if it's not the same as the one in LC_ALL, we
          * nullify LC_ALL */
 
-        if (PL_curlocales[LC_ALL_INDEX_] && strNE(PL_curlocales[LC_ALL_INDEX_], locale)) {
+        if (PL_curlocales[LC_ALL_INDEX_] && strNE(PL_curlocales[LC_ALL_INDEX_],
+                                                  locale))
+        {
             Safefree(PL_curlocales[LC_ALL_INDEX_]);
             PL_curlocales[LC_ALL_INDEX_] = NULL;
         }
@@ -1038,7 +1100,8 @@ S_emulate_setlocale_i(pTHX_ const unsigned int index, const char * locale)
     return locale;
 }
 
-#endif /* USE_POSIX_2008_LOCALE */
+#endif   /* End of the various implementations of the do_setlocale and
+            my_querylocale macros used in the remainder of this program */
 
 #ifdef USE_LOCALE
 
@@ -1048,7 +1111,7 @@ S_set_numeric_radix(pTHX_ const bool use_locale)
     /* If 'use_locale' is FALSE, set to use a dot for the radix character.  If
      * TRUE, use the radix character derived from the current locale */
 
-#if defined(USE_LOCALE_NUMERIC) && (   defined(HAS_LOCALECONV)              \
+#  if defined(USE_LOCALE_NUMERIC) && (   defined(HAS_LOCALECONV)              \
                                     || defined(HAS_NL_LANGINFO))
 
     const char * radix = (use_locale)
@@ -1070,11 +1133,11 @@ S_set_numeric_radix(pTHX_ const bool use_locale)
     DEBUG_L(PerlIO_printf(Perl_debug_log, "Locale radix is '%s', ?UTF-8=%d\n",
                                            SvPVX(PL_numeric_radix_sv),
                                            cBOOL(SvUTF8(PL_numeric_radix_sv))));
-#else
+#  else
 
     PERL_UNUSED_ARG(use_locale);
 
-#endif /* USE_LOCALE_NUMERIC and can find the radix char */
+#  endif /* USE_LOCALE_NUMERIC and can find the radix char */
 
 }
 
@@ -1082,15 +1145,18 @@ STATIC void
 S_new_numeric(pTHX_ const char *newnum)
 {
 
-#ifndef USE_LOCALE_NUMERIC
+#  ifndef USE_LOCALE_NUMERIC
 
     PERL_UNUSED_ARG(newnum);
 
-#else
+#  else
 
     /* Called after each libc setlocale() call affecting LC_NUMERIC, to tell
-     * core Perl this and that 'newnum' is the name of the new locale.
-     * It installs this locale as the current underlying default.
+     * core Perl this and that 'newnum' is the name of the new locale, and we
+     * are switched into it.  It installs this locale as the current underlying
+     * default, and then switches to the C locale, if necessary, so that the
+     * code that has traditionally expected the radix character to be a dot may
+     * continue to do so.
      *
      * The default locale and the C locale can be toggled between by use of the
      * set_numeric_underlying() and set_numeric_standard() functions, which
@@ -1098,8 +1164,8 @@ S_new_numeric(pTHX_ const char *newnum)
      * SET_NUMERIC_STANDARD() in perl.h.
      *
      * The toggling is necessary mainly so that a non-dot radix decimal point
-     * character can be output, while allowing internal calculations to use a
-     * dot.
+     * character can be input and output, while allowing internal calculations
+     * to use a dot.
      *
      * This sets several interpreter-level variables:
      * PL_numeric_name  The underlying locale's name: a copy of 'newnum'
@@ -1118,6 +1184,13 @@ S_new_numeric(pTHX_ const char *newnum)
      *                  variables are true at the same time.  (Toggling is a
      *                  no-op under these circumstances.)  This variable is
      *                  used to avoid having to recalculate.
+     * PL_numeric_radix_sv  Contains the string that code should use for the
+     *                  decimal point.  It is set to either a dot or the
+     *                  program's underlying locale's radix character string,
+     *                  depending on the situation.
+     * PL_underlying_numeric_obj = (only on POSIX 2008 platforms)  An object
+     *                  with everything set up properly so as to avoid work on
+     *                  such platforms.
      */
 
     char *save_newnum;
@@ -1135,7 +1208,7 @@ S_new_numeric(pTHX_ const char *newnum)
     PL_numeric_underlying = TRUE;
     PL_numeric_standard = isNAME_C_OR_POSIX(save_newnum);
 
-#ifndef TS_W32_BROKEN_LOCALECONV
+#    ifndef TS_W32_BROKEN_LOCALECONV
 
     /* If its name isn't C nor POSIX, it could still be indistinguishable from
      * them.  But on broken Windows systems calling my_nl_langinfo() for
@@ -1147,10 +1220,11 @@ S_new_numeric(pTHX_ const char *newnum)
                                  && strEQ("",  my_nl_langinfo(THOUSEP, FALSE)));
     }
 
-#endif
+#    endif
 
     /* Save the new name if it isn't the same as the previous one, if any */
     if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) {
+    /* Save the locale name for future use */
         Safefree(PL_numeric_name);
         PL_numeric_name = save_newnum;
     }
@@ -1162,19 +1236,21 @@ S_new_numeric(pTHX_ const char *newnum)
 
 #  ifdef USE_POSIX_2008_LOCALE
 
+    /* We keep a special object for easy switching to */
     PL_underlying_numeric_obj = newlocale(LC_NUMERIC_MASK,
                                           PL_numeric_name,
                                           PL_underlying_numeric_obj);
 
-#endif
+#    endif
 
     DEBUG_L( PerlIO_printf(Perl_debug_log,
                             "Called new_numeric with %s, PL_numeric_name=%s\n",
                             newnum, PL_numeric_name));
 
-    /* Keep LC_NUMERIC in the C locale.  This is for XS modules, so they don't
-     * have to worry about the radix being a non-dot.  (Core operations that
-     * need the underlying locale change to it temporarily). */
+    /* Keep LC_NUMERIC so that it has the C locale radix and thousands
+     * separator.  This is for XS modules, so they don't have to worry about
+     * the radix being a non-dot.  (Core operations that need the underlying
+     * locale change to it temporarily). */
     if (PL_numeric_standard) {
         set_numeric_radix(0);
     }
@@ -1182,7 +1258,7 @@ S_new_numeric(pTHX_ const char *newnum)
         set_numeric_standard();
     }
 
-#endif /* USE_LOCALE_NUMERIC */
+#  endif
 
 }
 
@@ -1190,13 +1266,15 @@ void
 Perl_set_numeric_standard(pTHX)
 {
 
-#ifdef USE_LOCALE_NUMERIC
+#  ifdef USE_LOCALE_NUMERIC
 
-    /* Toggle the LC_NUMERIC locale to C.  Most code should use the macros like
-     * SET_NUMERIC_STANDARD() in perl.h instead of calling this directly.  The
-     * macro avoids calling this routine if toggling isn't necessary according
-     * to our records (which could be wrong if some XS code has changed the
-     * locale behind our back) */
+    /* Unconditionally toggle the LC_NUMERIC locale to the current underlying
+     * default.
+     *
+     * Most code should use the macro SET_NUMERIC_STANDARD() in perl.h
+     * instead of calling this directly.  The macro avoids calling this routine
+     * if toggling isn't necessary according to our records (which could be
+     * wrong if some XS code has changed the locale behind our back) */
 
     DEBUG_L(PerlIO_printf(Perl_debug_log,
                                   "Setting LC_NUMERIC locale to standard C\n"));
@@ -1206,7 +1284,7 @@ Perl_set_numeric_standard(pTHX)
     PL_numeric_underlying = PL_numeric_underlying_is_standard;
     set_numeric_radix(0);
 
-#endif /* USE_LOCALE_NUMERIC */
+#  endif /* USE_LOCALE_NUMERIC */
 
 }
 
@@ -1214,10 +1292,12 @@ void
 Perl_set_numeric_underlying(pTHX)
 {
 
-#ifdef USE_LOCALE_NUMERIC
+#  ifdef USE_LOCALE_NUMERIC
 
-    /* Toggle the LC_NUMERIC locale to the current underlying default.  Most
-     * code should use the macros like SET_NUMERIC_UNDERLYING() in perl.h
+    /* Unconditionally toggle the LC_NUMERIC locale to the current underlying
+     * default.
+     *
+     * Most code should use the macro SET_NUMERIC_UNDERLYING() in perl.h
      * instead of calling this directly.  The macro avoids calling this routine
      * if toggling isn't necessary according to our records (which could be
      * wrong if some XS code has changed the locale behind our back) */
@@ -1230,7 +1310,7 @@ Perl_set_numeric_underlying(pTHX)
     PL_numeric_underlying = TRUE;
     set_numeric_radix(! PL_numeric_standard);
 
-#endif /* USE_LOCALE_NUMERIC */
+#  endif /* USE_LOCALE_NUMERIC */
 
 }
 
@@ -1241,12 +1321,12 @@ STATIC void
 S_new_ctype(pTHX_ const char *newctype)
 {
 
-#ifndef USE_LOCALE_CTYPE
+#  ifndef USE_LOCALE_CTYPE
 
     PERL_UNUSED_ARG(newctype);
     PERL_UNUSED_CONTEXT;
 
-#else
+#  else
 
     /* Called after each libc setlocale() call affecting LC_CTYPE, to tell
      * core Perl this and that 'newctype' is the name of the new locale.
@@ -1282,18 +1362,21 @@ S_new_ctype(pTHX_ const char *newctype)
         Copy(PL_fold_latin1, PL_fold_locale, 256, U8);
 
         /* UTF-8 locales can have special handling for 'I' and 'i' if they are
-         * Turkic.  Make sure these two are the only anomalies.  (We don't use
-         * towupper and towlower because they aren't in C89.) */
+         * Turkic.  Make sure these two are the only anomalies.  (We don't
+         * require towupper and towlower because they aren't in C89.) */
 
-#if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER)
+#    if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER)
 
-        if (towupper('i') == 0x130 && towlower('I') == 0x131) {
+        if (towupper('i') == 0x130 && towlower('I') == 0x131)
 
-#else
+#    else
 
-        if (toU8_UPPER_LC('i') == 'i' && toU8_LOWER_LC('I') == 'I') {
+        if (toU8_UPPER_LC('i') == 'i' && toU8_LOWER_LC('I') == 'I')
 
-#endif
+#    endif
+
+        {
+            /* This is how we determine it really is Turkic */
             check_for_problems = TRUE;
             maybe_utf8_turkic = TRUE;
         }
@@ -1528,7 +1611,8 @@ S_new_ctype(pTHX_ const char *newctype)
             if (IN_LC(LC_CTYPE) || UNLIKELY(DEBUG_L_TEST)) {
 
                 /* The '0' below suppresses a bogus gcc compiler warning */
-                Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale), 0);
+                Perl_warner(aTHX_ packWARN(WARN_LOCALE), SvPVX(PL_warn_locale),
+                                                                            0);
 
                 if (IN_LC(LC_CTYPE)) {
                     SvREFCNT_dec_NN(PL_warn_locale);
@@ -1538,7 +1622,7 @@ S_new_ctype(pTHX_ const char *newctype)
         }
     }
 
-#endif /* USE_LOCALE_CTYPE */
+#  endif /* USE_LOCALE_CTYPE */
 
 }
 
@@ -1546,7 +1630,7 @@ void
 Perl__warn_problematic_locale()
 {
 
-#ifdef USE_LOCALE_CTYPE
+#  ifdef USE_LOCALE_CTYPE
 
     dTHX;
 
@@ -1562,7 +1646,7 @@ Perl__warn_problematic_locale()
         PL_warn_locale = NULL;
     }
 
-#endif
+#  endif
 
 }
 
@@ -1570,12 +1654,12 @@ STATIC void
 S_new_collate(pTHX_ const char *newcoll)
 {
 
-#ifndef USE_LOCALE_COLLATE
+#  ifndef USE_LOCALE_COLLATE
 
     PERL_UNUSED_ARG(newcoll);
     PERL_UNUSED_CONTEXT;
 
-#else
+#  else
 
     /* Called after each libc setlocale() call affecting LC_COLLATE, to tell
      * core Perl this and that 'newcoll' is the name of the new locale.
@@ -1778,11 +1862,11 @@ S_new_collate(pTHX_ const char *newcoll)
         }
     }
 
-#endif /* USE_LOCALE_COLLATE */
+#  endif /* USE_LOCALE_COLLATE */
 
 }
 
-#endif
+#endif  /* USE_LOCALE */
 
 #ifdef WIN32
 
@@ -2054,8 +2138,8 @@ Perl_setlocale(const int category, const char * locale)
         return retval;
     }
 
-    /* Now that have switched locales, we have to update our records to
-     * correspond. */
+    /* Now that have changed locales, we have to update our records to
+     * correspond.  Only certain categories have extra work to update. */
 
     switch (category) {
 
@@ -2122,10 +2206,11 @@ Perl_setlocale(const int category, const char * locale)
 }
 
 PERL_STATIC_INLINE const char *
-S_save_to_buffer(const char * string, char **buf, Size_t *buf_size, const Size_t offset)
+S_save_to_buffer(const char * string, char **buf, Size_t *buf_size,
+                 const Size_t offset)
 {
-    /* Copy the NUL-terminated 'string' to 'buf' + 'offset'.  'buf' has size 'buf_size',
-     * growing it if necessary */
+    /* Copy the NUL-terminated 'string' to 'buf' + 'offset'.  'buf' has size
+     * 'buf_size', growing it if necessary */
 
     Size_t string_size;
 
@@ -2262,27 +2347,30 @@ Perl_langinfo(const int item)
 }
 
 STATIC const char *
-#ifdef HAS_NL_LANGINFO
+#  ifdef HAS_NL_LANGINFO
 S_my_nl_langinfo(const nl_item item, bool toggle)
-#else
+#  else
 S_my_nl_langinfo(const int item, bool toggle)
-#endif
+#  endif
 {
     dTHX;
     const char * retval;
 
-#ifdef USE_LOCALE_NUMERIC
+#  ifdef USE_LOCALE_NUMERIC
 
     /* We only need to toggle into the underlying LC_NUMERIC locale for these
      * two items, and only if not already there */
     if (toggle && ((   item != RADIXCHAR && item != THOUSEP)
                     || PL_numeric_underlying))
 
-#endif  /* No toggling needed if not using LC_NUMERIC */
+#  endif  /* No toggling needed if not using LC_NUMERIC */
 
         toggle = FALSE;
 
-#if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available.  */
+/*--------------------------------------------------------------------------*/
+/* Above is the common beginning to all the implementations of my_langinfo().
+ * Below are the various completions */
+#  if defined(HAS_NL_LANGINFO) /* nl_langinfo() is available.  */
 #  if   ! defined(HAS_THREAD_SAFE_NL_LANGINFO_L)      \
      || ! defined(USE_POSIX_2008_LOCALE)
 
@@ -2313,8 +2401,9 @@ S_my_nl_langinfo(const int item, bool toggle)
             RESTORE_LC_NUMERIC();
         }
     }
-
-#  else /* Use nl_langinfo_l(), avoiding both a mutex and changing the locale */
+/*--------------------------------------------------------------------------*/
+#    else /* Use nl_langinfo_l(), avoiding both a mutex and changing the
+             locale. */
 
     {
         bool do_free = FALSE;
@@ -2351,6 +2440,7 @@ S_my_nl_langinfo(const int item, bool toggle)
 
 #  endif
 
+    /* We can return 'yes' and 'no' even if we didn't get a result */
     if (strEQ(retval, "")) {
         if (item == YESSTR) {
             return "yes";
@@ -2361,8 +2451,8 @@ S_my_nl_langinfo(const int item, bool toggle)
     }
 
     return retval;
-
-#else   /* Below, emulate nl_langinfo as best we can */
+/*--------------------------------------------------------------------------*/
+#  else   /* Below, emulate nl_langinfo as best we can */
 
     {
 
@@ -2505,6 +2595,7 @@ S_my_nl_langinfo(const int item, bool toggle)
 #    endif
 
                 lc = localeconv();
+
                 if (   ! lc
                     || ! lc->currency_symbol
                     || strEQ("", lc->currency_symbol))
@@ -2563,7 +2654,8 @@ S_my_nl_langinfo(const int item, bool toggle)
                 if (needed_size >= (int) PL_langinfo_bufsize) {
                     PL_langinfo_bufsize = needed_size + 1;
                     Renew(PL_langinfo_buf, PL_langinfo_bufsize, char);
-                    needed_size = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize,
+            needed_size
+                    = my_snprintf(PL_langinfo_buf, PL_langinfo_bufsize,
                                              "%.1f", 1.5);
                     assert(needed_size < (int) PL_langinfo_bufsize);
                 }
@@ -2590,7 +2682,8 @@ S_my_nl_langinfo(const int item, bool toggle)
                 }
                 else {
                     *ptr = '\0';
-                    Move(item_start, PL_langinfo_buf, ptr - PL_langinfo_buf, char);
+            Move(item_start, PL_langinfo_buf, ptr - PL_langinfo_buf,
+                                                                char);
                 }
 
                 if (toggle) {
@@ -2631,7 +2724,8 @@ S_my_nl_langinfo(const int item, bool toggle)
                  * thousands separator.  It needs to handle UTF-16 vs -8
                  * issues. */
 
-                needed_size = GetNumberFormatEx(PL_numeric_name, 0, "1234.5", NULL, PL_langinfo_buf, PL_langinfo_bufsize);
+        needed_size = GetNumberFormatEx(PL_numeric_name, 0, "1234.5",
+                            NULL, PL_langinfo_buf, PL_langinfo_bufsize);
                 DEBUG_L(PerlIO_printf(Perl_debug_log,
                     "%s: %d: return from GetNumber, count=%d, val=%s\n",
                     __FILE__, __LINE__, needed_size, PL_langinfo_buf));
@@ -2861,33 +2955,29 @@ S_my_nl_langinfo(const int item, bool toggle)
 
                 /* Here, we got a result.
                  *
-                 * If the item is 'ALT_DIGITS', PL_langinfo_buf contains the
-                 * alternate format for wday 0.  If the value is the same as
-                 * the normal 0, there isn't an alternate, so clear the buffer.
-                 * */
-                if (   item == ALT_DIGITS
-                    && strEQ(PL_langinfo_buf, "0"))
-                {
+             * If the item is 'ALT_DIGITS', 'PL_langinfo_buf' contains the
+             * alternate format for wday 0.  If the value is the same as the
+             * normal 0, there isn't an alternate, so clear the buffer.  */
+            if (item == ALT_DIGITS && strEQ(PL_langinfo_buf, "0")) {
                     *PL_langinfo_buf = '\0';
                 }
 
                 /* ALT_DIGITS is problematic.  Experiments on it showed that
-                 * strftime() did not always work properly when going from
-                 * alt-9 to alt-10.  Only a few locales have this item defined,
-                 * and in all of them on Linux that khw was able to find,
-                 * nl_langinfo() merely returned the alt-0 character, possibly
-                 * doubled.  Most Unicode digits are in blocks of 10
-                 * consecutive code points, so that is sufficient information
-                 * for those scripts, as we can infer alt-1, alt-2, ....  But
-                 * for a Japanese locale, a CJK ideographic 0 is returned, and
-                 * the CJK digits are not in code point order, so you can't
-                 * really infer anything.  The localedef for this locale did
-                 * specify the succeeding digits, so that strftime() works
-                 * properly on them, without needing to infer anything.  But
-                 * the nl_langinfo() return did not give sufficient information
-                 * for the caller to understand what's going on.  So until
-                 * there is evidence that it should work differently, this
-                 * returns the alt-0 string for ALT_DIGITS.
+             * strftime() did not always work properly when going from alt-9 to
+             * alt-10.  Only a few locales have this item defined, and in all
+             * of them on Linux that khw was able to find, nl_langinfo() merely
+             * returned the alt-0 character, possibly doubled.  Most Unicode
+             * digits are in blocks of 10 consecutive code points, so that is
+             * sufficient information for such scripts, as we can infer alt-1,
+             * alt-2, ....  But for a Japanese locale, a CJK ideographic 0 is
+             * returned, and the CJK digits are not in code point order, so you
+             * can't really infer anything.  The localedef for this locale did
+             * specify the succeeding digits, so that strftime() works properly
+             * on them, without needing to infer anything.  But the
+             * nl_langinfo() return did not give sufficient information for the
+             * caller to understand what's going on.  So until there is
+             * evidence that it should work differently, this returns the alt-0
+             * string for ALT_DIGITS.
                  *
                  * wday was chosen because its range is all a single digit.
                  * Things like tm_sec have two digits as the minimum: '00' */
@@ -2895,8 +2985,8 @@ S_my_nl_langinfo(const int item, bool toggle)
                 retval = PL_langinfo_buf;
 
                 /* If to return the format, not the value, overwrite the buffer
-                 * with it.  But some strftime()s will keep the original format
-                 * if illegal, so change those to "" */
+             * with it.  But some strftime()s will keep the original format if
+             * illegal, so change those to "" */
                 if (return_format) {
                     if (strEQ(PL_langinfo_buf, format)) {
                         *PL_langinfo_buf = '\0';
@@ -2916,8 +3006,8 @@ S_my_nl_langinfo(const int item, bool toggle)
 
     return retval;
 
-#endif
-
+#  endif
+/*--------------------------------------------------------------------------*/
 }
 
 /*
@@ -2944,43 +3034,62 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
      * error handling.
      *
      * Besides some asserts, data structure initialization, and specific
-     * platform complications, this routine is effectively just two things.
-     *
-     *    a)    setlocale(LC_ALL, "");
+     * platform complications, this routine is effectively represented by this
+     * pseudo-code:
      *
-     * which sets LC_ALL to the values in the current environment.
+     *      setlocale(LC_ALL, "");                                            x
+     *      foreach (subcategory) {                                           x
+     *          curlocales[f(subcategory)] = setlocale(subcategory, NULL);    x
+     *      }                                                                 x
+     *      if (platform_so_requires) {
+     *          foreach (subcategory) {
+     *            PL_curlocales[f(subcategory)] = curlocales[f(subcategory)]
+     *          }
+     *      }
+     *      foreach (subcategory) {
+     *          if (needs_special_handling[f(subcategory)] &this_subcat_handler
+     *      }
      *
-     * And for each individual category 'foo' whose value we care about:
+     * This sets all the categories to the values in the current environment,
+     * saves them temporarily in curlocales[] until they can be handled and/or
+     * on some platforms saved in a per-thread array PL_curlocales[].
      *
-     *    b)    save_foo = setlocale(LC_foo, NULL); handle_foo(save_foo);
+     * f(foo) is a mapping from the opaque system category numbers to small
+     * non-negative integers used most everywhere in this file as indices into
+     * arrays (such as curlocales[]) so the program doesn't have to otherwise
+     * deal with the opaqueness.
      *
-     * (We don't tend to care about categories like LC_PAPER, for example.)
+     * If the platform doesn't have LC_ALL, the lines marked 'x' above are
+     * effectively replaced by:
+     *      foreach (subcategory) {                                           y
+     *          curlocales[f(subcategory)] = setlocale(subcategory, "");      y
+     *      }                                                                 y
      *
-     * But there are complications.  On systems without LC_ALL, it emulates
-     * step a) by looping through all the categories, and doing
+     * The only differences being the lack of an LC_ALL call, and using ""
+     * instead of NULL in the setlocale calls.
      *
-     *    setlocale(LC_foo, "");
+     * But there are, of course, complications.
      *
-     * on each.
+     * it has to deal with if this is an embedded perl, whose locale doesn't
+     * come from the environment, but has been set up by the caller.  This is
+     * pretty simply handled: the "" in the setlocale calls is not a string
+     * constant, but a variable which is set to NULL in the embedded case.
      *
-     * And it has to deal with if this is an embedded perl, whose locale
-     * doesn't come from the environment, but has been set up by the caller.
-     * This is pretty simply handled: the "" in the setlocale calls is not a
-     * string constant, but a variable which is set to NULL in the embedded
-     * case.
+     * But the major complication is handling failure and doing fallback.  All
+     * the code marked 'x' or 'y' above is actually enclosed in an outer loop,
+     * using the array trial_locales[].  On entry, trial_locales[] is
+     * initialized to just one entry, containing the NULL or "" locale argument
+     * shown above.  If, as is almost always the case, everything works, it
+     * exits after just the one iteration, going on to the next step.
      *
-     * But the major complication is handling failure and doing fallback.
-     * There is an array, trial_locales, the elements of which are looped over
-     * until the locale is successfully set.  The array is initialized with
-     * just one element, for
-     *      setlocale(LC_ALL, $NULL_or_empty)
-     * If that works, as it almost always does, there's no more elements and
-     * the loop iterates just the once.  Otherwise elements are added for each
-     * of the environment variables that POSIX dictates should control the
-     * program, in priority order, with a final one being "C".  The loop is
-     * repeated until the first one succeeds.  If all fail, we limp along with
-     * whatever state we got to.  If there is no LC_ALL, an inner loop is run
-     * through all categories (making things look complex).
+     * But if there is a failure, the code tries its best to honor the
+     * environment as much as possible.  It self-modifies trial_locales[] to
+     * have more elements, one for each of the POSIX-specified settings from
+     * the environment, such as LANG, ending in the ultimate fallback, the C
+     * locale.  Thus if there is something bogus with a higher priority
+     * environment variable, it will try with the next highest, until something
+     * works.  If everything fails, it limps along with whatever state it got
+     * to.
      *
      * A further complication is that Windows has an additional fallback, the
      * user-default ANSI code page obtained from the operating system.  This is
@@ -3168,15 +3277,15 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
 
     /* Initialize the per-thread mbrFOO() state variables.  See POSIX.xs for
      * why these particular incantations are used. */
-#ifdef HAS_MBRLEN
+#  ifdef HAS_MBRLEN
     memzero(&PL_mbrlen_ps, sizeof(PL_mbrlen_ps));
-#endif
-#ifdef HAS_MBRTOWC
+#  endif
+#  ifdef HAS_MBRTOWC
     memzero(&PL_mbrtowc_ps, sizeof(PL_mbrtowc_ps));
-#endif
-#ifdef HAS_WCTOMBR
+#  endif
+#  ifdef HAS_WCTOMBR
     wcrtomb(NULL, L'\0', &PL_wcrtomb_ps);
-#endif
+#  endif
 
     /* Initialize the cache of the program's UTF-8ness for the always known
      * locales C and POSIX */
@@ -3205,13 +3314,11 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
                            "%s:%d: created C object %p\n",
                            __FILE__, __LINE__, PL_C_locale_obj));
 #  endif
-
 #  ifdef USE_LOCALE_NUMERIC
 
     PL_numeric_radix_sv = newSVpvs(".");
 
 #  endif
-
 #  if defined(USE_POSIX_2008_LOCALE) && ! defined(HAS_QUERYLOCALE)
 
     /* Initialize our records.  If we have POSIX 2008, we have LC_ALL */
@@ -3598,15 +3705,18 @@ Perl__mem_collxfrm(pTHX_ const char *input_string,
                          bool utf8      /* Is the input in UTF-8? */
                    )
 {
-
-    /* _mem_collxfrm() is a bit like strxfrm() but with two important
-     * differences. First, it handles embedded NULs. Second, it allocates a bit
-     * more memory than needed for the transformed data itself.  The real
-     * transformed data begins at offset COLLXFRM_HDR_LEN.  *xlen is set to
-     * the length of that, and doesn't include the collation index size.
+    /* _mem_collxfrm() is like strxfrm() but with two important differences.
+     * First, it handles embedded NULs. Second, it allocates a bit more memory
+     * than needed for the transformed data itself.  The real transformed data
+     * begins at offset COLLXFRM_HDR_LEN.  *xlen is set to the length of that,
+     * and doesn't include the collation index size.
+     *
+     * It is the caller's responsibility to eventually free the memory returned
+     * by this function.
+     *
      * Please see sv_collxfrm() to see how this is used. */
 
-#define COLLXFRM_HDR_LEN    sizeof(PL_collation_ix)
+#  define COLLXFRM_HDR_LEN    sizeof(PL_collation_ix)
 
     char * s = (char *) input_string;
     STRLEN s_strlen = strlen(input_string);
@@ -4075,7 +4185,7 @@ Perl__mem_collxfrm(pTHX_ const char *input_string,
                       _byte_dump_string((U8 *) xbuf + COLLXFRM_HDR_LEN,
                             *xlen, 1))));
 
-    /* Free up unneeded space; retain ehough for trailing NUL */
+    /* Free up unneeded space; retain enough for trailing NUL */
     Renew(xbuf, COLLXFRM_HDR_LEN + *xlen + 1, char);
 
     if (s != input_string) {
@@ -4126,6 +4236,7 @@ S_print_collxfrm_input_and_return(pTHX_
 
 #  endif    /* DEBUGGING */
 #endif /* USE_LOCALE_COLLATE */
+
 #ifdef USE_LOCALE
 #  ifdef DEBUGGING
 
@@ -4167,7 +4278,9 @@ S_print_bytes_for_locale(pTHX_
 #  endif   /* #ifdef DEBUGGING */
 
 STATIC const char *
-S_switch_category_locale_to_template(pTHX_ const int switch_category, const int template_category, const char * template_locale)
+S_switch_category_locale_to_template(pTHX_ const int switch_category,
+                                     const int template_category,
+                                     const char * template_locale)
 {
     /* Changes the locale for LC_'switch_category" to that of
      * LC_'template_category', if they aren't already the same.  If not NULL,
@@ -4228,7 +4341,8 @@ S_switch_category_locale_to_template(pTHX_ const int switch_category, const int
 }
 
 STATIC void
-S_restore_switched_locale(pTHX_ const int category, const char * const original_locale)
+S_restore_switched_locale(pTHX_ const int category,
+                                const char * const original_locale)
 {
     /* Restores the locale for LC_'category' to 'original_locale' (which is a
      * copy that will be freed by this function), or do nothing if the latter
@@ -4249,7 +4363,7 @@ S_restore_switched_locale(pTHX_ const int category, const char * const original_
 }
 
 /* is_cur_LC_category_utf8 uses a small char buffer to avoid malloc/free */
-#define CUR_LC_BUFFER_SIZE  64
+#  define CUR_LC_BUFFER_SIZE  64
 
 bool
 Perl__is_cur_LC_category_utf8(pTHX_ int category)
@@ -4518,7 +4632,10 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
 
             if (is_utf8_invariant_string_loc(currency_string, 0, &first_variant))
             {
-                DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
+                DEBUG_L(PerlIO_printf(Perl_debug_log,
+                        "Couldn't get currency symbol for %s, or contains"
+                        " only ASCII; can't use for determining if UTF-8"
+                        " locale\n", save_input_locale));
                 only_ascii = TRUE;
             }
             else {
@@ -4532,7 +4649,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                 /* It isn't a UTF-8 locale if the symbol is not legal UTF-8;
                  * otherwise assume the locale is UTF-8 if and only if the symbol
                  * is non-ascii UTF-8. */
-                DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?Currency symbol for %s is UTF-8=%d\n",
+                DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+                                      "\t?Currency symbol for %s is UTF-8=%d\n",
                                         save_input_locale, is_utf8));
                 goto finish_and_return;
             }
@@ -4556,10 +4674,10 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
             char * formatted_time;
 
             /* Here the current LC_TIME is set to the locale of the category
-             * whose information is desired.  Look at all the days of the week and
-             * month names, and the timezone and am/pm indicator for UTF-8 variant
-             * characters.  The first such a one found will tell us if the locale
-             * is UTF-8 or not */
+             * whose information is desired.  Look at all the days of the week
+             * and month names, and the timezone and am/pm indicator for UTF-8
+             * variant characters.  The first such a one found will tell us if
+             * the locale is UTF-8 or not */
 
             for (i = 0; i < 7 + 12; i++) {  /* 7 days; 12 months */
                 formatted_time = my_strftime("%A %B %Z %p",
@@ -4568,10 +4686,10 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                     || is_utf8_invariant_string((U8 *) formatted_time, 0))
                 {
 
-                    /* Here, we didn't find a non-ASCII.  Try the next time through
-                     * with the complemented dst and am/pm, and try with the next
-                     * weekday.  After we have gotten all weekdays, try the next
-                     * month */
+                    /* Here, we didn't find a non-ASCII.  Try the next time
+                     * through with the complemented dst and am/pm, and try
+                     * with the next weekday.  After we have gotten all
+                     * weekdays, try the next month */
                     is_dst = ! is_dst;
                     hour = (hour + 12) % 24;
                     dom++;
@@ -4586,7 +4704,8 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                  * locale if we changed it */
                 restore_switched_locale(LC_TIME, original_time_locale);
 
-                DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?time-related strings for %s are UTF-8=%d\n",
+                DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+                            "\t?time-related strings for %s are UTF-8=%d\n",
                                     save_input_locale,
                                     is_utf8_string((U8 *) formatted_time, 0)));
                 is_utf8 = is_utf8_string((U8 *) formatted_time, 0);
@@ -4597,24 +4716,28 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
              * ASCII.  Go on to the next test.  If we changed it, restore LC_TIME
              * to its original locale */
             restore_switched_locale(LC_TIME, original_time_locale);
-            DEBUG_Lv(PerlIO_printf(Perl_debug_log, "All time-related words for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
+            DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+                     "All time-related words for %s contain only ASCII;"
+                     " can't use for determining if UTF-8 locale\n",
+                     save_input_locale));
         }
 
 #    endif
 
 #    if 0 && defined(USE_LOCALE_MESSAGES) && defined(HAS_SYS_ERRLIST)
 
-    /* This code is ifdefd out because it was found to not be necessary in testing
-     * on our dromedary test machine, which has over 700 locales.  There, this
-     * added no value to looking at the currency symbol and the time strings.  I
-     * left it in so as to avoid rewriting it if real-world experience indicates
-     * that dromedary is an outlier.  Essentially, instead of returning abpve if we
-     * haven't found illegal utf8, we continue on and examine all the strerror()
-     * messages on the platform for utf8ness.  If all are ASCII, we still don't
-     * know the answer; but otherwise we have a pretty good indication of the
-     * utf8ness.  The reason this doesn't help much is that the messages may not
-     * have been translated into the locale.  The currency symbol and time strings
-     * are much more likely to have been translated.  */
+    /* This code is ifdefd out because it was found to not be necessary in
+     * testing on our dromedary test machine, which has over 700 locales.
+     * There, this added no value to looking at the currency symbol and the
+     * time strings.  I left it in so as to avoid rewriting it if real-world
+     * experience indicates that dromedary is an outlier.  Essentially, instead
+     * of returning abpve if we haven't found illegal utf8, we continue on and
+     * examine all the strerror() messages on the platform for utf8ness.  If
+     * all are ASCII, we still don't know the answer; but otherwise we have a
+     * pretty good indication of the utf8ness.  The reason this doesn't help
+     * much is that the messages may not have been translated into the locale.
+     * The currency symbol and time strings are much more likely to have been
+     * translated.  */
         {
             int e;
             bool non_ascii = FALSE;
@@ -4648,15 +4771,20 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
 
             if (non_ascii) {
 
-                /* Any non-UTF-8 message means not a UTF-8 locale; if all are valid,
-                 * any non-ascii means it is one; otherwise we assume it isn't */
-                DEBUG_Lv(PerlIO_printf(Perl_debug_log, "\t?error messages for %s are UTF-8=%d\n",
+                /* Any non-UTF-8 message means not a UTF-8 locale; if all are
+                 * valid, any non-ascii means it is one; otherwise we assume it
+                 * isn't */
+                DEBUG_Lv(PerlIO_printf(Perl_debug_log,
+                                    "\t?error messages for %s are UTF-8=%d\n",
                                     save_input_locale,
                                     is_utf8));
                 goto finish_and_return;
             }
 
-            DEBUG_L(PerlIO_printf(Perl_debug_log, "All error messages for %s contain only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
+            DEBUG_L(PerlIO_printf(Perl_debug_log,
+                    "All error messages for %s contain only ASCII;"
+                    " can't use for determining if UTF-8 locale\n",
+                    save_input_locale));
         }
 
 #    endif
@@ -4914,8 +5042,7 @@ Perl_my_strerror(pTHX_ const int errnum)
         Safefree(save_locale);
     }
 
-#  elif   defined(USE_POSIX_2008_LOCALE)                      \
-     &&   defined(HAS_STRERROR_L)
+#  elif defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L)
 
     /* This function is also trivial if we don't have to worry about thread
      * safety and have strerror_l(), as it handles the switch of locales so we
@@ -5301,8 +5428,6 @@ Perl_thread_locale_init()
 
     dTHX_DEBUGGING;
 
-    /* C starts the new thread in the global C locale.  If we are thread-safe,
-     * we want to not be in the global locale */
 
      DEBUG_L(PerlIO_printf(Perl_debug_log,
             "%s:%d: new thread, initial locale is %s; calling setlocale\n",
@@ -5310,10 +5435,12 @@ Perl_thread_locale_init()
 
 #  ifdef WIN32
 
+    /* On Windows, make sure new thread has per-thread locales enabled */
     _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
 
 #  else
 
+    /* This thread starts off in the C locale */
     Perl_setlocale(LC_ALL, "C");
 
 #  endif
author	Karl Williamson <khw@cpan.org>	2021-03-17 11:13:56 -0600
committer	Karl Williamson <khw@cpan.org>	2022-06-19 13:29:35 -0600
commit	fe1c14942b81da8e421933b78e87d61c3cfbaafb (patch)
tree	044fa23f55d96cd6f84c5afb6c8d0882510c00ba
parent	5bdad801a7181c1f85b383d092faea9838f0cf64 (diff)
download	perl-fe1c14942b81da8e421933b78e87d61c3cfbaafb.tar.gz