summaryrefslogtreecommitdiff
path: root/locale.c
diff options
context:
space:
mode:
Diffstat (limited to 'locale.c')
-rw-r--r--locale.c884
1 files changed, 539 insertions, 345 deletions
diff --git a/locale.c b/locale.c
index b3b4d74b6a..b9d2b7edb4 100644
--- a/locale.c
+++ b/locale.c
@@ -3193,8 +3193,7 @@ HV *
Perl_localeconv(pTHX)
{
-#if ! defined(HAS_LOCALECONV) \
- || (! defined(USE_LOCALE_MONETARY) && ! defined(USE_LOCALE_NUMERIC))
+#if ! defined(HAS_LOCALECONV)
return newHV();
@@ -3206,141 +3205,453 @@ Perl_localeconv(pTHX)
}
-#if defined(HAS_LOCALECONV) \
- && (defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC))
+#if defined(HAS_LOCALECONV)
HV *
S_my_localeconv(pTHX_ const int item)
{
- HV * retval;
- locale_utf8ness_t numeric_locale_is_utf8 = LOCALE_UTF8NESS_UNKNOWN;
- locale_utf8ness_t monetary_locale_is_utf8 = LOCALE_UTF8NESS_UNKNOWN;
- HV * (*copy_localeconv)(pTHX_ const struct lconv *,
- const int,
- const locale_utf8ness_t,
- const locale_utf8ness_t);
-
- /* A thread-safe locale_conv(). The locking mechanisms vary greatly
- * depending on platform capabilities. They all share this common set up
- * code for the function, and then conditional compilations choose one of
- * several terminations.
+ PERL_ARGS_ASSERT_MY_LOCALECONV;
+
+ /* This returns a mortalized hash containing all or one of the elements
+ * returned by localeconv(). It is used by Perl_localeconv() and
+ * POSIX::localeconv() and is thread-safe.
*
* There are two use cases:
- * 1) Called from POSIX::locale_conv(). This returns lconv() copied to
- * a hash, based on the current underlying locale.
+ * 1) Called from POSIX::locale_conv(). This returns the lconv structure
+ * copied to a hash, based on the current underlying locales for
+ * LC_NUMERIC and LC_MONETARY. An input item==0 signifies this case, or
+ * on many platforms it is the only use case compiled.
* 2) Certain items that nl_langinfo() provides are also derivable from
* the return of localeconv(). Windows notably doesn't have
* nl_langinfo(), so on that, and actually any platform lacking it,
- * my_localeconv() is used to emulate it for those particular items.
- * The code to do this is compiled only on such platforms. Rather than
- * going to the expense of creating a full hash when only one item is
- * needed, just the desired item is returned, in an SV cast to an HV.
+ * my_localeconv() is used also to emulate it for those particular
+ * items. The code to do this is compiled only on such platforms.
+ * Rather than going to the expense of creating a full hash when only
+ * one item is needed, the returned hash has just the desired item in
+ * it.
+ *
+ * To access all the localeconv() struct lconv fields, there is a data
+ * structure that contains every commonly documented field in it. (Maybe
+ * some minority platforms have extra fields. Those could be added here
+ * without harm; they would just be ignored on platforms lacking them.)
*
- * There is a helper function to accomplish each of the two tasks. The
- * function pointer just below is set to the appropriate one, and is called
- * from each of the various implementations, in the middle of whatever
- * necessary locking/locale swapping have been done. */
+ * Our structure is compiled to make looping through the fields easier by
+ * pointing each name to its value's offset within lconv, e.g.,
+ { "thousands_sep", STRUCT_OFFSET(struct lconv, thousands_sep) }
+ */
+# define LCONV_ENTRY(name) \
+ {STRINGIFY(name), STRUCT_OFFSET(struct lconv, name)}
+
+ /* These synonyms are just for clarity, and to make it easier in case
+ * something needs to change in the future */
+# define LCONV_NUMERIC_ENTRY(name) LCONV_ENTRY(name)
+# define LCONV_MONETARY_ENTRY(name) LCONV_ENTRY(name)
+
+ /* There are just a few fields for NUMERIC strings */
+ const lconv_offset_t lconv_numeric_strings[] = {
+# ifndef NO_LOCALECONV_GROUPING
+ LCONV_NUMERIC_ENTRY(grouping),
+# endif
+ LCONV_NUMERIC_ENTRY(thousands_sep),
+ LCONV_NUMERIC_ENTRY(decimal_point),
+ {NULL, 0}
+ };
-# ifdef HAS_SOME_LANGINFO
+ /* When used to implement nl_langinfo(), we save time by only populating
+ * the hash with the field(s) needed. Thus we would need a data structure
+ * of just:
+ * LCONV_NUMERIC_ENTRY(decimal_point),
+ * {NULL, 0}
+ *
+ * By placing the decimal_point field last in the full structure, we can
+ * use just the tail for this bit of it, saving space. This macro yields
+ * the address of the sub structure. */
+# define DECIMAL_POINT_ADDRESS \
+ &lconv_numeric_strings[(C_ARRAY_LENGTH(lconv_numeric_strings) - 2)]
+
+ /* And the MONETARY string fields */
+ const lconv_offset_t lconv_monetary_strings[] = {
+ LCONV_MONETARY_ENTRY(int_curr_symbol),
+ LCONV_MONETARY_ENTRY(mon_decimal_point),
+# ifndef NO_LOCALECONV_MON_THOUSANDS_SEP
+ LCONV_MONETARY_ENTRY(mon_thousands_sep),
+# endif
+# ifndef NO_LOCALECONV_MON_GROUPING
+ LCONV_MONETARY_ENTRY(mon_grouping),
+# endif
+ LCONV_MONETARY_ENTRY(positive_sign),
+ LCONV_MONETARY_ENTRY(negative_sign),
+ LCONV_MONETARY_ENTRY(currency_symbol),
+ {NULL, 0}
+ };
- PERL_UNUSED_ARG(item);
+ /* Like above, this field being last can be used as a sub structure */
+# define CURRENCY_SYMBOL_ADDRESS \
+ &lconv_monetary_strings[(C_ARRAY_LENGTH(lconv_monetary_strings) - 2)]
- const bool is_localeconv_call = true;
+ /* Finally there are integer fields, all are for monetary purposes */
+ const lconv_offset_t lconv_integers[] = {
+ LCONV_ENTRY(int_frac_digits),
+ LCONV_ENTRY(frac_digits),
+ LCONV_ENTRY(p_sep_by_space),
+ LCONV_ENTRY(n_cs_precedes),
+ LCONV_ENTRY(n_sep_by_space),
+ LCONV_ENTRY(p_sign_posn),
+ LCONV_ENTRY(n_sign_posn),
+# ifdef HAS_LC_MONETARY_2008
+ LCONV_ENTRY(int_p_cs_precedes),
+ LCONV_ENTRY(int_p_sep_by_space),
+ LCONV_ENTRY(int_n_cs_precedes),
+ LCONV_ENTRY(int_n_sep_by_space),
+ LCONV_ENTRY(int_p_sign_posn),
+ LCONV_ENTRY(int_n_sign_posn),
+# endif
+ LCONV_ENTRY(p_cs_precedes),
+ {NULL, 0}
+ };
-# else
+ /* Like above, this field being last can be used as a sub structure */
+# define P_CS_PRECEDES_ADDRESS \
+ &lconv_integers[(C_ARRAY_LENGTH(lconv_integers) - 2)]
- /* Note we use this sentinel; this works because this only gets compiled
- * when our perl_langinfo.h is used, and that uses negative numbers for all
- * the items */
- const bool is_localeconv_call = (item == 0);
+ /* If we aren't paying attention to a given category, use LC_CTYPE instead;
+ * If not paying attention to that either, the code below should end up not
+ * using this. Make sure that things blow up if that avoidance gets lost,
+ * by setting the category to -1 */
+ unsigned int numeric_index;
+ unsigned int monetary_index;
+# ifdef USE_LOCALE_NUMERIC
+ numeric_index = LC_NUMERIC_INDEX_;
+# elif defined(USE_LOCALE_CTYPE)
+ numeric_index = LC_CTYPE_INDEX_;
+# else
+ numeric_index = (unsigned) -1;
# endif
+# ifdef USE_LOCALE_MONETARY
+ monetary_index = LC_MONETARY_INDEX_;
+# elif defined(USE_LOCALE_CTYPE)
+ monetary_index = LC_CTYPE_INDEX_;
+# else
+ monetary_index = (unsigned) -1;
+# endif
+
+ /* Some platforms, for correct non-mojibake results, require LC_CTYPE's
+ * locale to match LC_NUMERIC's for the numeric fields, and LC_MONETARY's
+ * for the monetary ones. What happens if LC_NUMERIC and LC_MONETARY
+ * aren't compatible? Wrong results. To avoid that, we call localeconv()
+ * twice, once for each locale, setting LC_CTYPE to match the category.
+ * But if the locales of both categories are the same, there is no need for
+ * a second call. Assume this is the case unless overridden below */
+ bool requires_2nd_localeconv = false;
+
+ /* The actual hash populating is done by S_populate_hash_from_localeconv().
+ * It gets passed an array of length two containing the data structure it
+ * is supposed to use to get the key names to fill the hash with. One
+ * element is alwasy for the NUMERIC strings (or NULL if none to use), and
+ * the other element similarly for the MONETARY ones. */
+# define NUMERIC_STRING_OFFSET 0
+# define MONETARY_STRING_OFFSET 1
+ const lconv_offset_t * strings[2] = { NULL, NULL };
+
+ /* This is a mask, with one bit to tell S_populate_hash_from_localeconv to
+ * populate the NUMERIC items; another bit for the MONETARY ones. This way
+ * it can choose which (or both) to populate from */
+ U32 index_bits = 0;
+
+ /* This converts from a locale index to its bit position in the above mask.
+ * */
+# define INDEX_TO_BIT(i) (1 << (i))
+
+ /* The two categories can have disparate locales. Initialize them to C and
+ * override later whichever one(s) we pay attention to */
+ const char * numeric_locale = "C";
+ const char * monetary_locale = "C";
+
+ /* This will be either 'numeric_locale' or 'monetary_locale' depending on
+ * what we are working on at the moment */
+ const char * locale;
- if (is_localeconv_call) {
- copy_localeconv = S_populate_localeconv;
+ /* The LC_MONETARY category also has some integer-valued fields, whose
+ * information is kept in a separate list */
+ const lconv_offset_t * integers;
+
+# ifdef HAS_SOME_LANGINFO
+
+ /* If the only use-case for this is the full localeconv(), the 'item'
+ * parameter is ignored. */
+ PERL_UNUSED_ARG(item);
+
+# else
+
+ /* This only gets compiled for the use-case of using localeconv() to
+ * emulate an nl_langinfo() missing from the platform.
+ *
+ * We need this substructure to only return this field for the THOUSEP
+ * item. The other items also need substructures, but they were handled
+ * above by placing the substructure's item at the end of the full one, so
+ * the data structure could do double duty. However, both this and
+ * RADIXCHAR would need to be in the final position of the same full
+ * structure; an impossibility. So make this into a separate structure */
+ const lconv_offset_t thousands_sep_string[] = {
+ LCONV_NUMERIC_ENTRY(thousands_sep),
+ {NULL, 0}
+ };
+
+ /* End of all the initialization of datastructures. Now for actual code.
+ *
+ * Without nl_langinfo(), the call to my_localeconv() could be for just one
+ * of the following 3 items to emulate nl_langinfo(). This is compiled
+ * only when using perl_langinfo.h, which we control, and it has been
+ * constructed so that no item is numbered 0.
+ *
+ * For each, setup the appropriate parameters for the call below to
+ * S_populate_hash_from_localeconv() */
+ if (item != 0) switch (item) {
+ default:
+ locale_panic_(Perl_form(aTHX_
+ "Unexpected item passed to my_localeconv: %d", item));
+ break;
# ifdef USE_LOCALE_NUMERIC
- /* Get the UTF8ness of the locales now to avoid repeating this for each
- * string returned by localeconv() */
- numeric_locale_is_utf8 = (is_locale_utf8(PL_numeric_name))
- ? LOCALE_IS_UTF8
- : LOCALE_NOT_UTF8;
+ case RADIXCHAR:
+ locale = numeric_locale = PL_numeric_name;
+ index_bits = INDEX_TO_BIT(LC_NUMERIC_INDEX_);
+ strings[NUMERIC_STRING_OFFSET] = DECIMAL_POINT_ADDRESS;
+ integers = NULL;
+ break;
+
+ case THOUSEP:
+ index_bits = INDEX_TO_BIT(LC_NUMERIC_INDEX_);
+ locale = numeric_locale = PL_numeric_name;
+ strings[NUMERIC_STRING_OFFSET] = thousands_sep_string;
+ integers = NULL;
+ break;
# endif
# ifdef USE_LOCALE_MONETARY
- monetary_locale_is_utf8 = (is_locale_utf8(querylocale_c(LC_MONETARY)))
- ? LOCALE_IS_UTF8
- : LOCALE_NOT_UTF8;
+ case CRNCYSTR:
+ index_bits = INDEX_TO_BIT(LC_MONETARY_INDEX_);
+ locale = monetary_locale = querylocale_i(LC_MONETARY_INDEX_);
+
+ /* This item needs the values for both the currency symbol, and another
+ * one used to construct the nl_langino()-compatible return */
+ strings[MONETARY_STRING_OFFSET] = CURRENCY_SYMBOL_ADDRESS;
+ integers = P_CS_PRECEDES_ADDRESS;
+ break;
+
+# endif
+
+ } /* End of switch() */
+
+ else /* End of for just one item to emulate nl_langinfo() */
# endif
- }
+ { /* Here, the call is for all of localeconv(). It has a bunch of
+ * items. As in the individual item case, set up the parameters for
+ * S_populate_hash_from_localeconv(); */
-# ifndef HAS_SOME_LANGINFO
+# ifdef USE_LOCALE_NUMERIC
+ numeric_locale = PL_numeric_name;
+# elif defined(USE_LOCALE_CTYPE)
+ numeric_locale = querylocale_i(numeric_index);
+# endif
+# if defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_CTYPE)
+ monetary_locale = querylocale_i(monetary_index);
+# endif
- else {
- copy_localeconv = S_get_nl_item_from_localeconv;
- }
+ /* The first call to S_populate_hash_from_localeconv() will be for the
+ * MONETARY values */
+ index_bits = INDEX_TO_BIT(monetary_index);
+ locale = monetary_locale;
+
+ /* And if the locales for the two categories are the same, we can also
+ * do the NUMERIC values in the same call */
+ if (strEQ(numeric_locale, monetary_locale)) {
+ index_bits |= INDEX_TO_BIT(numeric_index);
+ }
+ else {
+ requires_2nd_localeconv = true;
+ }
+
+ /* We always pass both sets of strings. 'index_bits' tells
+ * S_populate_hash_from_localeconv which to actually look at */
+ strings[NUMERIC_STRING_OFFSET] = lconv_numeric_strings;
+ strings[MONETARY_STRING_OFFSET] = lconv_monetary_strings;
+
+ /* And pass the integer values to populate; again 'index_bits' will
+ * say to use them or not */
+ integers = lconv_integers;
+
+ } /* End of call is for localeconv() */
+
+ /* The code above has determined the parameters to
+ S_populate_hash_from_localeconv() for both cases of an individual item
+ and for the entire structure. Below is code common to both */
+
+ HV * hv = newHV(); /* The returned hash, initially empty */
+ sv_2mortal((SV*)hv);
+
+ /* Call localeconv() and copy its results into the hash. All the
+ * parameters have been initialized above */
+ populate_hash_from_localeconv(hv,
+ locale,
+ index_bits,
+ strings,
+ integers
+ );
+
+ /* The above call may have done all the hash fields, but not always, as
+ * already explained. If we need a second call it is always for the
+ * NUMERIC fields */
+ if (requires_2nd_localeconv) {
+ populate_hash_from_localeconv(hv,
+ numeric_locale,
+ INDEX_TO_BIT(numeric_index),
+ strings,
+ NULL /* There are No NUMERIC integer
+ fields */
+ );
+ }
+
+ /* Here, the hash has been completely populated.
+ *
+ * Now go through all the string items and see if they should be marked as
+ * UTF-8 or not. This would have been more convenient and faster to do
+ * while populating the hash in the first place, but that operation has to
+ * be done within a critical section, keeping other threads from executing,
+ * so only the minimal amount of work necessary is done at that time.
+ *
+ * XXX On unthreaded perls, and on platforms where localeconv (or
+ * localeconv_l if present) this code could be #ifdef'd out, and the
+ * UTF8ness determined at hash population time, at an extra maintenance
+ * cost which khw doesn't think is worth it
+ */
+ for (unsigned int i = 0; i < 2; i++) { /* Try both types of strings */
+ if (! strings[i]) { /* Skip if no strings of this type */
+ continue;
+ }
+
+ locale = (i == NUMERIC_STRING_OFFSET)
+ ? numeric_locale
+ : monetary_locale;
+
+ locale_utf8ness_t locale_is_utf8 = LOCALE_UTF8NESS_UNKNOWN;
+
+# ifdef HAS_RELIABLE_UTF8NESS_DETERMINATION
+
+ /* It saves time in the loop below to have predetermined the UTF8ness
+ * of the locale. But only do so if the platform reliably has this
+ * information; otherwise to do it, this could recurse indefinitely.
+ *
+ * When we don't do it here, it will be done on a per-element basis in
+ * the loop. The per-element check is intelligent enough to not
+ * recurse */
+
+ locale_is_utf8 = (is_locale_utf8(locale))
+ ? LOCALE_IS_UTF8
+ : LOCALE_NOT_UTF8;
+
+ if (locale_is_utf8 == LOCALE_NOT_UTF8) {
+ continue; /* No string can be UTF-8 if the locale isn't */
+ }
# endif
- PERL_ARGS_ASSERT_MY_LOCALECONV;
-/*--------------------------------------------------------------------------*/
-/* Here, we are done with the common beginning of all the implementations of
- * my_localeconv(). Below are the two terminations of the function (except
- * the closing '}'. They are separated out because the preprocessor directives
- * were making the simple logic hard to follow. Each implementation ends with
- * the same few lines. khw decided to keep those separate because he thought
- * it was clearer to the reader.
- *--------------------------------------------------------------------------*/
-# if ! defined(TS_W32_BROKEN_LOCALECONV) /* Regular lconv() */
-
- /* There are so many locks because localeconv() deals with two
- * categories, and returns in a single global static buffer. Some
- * locks might be no-ops on this platform, but not others. We need to
- * lock if any one isn't a no-op. */
+ /* Examine each string */
+ while (1) {
+ const char * name = strings[i]->name;
-# ifdef USE_LOCALE_NUMERIC
+ if (! name) { /* Reached the end */
+ break;
+ }
- LC_NUMERIC_LOCK(0);
- const char * orig_switched_locale = NULL;
+ /* 'value' will contain the string that may need to be marked as
+ * UTF-8 */
+ SV ** value = hv_fetch(hv, name, strlen(name), true);
+ if (! value) {
+ continue;
+ }
- /* When called internally, are already switched into the proper numeric
- * locale; otherwise must toggle to it */
- if (is_localeconv_call) {
- orig_switched_locale = toggle_locale_c(LC_NUMERIC, PL_numeric_name);
- }
+ /* Determine if the string should be marked as UTF-8. */
+ if (UTF8NESS_YES == (get_locale_string_utf8ness_i(SvPVX(*value),
+ locale_is_utf8,
+ NULL, 0)))
+ {
+ SvUTF8_on(*value);
+ }
-# endif
+ strings[i]++; /* Iterate */
+ }
+ } /* End of fixing up UTF8ness */
- gwLOCALE_LOCK;
- retval = copy_localeconv(aTHX_ localeconv(),
- item,
- numeric_locale_is_utf8,
- monetary_locale_is_utf8);
- gwLOCALE_UNLOCK;
+ return hv;
+}
-# ifdef USE_LOCALE_NUMERIC
+STATIC void
+S_populate_hash_from_localeconv(pTHX_ HV * hv,
+
+ /* Switch to this locale to run
+ * localeconv() from */
+ const char * locale,
+
+ /* bit mask of which categories to
+ * populate */
+ const U32 which_mask,
+
+ /* strings[0] points the the numeric
+ * string fields; [1] to the monetary */
+ const lconv_offset_t * strings[2],
+
+ /* And to the monetary integer fields */
+ const lconv_offset_t * integers)
+{
+ PERL_ARGS_ASSERT_POPULATE_HASH_FROM_LOCALECONV;
+ PERL_UNUSED_ARG(which_mask); /* Some configurations don't use this;
+ complicated to figure out which */
+
+ /* Run localeconv() and copy some or all of its results to the input 'hv'
+ * hash. Most localeconv() implementations return the values in a global
+ * static buffer, so the operation must be performed in a critical section,
+ * ending only after the copy is completed. There are so many locks
+ * because localeconv() deals with two categories, and returns in a single
+ * global static buffer. Some locks might be no-ops on this platform, but
+ * not others. We need to lock if any one isn't a no-op. */
+
+# ifdef USE_LOCALE_CTYPE
+
+ /* Some platforms require LC_CTYPE to be congruent with the category we are
+ * looking for */
+ const char * orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale);
+
+# endif
+# ifdef USE_LOCALE_NUMERIC
- if (orig_switched_locale) {
- restore_toggled_locale_c(LC_NUMERIC, orig_switched_locale);
+ /* We need to toggle to the underlying NUMERIC locale if we are getting
+ * NUMERIC strings */
+ const char * orig_NUMERIC_locale = NULL;
+ if (which_mask & INDEX_TO_BIT(LC_NUMERIC_INDEX_)) {
+ LC_NUMERIC_LOCK(0);
+ orig_NUMERIC_locale = toggle_locale_i(LC_NUMERIC_INDEX_, locale);
}
- LC_NUMERIC_UNLOCK;
# endif
- return retval;
+ /* Finally ready to do the actual localeconv(). Lock to prevent other
+ * accesses until we have made a copy of its returned static buffer */
+ gwLOCALE_LOCK;
-/*--------------------------------------------------------------------------*/
-# else /* defined(TS_W32_BROKEN_LOCALECONV) */
+# ifdef TS_W32_BROKEN_LOCALECONV
- /* Last is a workaround for the broken localeconv() on Windows with
- * thread-safe locales prior to VS 15. It looks at the global locale
- * instead of the thread one. As a work-around, we toggle to the global
- * locale; populate the return; then toggle back. We have to use LC_ALL
- * instead of the individual categories because of another bug in Windows.
+ /* This is a workaround for another bug in Windows. localeconv() was
+ * broken with thread-safe locales prior to VS 15. It looks at the global
+ * locale instead of the thread one. As a work-around, we toggle to the
+ * global locale; populate the return; then toggle back. We have to use
+ * LC_ALL instead of the individual categories because of yet another bug
+ * in Windows. And this all has to be done in a critical section.
*
* This introduces a potential race with any other thread that has also
* converted to use the global locale, and doesn't protect its locale calls
@@ -3349,25 +3660,11 @@ S_my_localeconv(pTHX_ const int item)
* if the code is ported from working on another platform where there might
* be some reason to do this. But this is typically due to some
* alien-to-Perl library that thinks it owns locale setting. Such a
- * library usn't likely to exist on Windows, so such an application is
+ * library isn't likely to exist on Windows, so such an application is
* unlikely to be run on Windows
*/
bool restore_per_thread = FALSE;
-# ifdef USE_LOCALE_NUMERIC
-
- const char * orig_switched_locale = NULL;
-
- LC_NUMERIC_LOCK(0);
-
- /* When called internally, are already switched into the proper numeric
- * locale; otherwise must toggle to it */
- if (is_localeconv_call) {
- orig_switched_locale = toggle_locale_c(LC_NUMERIC, PL_numeric_name);
- }
-
-# endif
-
/* Save the per-thread locale state */
const char * save_thread = querylocale_c(LC_ALL);
@@ -3383,270 +3680,113 @@ S_my_localeconv(pTHX_ const int item)
const char * save_global = querylocale_c(LC_ALL);
void_setlocale_c(LC_ALL, save_thread);
- /* Safely stash the desired data */
- gwLOCALE_LOCK;
- retval = copy_localeconv(aTHX_ localeconv(),
- item,
- numeric_locale_is_utf8,
- monetary_locale_is_utf8);
- gwLOCALE_UNLOCK;
+# endif /* TS_W32_BROKEN_LOCALECONV */
- /* Restore the global locale's prior state */
- void_setlocale_c(LC_ALL, save_global);
+ /* Finally, do the actual localeconv */
+ const char *lcbuf_as_string = (const char *) localeconv();
- /* And back to per-thread locales */
- if (restore_per_thread) {
- _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
- }
-
- /* Restore the per-thread locale state */
- void_setlocale_c(LC_ALL, save_thread);
-
-# ifdef USE_LOCALE_NUMERIC
-
- if (orig_switched_locale) {
- restore_toggled_locale_c(LC_NUMERIC, orig_switched_locale);
- }
- LC_NUMERIC_UNLOCK;
-
-# endif
-
- return retval;
-
-# endif
-/*--------------------------------------------------------------------------*/
-}
+ /* Fill in the string fields of the HV* */
+ for (unsigned int i = 0; i < 2; i++) {
-STATIC HV *
-S_populate_localeconv(pTHX_ const struct lconv *lcbuf,
- const int unused,
- const locale_utf8ness_t numeric_locale_is_utf8,
- const locale_utf8ness_t monetary_locale_is_utf8)
-{
- /* This returns a mortalized hash containing all the elements returned by
- * localeconv(). It is used by Perl_localeconv() and POSIX::localeconv()
- */
- PERL_UNUSED_ARG(unused);
-
- struct lconv_offset {
- const char *name;
- size_t offset;
- };
-
- /* Create e.g.,
- {"thousands_sep", STRUCT_OFFSET(struct lconv, thousands_sep)},
- */
-# define LCONV_ENTRY(name) \
- {STRINGIFY(name), STRUCT_OFFSET(struct lconv, name)}
-
- /* Set up structures containing the documented fields. One structure for
- * LC_NUMERIC-controlled strings; one for LC_MONETARY ones, and a final one
- * of just numerics. */
# ifdef USE_LOCALE_NUMERIC
- static const struct lconv_offset lconv_numeric_strings[] = {
- LCONV_ENTRY(decimal_point),
- LCONV_ENTRY(thousands_sep),
-# ifndef NO_LOCALECONV_GROUPING
- LCONV_ENTRY(grouping),
-# endif
- {NULL, 0}
- };
+ /* One iteration is only for the numeric string fields */
+ if ( i == NUMERIC_STRING_OFFSET
+ && (which_mask & INDEX_TO_BIT(LC_NUMERIC_INDEX_)) == 0)
+ {
+ continue;
+ }
# endif
# ifdef USE_LOCALE_MONETARY
- static const struct lconv_offset lconv_monetary_strings[] = {
- LCONV_ENTRY(int_curr_symbol),
- LCONV_ENTRY(currency_symbol),
- LCONV_ENTRY(mon_decimal_point),
-# ifndef NO_LOCALECONV_MON_THOUSANDS_SEP
- LCONV_ENTRY(mon_thousands_sep),
-# endif
-# ifndef NO_LOCALECONV_MON_GROUPING
- LCONV_ENTRY(mon_grouping),
-# endif
- LCONV_ENTRY(positive_sign),
- LCONV_ENTRY(negative_sign),
- {NULL, 0}
- };
-
-# endif
-
- static const struct lconv_offset lconv_integers[] = {
-# ifdef USE_LOCALE_MONETARY
- LCONV_ENTRY(int_frac_digits),
- LCONV_ENTRY(frac_digits),
- LCONV_ENTRY(p_cs_precedes),
- LCONV_ENTRY(p_sep_by_space),
- LCONV_ENTRY(n_cs_precedes),
- LCONV_ENTRY(n_sep_by_space),
- LCONV_ENTRY(p_sign_posn),
- LCONV_ENTRY(n_sign_posn),
-# ifdef HAS_LC_MONETARY_2008
- LCONV_ENTRY(int_p_cs_precedes),
- LCONV_ENTRY(int_p_sep_by_space),
- LCONV_ENTRY(int_n_cs_precedes),
- LCONV_ENTRY(int_n_sep_by_space),
- LCONV_ENTRY(int_p_sign_posn),
- LCONV_ENTRY(int_n_sign_posn),
-# endif
-# endif
- {NULL, 0}
- };
+ /* The other iteration is only for the monetary string fields */
+ if ( i == MONETARY_STRING_OFFSET
+ && (which_mask & INDEX_TO_BIT(LC_MONETARY_INDEX_)) == 0)
+ {
+ continue;
+ }
- static const unsigned category_indices[] = {
-# ifdef USE_LOCALE_NUMERIC
- LC_NUMERIC_INDEX_,
-# endif
-# ifdef USE_LOCALE_MONETARY
- LC_MONETARY_INDEX_,
# endif
- (unsigned) -1 /* Just so the previous element can always end with a
- comma => subtract 1 below for the max loop index */
- };
- const char *ptr = (const char *) lcbuf;
- const struct lconv_offset *integers = lconv_integers;
-
- HV * retval = newHV();
- sv_2mortal((SV*)retval);
+ /* For each field for the given category ... */
+ const lconv_offset_t * category_strings = strings[i];
+ while (1) {
+ const char * name = category_strings->name;
+ if (! name) { /* Quit at the end */
+ break;
+ }
- PERL_ARGS_ASSERT_POPULATE_LOCALECONV;
+ /* we have set things up so that we know where in the returned
+ * structure, when viewed as a string, the corresponding value is.
+ * */
+ const char *value = *((const char **)( lcbuf_as_string
+ + category_strings->offset));
- /* For each enabled category ... */
- for (PERL_UINT_FAST8_T i = 0; i < C_ARRAY_LENGTH(category_indices) - 1; i++) {
- const unsigned cat_index = category_indices[i];
- locale_utf8ness_t locale_is_utf8 = LOCALE_UTF8NESS_UNKNOWN;
- const char *locale;
+ /* Set to get next string on next iteration */
+ category_strings++;
- /* ( = NULL silences a compiler warning; would segfault if it could
- * actually happen.) */
- const struct lconv_offset *strings = NULL;
+ /* Skip if this platform doesn't have this field. */
+ if (! value) {
+ continue;
+ }
-# ifdef USE_LOCALE_NUMERIC
- if (cat_index == LC_NUMERIC_INDEX_) {
- locale_is_utf8 = numeric_locale_is_utf8;
- strings = lconv_numeric_strings;
+ /* Copy to the hash */
+ (void) hv_store(hv,
+ name, strlen(name),
+ newSVpv(value, strlen(value)),
+ 0);
}
-# else
- PERL_UNUSED_ARG(numeric_locale_is_utf8);
-# endif
-# ifdef USE_LOCALE_MONETARY
- if (cat_index == LC_MONETARY_INDEX_) {
- locale_is_utf8 = monetary_locale_is_utf8;
- strings = lconv_monetary_strings;
- }
-# else
- PERL_UNUSED_ARG(monetary_locale_is_utf8);
-# endif
- assert(locale_is_utf8 != LOCALE_UTF8NESS_UNKNOWN);
-
- /* Iterate over the strings structure for this category */
- locale = querylocale_i(cat_index);
-
- while (strings->name) {
- const char *value = *((const char **)(ptr + strings->offset));
- if (value) {
- bool is_utf8 = /* Only make UTF-8 if required to */
- (UTF8NESS_YES == (get_locale_string_utf8ness_i(value,
- locale_is_utf8,
- locale,
- cat_index)));
- (void) hv_store(retval,
- strings->name,
- strlen(strings->name),
- newSVpvn_utf8(value, strlen(value), is_utf8),
- 0);
+ /* Add any int fields to the HV* */
+ if (i == MONETARY_STRING_OFFSET && integers) {
+ while (integers->name) {
+ const char value = *((const char *)( lcbuf_as_string
+ + integers->offset));
+ (void) hv_store(hv, integers->name,
+ strlen(integers->name), newSViv(value), 0);
+ integers++;
}
-
- strings++;
}
- }
-
- while (integers->name) {
- const char value = *((const char *)(ptr + integers->offset));
-
- (void) hv_store(retval, integers->name,
- strlen(integers->name), newSViv(value), 0);
- integers++;
- }
+ } /* End of loop through the fields */
- return retval;
-}
+ /* Done with copying to the hash. Can unwind the critical section locks */
-# ifndef HAS_SOME_LANGINFO
+# ifdef TS_W32_BROKEN_LOCALECONV
-STATIC HV *
-S_get_nl_item_from_localeconv(pTHX_ const struct lconv *lcbuf,
- const int item,
- const locale_utf8ness_t unused1,
- const locale_utf8ness_t unused2)
-{
- /* This is a helper function for my_localeconv(), which is called from
- * my_langinfo() to emulate the libc nl_langinfo() function on platforms
- * that don't have it available.
- *
- * This function acts as an extension to my_langinfo(), the intermediate
- * my_localeconv() call is to set up the locks and switch into the proper
- * locale. That logic exists for other reasons, and by doing it this way,
- * it doesn't have to be duplicated.
- *
- * This function extracts the current value of 'item' in the current locale
- * using the localconv() result also passed in, via 'lcbuf'. The other
- * parameter is unused, a placeholder so the signature of this function
- * matches another that does need it, and so the two functions can be
- * referred to by a single function pointer, to simplify the code below */
+ /* Restore the global locale's prior state */
+ void_setlocale_c(LC_ALL, save_global);
- const char * prefix = "";
- const char * temp = NULL;
+ /* And back to per-thread locales */
+ if (restore_per_thread) {
+ _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
+ }
- PERL_ARGS_ASSERT_GET_NL_ITEM_FROM_LOCALECONV;
- PERL_UNUSED_ARG(unused1);
- PERL_UNUSED_ARG(unused2);
+ /* Restore the per-thread locale state */
+ void_setlocale_c(LC_ALL, save_thread);
- switch (item) {
- case CRNCYSTR:
- temp = lcbuf->currency_symbol;
+# endif /* TS_W32_BROKEN_LOCALECONV */
- if (lcbuf->p_cs_precedes) {
+ gwLOCALE_UNLOCK; /* Finished with the critical section of a
+ globally-accessible buffer */
- /* khw couldn't find any documentation that CHAR_MAX is the signal,
- * but cygwin uses it thusly */
- if (lcbuf->p_cs_precedes == CHAR_MAX) {
- prefix = ".";
- }
- else {
- prefix = "-";
- }
- }
- else {
- prefix = "+";
- }
+# ifdef USE_LOCALE_NUMERIC
- break;
+ restore_toggled_locale_i(LC_NUMERIC_INDEX_, orig_NUMERIC_locale);
+ if (which_mask & INDEX_TO_BIT(LC_NUMERIC_INDEX_)) {
+ LC_NUMERIC_UNLOCK;
+ }
- case RADIXCHAR:
- temp = lcbuf->decimal_point;
- break;
+# endif
+# ifdef USE_LOCALE_CTYPE
- case THOUSEP:
- temp = lcbuf->thousands_sep;
- break;
+ restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
- default:
- locale_panic_(Perl_form(aTHX_
- "Unexpected item passed to populate_localeconv: %d", item));
- }
+# endif
- return (HV *) Perl_newSVpvf(aTHX_ "%s%s", prefix, temp);
}
-# endif /* ! Has some form of langinfo() */
-#endif /* Has some form of localeconv() and paying attn to a category it
- traffics in */
-
+#endif /* defined(HAS_LOCALECONV) */
#ifndef HAS_SOME_LANGINFO
typedef int nl_item; /* Substitute 'int' for emulated nl_langinfo() */
@@ -4158,24 +4298,78 @@ S_my_langinfo_i(pTHX_
* TS_W32_BROKEN_LOCALECONV, one could use GetNumberFormat and
* GetCurrencyFormat; patches welcome) */
+# define P_CS_PRECEDES "p_cs_precedes"
+# define CURRENCY_SYMBOL "currency_symbol"
+
/* case RADIXCHAR: // May drop down to here in some configurations */
- case CRNCYSTR:
case THOUSEP:
- {
- SV * string = (SV *) my_localeconv(item);
+ case CRNCYSTR:
+ {
+
+ /* The hash gets populated with just the field(s) related to 'item'. */
+ HV * result_hv = my_localeconv(item);
- retval = save_to_buffer(SvPV_nolen(string), retbufp, retbuf_sizep);
+ SV* string;
+ if (item != CRNCYSTR) {
- if (utf8ness) {
- is_utf8 = get_locale_string_utf8ness_i(retval,
- LOCALE_UTF8NESS_UNKNOWN,
- locale, cat_index);
+ /* These items have been populated with just one key => value */
+ (void) hv_iterinit(result_hv);
+ HE * entry = hv_iternext(result_hv);
+ string = hv_iterval(result_hv, entry);
+ }
+ else {
+
+ /* But CRNCYSTR localeconv() returns a slightly different value
+ * than the nl_langinfo() API calls for, so have to modify this one
+ * to conform. We need another value from localeconv() to know
+ * what to change it to. my_localeconv() has populated the hash
+ * with exactly both fields. Delete this one, leaving just the
+ * CRNCYSTR one in the hash */
+ SV* precedes = hv_delete(result_hv,
+ P_CS_PRECEDES, STRLENs(P_CS_PRECEDES),
+ 0);
+ if (! precedes) {
+ locale_panic_("my_localeconv() unexpectedly didn't return"
+ " a value for " P_CS_PRECEDES);
}
- SvREFCNT_dec_NN(string);
- break;
+ /* The modification is to prefix the localeconv() return with a
+ * single byte, calculated as follows: */
+ char prefix = (LIKELY(SvIV(precedes) != CHAR_MAX))
+ ? ((precedes != 0) ? '-' : '+')
+
+ /* khw couldn't find any documentation that
+ * CHAR_MAX is the signal, but cygwin uses it
+ * thusly, and it makes sense given that CHAR_MAX
+ * indicates the value isn't used, so it neither
+ * precedes nor succeeds */
+ : '.';
+
+ /* Now get CRNCYSTR */
+ (void) hv_iterinit(result_hv);
+ HE * entry = hv_iternext(result_hv);
+ string = hv_iterval(result_hv, entry);
+
+ /* And perform the modification */
+ Perl_sv_setpvf(aTHX_ string, "%c%s", prefix, SvPV_nolen(string));
+ }
+
+ /* Here, 'string' contains the value we want to return */
+ retval = save_to_buffer(SvPV_nolen(string), retbufp, retbuf_sizep);
+
+ if (utf8ness) {
+ is_utf8 = (SvUTF8(string))
+ ? UTF8NESS_YES
+ : (is_utf8_invariant_string( (U8 *) retval,
+ strlen(retval)))
+ ? UTF8NESS_IMMATERIAL
+ : UTF8NESS_NO;
}
+ break;
+
+ }
+
# endif /* Some form of localeconv */
# ifdef HAS_STRFTIME