summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc2
-rw-r--r--ext/I18N-Langinfo/Langinfo.xs64
-rw-r--r--locale.c123
-rw-r--r--pod/perldelta.pod7
-rw-r--r--proto.h4
5 files changed, 83 insertions, 117 deletions
diff --git a/embed.fnc b/embed.fnc
index c8658c4182..c453c2f49d 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1646,8 +1646,10 @@ ATdo |const char*|Perl_setlocale|const int category|NULLOK const char* locale
Ado |HV * |Perl_localeconv
#if defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H)
ATdo |const char*|Perl_langinfo|const nl_item item
+ATdo |const char*|Perl_langinfo8|const nl_item item|NULLOK utf8ness_t * utf8ness
#else
ATdo |const char*|Perl_langinfo|const int item
+ATdo |const char*|Perl_langinfo8|const int item|NULLOK utf8ness_t * utf8ness
#endif
pEX |int |mbtowc_|NULLOK const wchar_t * pwc|NULLOK const char * s|const Size_t len
CpO |int |init_i18nl10n |int printwarn
diff --git a/ext/I18N-Langinfo/Langinfo.xs b/ext/I18N-Langinfo/Langinfo.xs
index 904b424b19..3b05e9f791 100644
--- a/ext/I18N-Langinfo/Langinfo.xs
+++ b/ext/I18N-Langinfo/Langinfo.xs
@@ -25,8 +25,8 @@ SV*
langinfo(code)
int code
PREINIT:
- const char * value;
- STRLEN len;
+ const char * value;
+ utf8ness_t is_utf8;
PROTOTYPE: _
CODE:
#ifdef HAS_NL_LANGINFO
@@ -36,64 +36,8 @@ langinfo(code)
} else
#endif
{
- value = Perl_langinfo(code);
- len = strlen(value);
- RETVAL = newSVpvn(Perl_langinfo(code), len);
-
- /* Now see if the UTF-8 flag should be turned on */
-#ifdef USE_LOCALE_CTYPE /* No utf8 strings if not using LC_CTYPE */
-
- /* If 'value' is ASCII or not legal UTF-8, the flag doesn't get
- * turned on, so skip the followin code */
- if (is_utf8_non_invariant_string((U8 *) value, len)) {
- int category;
-
- /* Check if the locale is a UTF-8 one. The returns from
- * Perl_langinfo() are in different locale categories, so check the
- * category corresponding to this item */
- switch (code) {
-
- /* This should always return ASCII, so we could instead
- * legitimately panic here, but soldier on */
- case CODESET:
- category = LC_CTYPE;
- break;
-
- case RADIXCHAR:
- case THOUSEP:
-# ifdef USE_LOCALE_NUMERIC
- category = LC_NUMERIC;
-# else
- /* Not ideal, but the best we can do on such a platform */
- category = LC_CTYPE;
-# endif
- break;
-
- case CRNCYSTR:
-# ifdef USE_LOCALE_MONETARY
- category = LC_MONETARY;
-# else
- category = LC_CTYPE;
-# endif
- break;
-
- default:
-# ifdef USE_LOCALE_TIME
- category = LC_TIME;
-# else
- category = LC_CTYPE;
-# endif
- break;
- }
-
- /* Here the return is legal UTF-8. Turn on that flag if the
- * locale is UTF-8. (Otherwise, could just be a coincidence.)
- * */
- if (_is_cur_LC_category_utf8(category)) {
- SvUTF8_on(RETVAL);
- }
- }
-#endif /* USE_LOCALE_CTYPE */
+ value = Perl_langinfo8(code, &is_utf8);
+ RETVAL = newSVpvn_utf8(value, strlen(value), is_utf8 == UTF8NESS_YES);
}
OUTPUT:
diff --git a/locale.c b/locale.c
index 8b83a9de0d..51d66c53c1 100644
--- a/locale.c
+++ b/locale.c
@@ -3498,32 +3498,43 @@ S_get_nl_item_from_localeconv(pTHX_ const struct lconv *lcbuf,
#endif /* Has some form of localeconv() and paying attn to a category it
traffics in */
+#ifndef HAS_SOME_LANGINFO
+
+typedef int nl_item; /* Substitute 'int' for emulated nl_langinfo() */
+
+#endif
+
/*
-=for apidoc Perl_langinfo
+=for apidoc Perl_langinfo
+=for apidoc_item Perl_langinfo8
+
+C<Perl_langinfo> is an (almost) drop-in replacement for the system
+C<L<nl_langinfo(3)>>, taking the same C<item> parameter values, and returning
+the same information. But it is more thread-safe than regular
+C<nl_langinfo()>, and hides the quirks of Perl's locale handling from your
+code, and can be used on systems that lack a native C<nl_langinfo>.
-This is an (almost) drop-in replacement for the system C<L<nl_langinfo(3)>>,
-taking the same C<item> parameter values, and returning the same information.
-But it is more thread-safe than regular C<nl_langinfo()>, and hides the quirks
-of Perl's locale handling from your code, and can be used on systems that lack
-a native C<nl_langinfo>.
+However, you should instead use the improved version of this:
+L</Perl_langinfo8>, which behaves identically except for an additional
+parameter, a pointer to a variable declared as L</C<utf8ness_t>>, into which it
+returns to you how you should treat the returned string with regards to it
+being encoded in UTF-8 or not.
-Expanding on these:
+Concerning the differences between these and plain C<nl_langinfo()>:
=over
-=item *
+=item a.
-The reason it isn't quite a drop-in replacement is actually an advantage. The
-only difference is that it returns S<C<const char *>>, whereas plain
-C<nl_langinfo()> returns S<C<char *>>, but you are (only by documentation)
-forbidden to write into the buffer. By declaring this C<const>, the compiler
-enforces this restriction, so if it is violated, you know at compilation time,
-rather than getting segfaults at runtime.
+C<Perl_langinfo8> has an extra parameter, described above. Besides this, the
+other reasons they aren't quite a drop-in replacement is actually an advantage.
+The C<const>ness of the return allows the compiler to catch attempts to write
+into the returned buffer, which is illegal and could cause run-time crashes.
-=item *
+=item b.
-It delivers the correct results for the C<RADIXCHAR> and C<THOUSEP> items,
+They deliver the correct results for the C<RADIXCHAR> and C<THOUSEP> items,
without you having to write extra code. The reason for the extra code would be
because these are from the C<LC_NUMERIC> locale category, which is normally
kept set by Perl so that the radix is a dot, and the separator is the empty
@@ -3535,50 +3546,50 @@ the other advantages of C<Perl_langinfo()>; not keeping C<LC_NUMERIC> in the C
(or equivalent) locale would break a lot of CPAN, which is expecting the radix
(decimal point) character to be a dot.)
-=item *
+=item c.
-The system function it replaces can have its static return buffer trashed,
+The system function they replace can have its static return buffer trashed,
not only by a subsequent call to that function, but by a C<freelocale>,
-C<setlocale>, or other locale change. The returned buffer of this function is
-not changed until the next call to it, so the buffer is never in a trashed
-state.
+C<setlocale>, or other locale change. The returned buffer of these functions
+is not changed until the next call to one or the other, so the buffer is never
+in a trashed state.
-=item *
+=item d.
-Its return buffer is per-thread, so it also is never overwritten by a call to
-this function from another thread; unlike the function it replaces.
+The return buffer is per-thread, so it also is never overwritten by a call to
+these functions from another thread; unlike the function it replaces.
-=item *
+=item e.
-But most importantly, it works on systems that don't have C<nl_langinfo>, such
-as Windows, hence makes your code more portable. Of the fifty-some possible
+But most importantly, they work on systems that don't have C<nl_langinfo>, such
+as Windows, hence making your code more portable. Of the fifty-some possible
items specified by the POSIX 2008 standard,
L<http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>,
only one is completely unimplemented, though on non-Windows platforms, another
-significant one is also not implemented). It uses various techniques to
+significant one is not fully implemented). They use various techniques to
recover the other items, including calling C<L<localeconv(3)>>, and
C<L<strftime(3)>>, both of which are specified in C89, so should be always be
available. Later C<strftime()> versions have additional capabilities; C<""> is
-returned for those not available on your system.
+returned for any item not available on your system.
-It is important to note that when called with an item that is recovered by
+It is important to note that, when called with an item that is recovered by
using C<localeconv>, the buffer from any previous explicit call to
-C<localeconv> will be overwritten. This means you must save that buffer's
-contents if you need to access them after a call to this function. (But note
-that you might not want to be using C<localeconv()> directly anyway, because of
-issues like the ones listed in the second item of this list (above) for
-C<RADIXCHAR> and C<THOUSEP>. You can use the methods given in L<perlcall> to
-call L<POSIX/localeconv> and avoid all the issues, but then you have a hash to
-unpack).
+C<L<localeconv(3)>> will be overwritten. But you shouldn't be using
+C<localeconv> anyway because it is is very much not thread-safe, and suffers
+from the same problems outlined in item 'b.' above for the fields it returns that
+are controlled by the LC_NUMERIC locale category. Instead, avoid all of those
+problems by calling L</Perl_localeconv>, which is thread-safe; or by using the
+methods given in L<perlcall> to call
+L<C<POSIX::localeconv()>|POSIX/localeconv>, which is also thread-safe.
+
+=back
The details for those items which may deviate from what this emulation returns
and what a native C<nl_langinfo()> would return are specified in
L<I18N::Langinfo>.
-=back
-
-When using C<Perl_langinfo> on systems that don't have a native
-C<nl_langinfo()>, you must
+When using C<Perl_langinfo8> (or plain C<Perl_langinfo>) on systems that don't
+have a native C<nl_langinfo()>, you must
#include "perl_langinfo.h"
@@ -3587,30 +3598,28 @@ C<#include> with this one. (Doing it this way keeps out the symbols that plain
C<langinfo.h> would try to import into the namespace for code that doesn't need
it.)
-The original impetus for C<Perl_langinfo()> was so that code that needs to
-find out the current currency symbol, floating point radix character, or digit
-grouping separator can use, on all systems, the simpler and more
-thread-friendly C<nl_langinfo> API instead of C<L<localeconv(3)>> which is a
-pain to make thread-friendly. For other fields returned by C<localeconv>, it
-is better to use the methods given in L<perlcall> to call
-L<C<POSIX::localeconv()>|POSIX/localeconv>, which is thread-friendly.
-
=cut
*/
-#ifndef HAS_SOME_LANGINFO
-
-typedef int nl_item; /* Substitute 'int' for emulated nl_langinfo() */
-
-#endif
-
const char *
Perl_langinfo(const nl_item item)
{
+ return Perl_langinfo8(item, NULL);
+}
+
+const char *
+Perl_langinfo8(const nl_item item, utf8ness_t * utf8ness)
+{
dTHX;
unsigned cat_index;
+ PERL_ARGS_ASSERT_PERL_LANGINFO8;
+
+ if (utf8ness) { /* Assume for now */
+ *utf8ness = UTF8NESS_IMMATERIAL;
+ }
+
/* Find the locale category that controls the input 'item'. If we are not
* paying attention to that category, instead return a default value. Also
* return the default value if there is no way for us to figure out the
@@ -3774,7 +3783,7 @@ Perl_langinfo(const nl_item item)
/* Use either the underlying numeric, or the other underlying categories */
if (cat_index == LC_NUMERIC_INDEX_) {
return my_langinfo_c(item, LC_NUMERIC, PL_numeric_name,
- &PL_langinfo_buf, &PL_langinfo_bufsize, NULL);
+ &PL_langinfo_buf, &PL_langinfo_bufsize, utf8ness);
}
else
@@ -3782,7 +3791,7 @@ Perl_langinfo(const nl_item item)
{
return my_langinfo_i(item, cat_index, querylocale_i(cat_index),
- &PL_langinfo_buf, &PL_langinfo_bufsize, NULL);
+ &PL_langinfo_buf, &PL_langinfo_bufsize, utf8ness);
}
#endif
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 82db679dd9..89af9bb80d 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -359,6 +359,13 @@ same as plain L<perlapi/C<my_strftime>>, but with an extra parameter
that allows the caller to simply and reliably know if the returned
string is UTF-8.
+=item *
+
+A new API function, L<perlapi/C<Perl_langinfo8>> is added. This is the
+same as plain L<perlapi/C<Perl_langinfo>>, but with an extra parameter
+that allows the caller to simply and reliably know if the returned
+string is UTF-8.
+
=back
=head1 Selected Bug Fixes
diff --git a/proto.h b/proto.h
index 19993c53fe..4ba7334770 100644
--- a/proto.h
+++ b/proto.h
@@ -4681,6 +4681,8 @@ STATIC int S_sv_2iuv_non_preserve(pTHX_ SV *const sv);
#if !(defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H))
PERL_CALLCONV const char* Perl_langinfo(const int item);
#define PERL_ARGS_ASSERT_PERL_LANGINFO
+PERL_CALLCONV const char* Perl_langinfo8(const int item, utf8ness_t * utf8ness);
+#define PERL_ARGS_ASSERT_PERL_LANGINFO8
#endif
#if !(defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L))
# if defined(PERL_IN_LOCALE_C)
@@ -5222,6 +5224,8 @@ PERL_CALLCONV I32 Perl_do_shmio(pTHX_ I32 optype, SV** mark, SV** sp)
#if defined(HAS_NL_LANGINFO) && defined(PERL_LANGINFO_H)
PERL_CALLCONV const char* Perl_langinfo(const nl_item item);
#define PERL_ARGS_ASSERT_PERL_LANGINFO
+PERL_CALLCONV const char* Perl_langinfo8(const nl_item item, utf8ness_t * utf8ness);
+#define PERL_ARGS_ASSERT_PERL_LANGINFO8
#endif
#if defined(HAS_NL_LANGINFO) || defined(HAS_NL_LANGINFO_L)
# if defined(PERL_IN_LOCALE_C)