summary refs log tree commit diff
path: root/locale.c
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2022-10-22 21:39:52 -0600
committer Karl Williamson <khw@cpan.org> 2022-10-31 10:15:40 -0600
commit 73d66f354cd4df0324b11e46af48f66bd5c1dd15 (patch)
tree fa2287a66e486eb33fbbd0b255fa19d2efaa1caf /locale.c
parent fa88efeacd5d994ceacc035be21adaff50aaac7f (diff)
download perl-73d66f354cd4df0324b11e46af48f66bd5c1dd15.tar.gz
locale.c: Handle locale names with '@'
The XPG locale name standard says the codeset is the part of the name after any dot, terminated by the end of string or an '@'. Prior to this commit, the possibility of an '@' was not considered.
Diffstat (limited to 'locale.c')
-rw-r--r-- locale.c 21
1 files changed, 18 insertions, 3 deletions
diff --git a/locale.c b/locale.c
index 62ec596077..0cdc06d2e8 100644
--- a/locale.c
+++ b/locale.c
@@ -4442,7 +4442,11 @@ S_my_langinfo_i(pTHX_
* part of the locale name. This is very less than ideal; often there
* is no code set in the name; and at other times they even lie.
*
- * Find any dot in the locale name */
+ * But there is an XPG standard syntax, which many locales follow:
+ *
+ * language[_territory[.codeset]][@modifier]
+ *
+ * So we take the part between the dot and any '@' */
retval = (const char *) strchr(locale, '.');
if (! retval) {
retval = ""; /* Alas, no dot */
@@ -4452,6 +4456,17 @@ S_my_langinfo_i(pTHX_
/* Use everything past the dot */
retval++;
+ /* And stop before any '@' */
+ char * modifier = strchr(retval, '@');
+ if (modifier) {
+ char * code_set_name;
+ const Size_t name_len = modifier - retval;
+ Newx(code_set_name, name_len + 1, char); /* +1 for NUL */
+ my_strlcpy(code_set_name, retval, name_len + 1);
+ SAVEFREEPV(code_set_name);
+ retval = code_set_name;
+ }
+
# if defined(HAS_MBTOWC) || defined(HAS_MBRTOWC)
/* When these functions, are available, they were tried earlier and
@@ -4464,8 +4479,8 @@ S_my_langinfo_i(pTHX_
# endif
- /* Otherwise the code set name is considered to be everything past the
- * dot. */
+ /* Otherwise the code set name is considered to be everything between
+ * the dot and the '@' */
retval = save_to_buffer(retval, retbufp, retbuf_sizep);
break;