summaryrefslogtreecommitdiff
path: root/locale.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2019-02-04 18:58:26 -0700
committerKarl Williamson <khw@cpan.org>2019-02-05 11:44:29 -0700
commit30d8090de81085bd3dff00c83a7ab6d3ff8dfc8d (patch)
treef3512f86d360f5c2fbaf05b88627dc2ecfafc20f /locale.c
parent26be5fe6b967cd228768870a4b6138947d418d39 (diff)
downloadperl-30d8090de81085bd3dff00c83a7ab6d3ff8dfc8d.tar.gz
locale.c: Add detection of Turkic UTF-8 locales
When switching into a new locale, after it has been determined that it is a UTF-8 locale, the code now also checks whether the locale is a specialized Turkic one, which has a couple of slightly modified case-changing rules. If so, it sets a flag indicating this. The code added in previous commits in this series checks whether that flag is set when it is actually paying attention to the background locale, and if so behaves according to Unicode Turkic rules.
Diffstat (limited to 'locale.c')
-rw-r--r--locale.c25
1 files changed, 24 insertions, 1 deletions
diff --git a/locale.c b/locale.c
index 383b2137c0..07e5525c10 100644
--- a/locale.c
+++ b/locale.c
@@ -1507,6 +1507,7 @@ S_new_ctype(pTHX_ const char *newctype)
/* Don't check for problems if we are suppressing the warnings */
bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST);
+ bool maybe_utf8_turkic = FALSE;
PERL_ARGS_ASSERT_NEW_CTYPE;
@@ -1523,6 +1524,14 @@ S_new_ctype(pTHX_ const char *newctype)
* handle this specially because of the three problematic code points */
if (PL_in_utf8_CTYPE_locale) {
Copy(PL_fold_latin1, PL_fold_locale, 256, U8);
+
+ /* UTF-8 locales can have special handling for 'I' and 'i' if they are
+ * Turkic. Make sure these two are the only anomalies. (We don't use
+ * towupper and towlower because they aren't in C89.) */
+ if (toupper('i') == 'i' && tolower('I') == 'I') {
+ check_for_problems = TRUE;
+ maybe_utf8_turkic = TRUE;
+ }
}
/* We don't populate the other lists if a UTF-8 locale, but do check that
@@ -1668,7 +1677,18 @@ S_new_ctype(pTHX_ const char *newctype)
}
}
+ if (bad_count == 2 && maybe_utf8_turkic) {
+ bad_count = 0;
+ *bad_chars_list = '\0';
+ PL_fold_locale['I'] = 'I';
+ PL_fold_locale['i'] = 'i';
+ PL_in_utf8_turkic_locale = TRUE;
+ DEBUG_L(PerlIO_printf(Perl_debug_log, "%s:%d: %s is turkic\n",
+ __FILE__, __LINE__, newctype));
+ }
+ else {
PL_in_utf8_turkic_locale = FALSE;
+ }
# ifdef MB_CUR_MAX
@@ -1695,7 +1715,10 @@ S_new_ctype(pTHX_ const char *newctype)
# endif
- if (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale)) {
+ /* If we found problems and we want them output, do so */
+ if ( (UNLIKELY(bad_count) || UNLIKELY(multi_byte_locale))
+ && (LIKELY(ckWARN_d(WARN_LOCALE)) || UNLIKELY(DEBUG_L_TEST)))
+ {
if (UNLIKELY(bad_count) && PL_in_utf8_CTYPE_locale) {
PL_warn_locale = Perl_newSVpvf(aTHX_
"Locale '%s' contains (at least) the following characters"