locale.c More new_ctype() refactoring

Merge commit e4bbbfe02b9e9aae521b164eba0e518ca478945f refactored this function somewhat. Most of the commits in that series dated to before when we could assume C99. In re-reading the result, I saw some opportunities to take advantage of C99, by, for example, moving declarations closer to their use. I also hadn't previously noticed that when changing to the C locale (a frequent occurrence), various things that were being recalculated are determinable at compile time. So this commit returns early under this circumstance. And, an obsolete comment is removed.
author: Karl Williamson <khw@cpan.org> 2022-09-04 14:00:41 -0600
committer: Karl Williamson <khw@cpan.org> 2022-09-10 09:29:33 -0600
commit: 360374e28fc8b4afaaf790f14eef602c9a9b192c (patch)
tree: 7fd1bfa7bc8011afd65dfb6f590e01353bd44153 /locale.c
parent: e933b48fd1674fe37ae768a4726cbd451595dc07 (diff)
download: perl-360374e28fc8b4afaaf790f14eef602c9a9b192c.tar.gz
1 files changed, 21 insertions, 28 deletions
diff --git a/locale.c b/locale.c
index 30a9651e8a..40031fa69e 100644
--- a/locale.c
+++ b/locale.c
@@ -1819,25 +1819,14 @@ Perl_set_numeric_underlying(pTHX)
 STATIC void
 S_new_ctype(pTHX_ const char *newctype)
 {
+    PERL_ARGS_ASSERT_NEW_CTYPE;
 
     /* Called after each libc setlocale() call affecting LC_CTYPE, to tell
      * core Perl this and that 'newctype' is the name of the new locale.
      *
      * This function sets up the folding arrays for all 256 bytes, assuming
      * that tofold() is tolc() since fold case is not a concept in POSIX,
-     *
-     * Any code changing the locale (outside this file) should use
-     * Perl_setlocale or POSIX::setlocale, which call this function.  Therefore
-     * this function should be called directly only from this file and from
-     * POSIX::setlocale() */
-
-    unsigned int i;
-
-    /* Don't check for problems if we are suppressing the warnings */
-    bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST);
-    bool maybe_utf8_turkic = FALSE;
-
-    PERL_ARGS_ASSERT_NEW_CTYPE;
+     */
 
     DEBUG_L(PerlIO_printf(Perl_debug_log, "Entering new_ctype(%s)\n", newctype));
 
@@ -1857,23 +1846,27 @@ S_new_ctype(pTHX_ const char *newctype)
     Safefree(PL_ctype_name);
     PL_ctype_name = "";
 
-    /* Guard against the is_locale_utf8() call potentially zapping newctype.
-     * This is not extra work as the cache is set to this a few lines down, and
-     * that needs to be copied anyway */
-    newctype = savepv(newctype);
+    PL_in_utf8_turkic_locale = FALSE;
+
+    /* For the C locale, just use the standard folds, and we know there are no
+     * glitches possible, so return early */
+    if (isNAME_C_OR_POSIX(newctype)) {
+        Copy(PL_fold, PL_fold_locale, 256, U8);
+        PL_ctype_name = savepv(newctype);
+        PL_in_utf8_CTYPE_locale = FALSE;
+        return;
+    }
 
-    /* With cache cleared, this will know to compute a new value */
+    /* The cache being cleared signals this to compute a new value */
     PL_in_utf8_CTYPE_locale = is_locale_utf8(newctype);
 
-    /* Cache new name */
-    PL_ctype_name = newctype;
+    PL_ctype_name = savepv(newctype);
+    bool maybe_utf8_turkic = FALSE;
 
-    PL_in_utf8_turkic_locale = FALSE;
+    /* Don't check for problems if we are suppressing the warnings */
+    bool check_for_problems = ckWARN_d(WARN_LOCALE) || UNLIKELY(DEBUG_L_TEST);
 
-    if (isNAME_C_OR_POSIX(PL_ctype_name)) {
-        Copy(PL_fold, PL_fold_locale, 256, U8);
-    }
-    else if (PL_in_utf8_CTYPE_locale) {
+    if (PL_in_utf8_CTYPE_locale) {
 
         /* A UTF-8 locale gets standard rules.  But note that code still has to
          * handle this specially because of the three problematic code points
@@ -1908,7 +1901,7 @@ S_new_ctype(pTHX_ const char *newctype)
         bool found_unexpected = FALSE;
 
         if (DEBUG_Lv_TEST) {
-            for (i = 128; i < 256; i++) {
+            for (unsigned i = 128; i < 256; i++) {
                 int j = LATIN1_TO_NATIVE(i);
                 if (toU8_LOWER_LC(j) != j || toU8_UPPER_LC(j) != j) {
                     has_non_ascii_fold = TRUE;
@@ -1919,7 +1912,7 @@ S_new_ctype(pTHX_ const char *newctype)
 
 #    endif
 
-        for (i = 0; i < 256; i++) {
+        for (unsigned i = 0; i < 256; i++) {
             if (isU8_UPPER_LC(i))
                 PL_fold_locale[i] = (U8) toU8_LOWER_LC(i);
             else if (isU8_LOWER_LC(i))
@@ -2031,7 +2024,7 @@ S_new_ctype(pTHX_ const char *newctype)
         char bad_chars_list[ (94 * 4) + (3 * 5) + 1 ] = { '\0' };
         unsigned int bad_count = 0;         /* Count of bad characters */
 
-        for (i = 0; i < 256; i++) {
+        for (unsigned i = 0; i < 256; i++) {
 
             /* If checking for locale problems, see if the native ASCII-range
              * printables plus \n and \t are in their expected categories in
author	Karl Williamson <khw@cpan.org>	2022-09-04 14:00:41 -0600
committer	Karl Williamson <khw@cpan.org>	2022-09-10 09:29:33 -0600
commit	360374e28fc8b4afaaf790f14eef602c9a9b192c (patch)
tree	7fd1bfa7bc8011afd65dfb6f590e01353bd44153 /locale.c
parent	e933b48fd1674fe37ae768a4726cbd451595dc07 (diff)
download	perl-360374e28fc8b4afaaf790f14eef602c9a9b192c.tar.gz