style: Detabify indentation of the C code maintained by the core.

This just detabifies to get rid of the mixed tab/space indentation. Applying consistent indentation and dealing with other tabs is a separate issue. Done with `expand -i`.
* vutil.* left alone; it's part of version.
* Left regen-managed files alone for now.
author: Michael G. Schwern <schwern@pobox.com> 2020-12-28 18:04:52 -0800
committer: Karl Williamson <khw@cpan.org> 2021-01-17 09:18:15 -0700
commit: 1604cfb0273418ed479719f39def5ee559bffda2 (patch)
tree: 166a5ab935a029ab86cf6295d6f3cb77da22e559 /utf8.c
parent: 557ff1b2a4ecd18fe9229e7e0eb8fa123adc5670 (diff)
download: perl-1604cfb0273418ed479719f39def5ee559bffda2.tar.gz
1 files changed, 255 insertions, 255 deletions
diff --git a/utf8.c b/utf8.c
index add8c093aa..72d3ac2b7a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -99,7 +99,7 @@ Perl__force_out_malformed_utf8_message(pTHX_
     LEAVE;
 
     if (! errors) {
-	Perl_croak(aTHX_ "panic: _force_out_malformed_utf8_message should"
+        Perl_croak(aTHX_ "panic: _force_out_malformed_utf8_message should"
                          " be called only when there are errors found");
     }
 
@@ -264,8 +264,8 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
     }
 
     if (OFFUNI_IS_INVARIANT(uv)) {
-	*d++ = LATIN1_TO_NATIVE(uv);
-	return d;
+        *d++ = LATIN1_TO_NATIVE(uv);
+        return d;
     }
 
     if (uv <= MAX_UTF8_TWO_BYTE) {
@@ -281,9 +281,9 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
      * on ASCII; so 3 bytes covers the range 0x400-0x3FFF on EBCDIC;
      * 0x800-0xFFFF on ASCII */
     if (uv < (16 * (1U << (2 * SHIFT)))) {
-	*d++ = I8_TO_NATIVE_UTF8(( uv >> ((3 - 1) * SHIFT)) | UTF_START_MARK(3));
-	*d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) |   MARK);
-	*d++ = I8_TO_NATIVE_UTF8(( uv  /* (1 - 1) */        & MASK) |   MARK);
+        *d++ = I8_TO_NATIVE_UTF8(( uv >> ((3 - 1) * SHIFT)) | UTF_START_MARK(3));
+        *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) |   MARK);
+        *d++ = I8_TO_NATIVE_UTF8(( uv  /* (1 - 1) */        & MASK) |   MARK);
 
 #ifndef EBCDIC  /* These problematic code points are 4 bytes on EBCDIC, so
                    aren't tested here */
@@ -300,7 +300,7 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
             }
         }
 #endif
-	return d;
+        return d;
     }
 
     /* Not 3-byte; that means the code point is at least 0x1_0000 on ASCII
@@ -364,10 +364,10 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
      * ASCII, so 4 bytes covers the range 0x4000-0x3_FFFF on EBCDIC;
      * 0x1_0000-0x1F_FFFF on ASCII */
     if (uv < (8 * (1U << (3 * SHIFT)))) {
-	*d++ = I8_TO_NATIVE_UTF8(( uv >> ((4 - 1) * SHIFT)) | UTF_START_MARK(4));
-	*d++ = I8_TO_NATIVE_UTF8(((uv >> ((3 - 1) * SHIFT)) & MASK) |   MARK);
-	*d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) |   MARK);
-	*d++ = I8_TO_NATIVE_UTF8(( uv  /* (1 - 1) */        & MASK) |   MARK);
+        *d++ = I8_TO_NATIVE_UTF8(( uv >> ((4 - 1) * SHIFT)) | UTF_START_MARK(4));
+        *d++ = I8_TO_NATIVE_UTF8(((uv >> ((3 - 1) * SHIFT)) & MASK) |   MARK);
+        *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) |   MARK);
+        *d++ = I8_TO_NATIVE_UTF8(( uv  /* (1 - 1) */        & MASK) |   MARK);
 
 #ifdef EBCDIC   /* These were handled on ASCII platforms in the code for 3-byte
                    characters.  The end-plane non-characters for EBCDIC were
@@ -380,7 +380,7 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
         }
 #endif
 
-	return d;
+        return d;
     }
 
     /* Not 4-byte; that means the code point is at least 0x20_0000 on ASCII
@@ -391,14 +391,14 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
      * khw believes that less code outweighs slight performance gains. */
 
     {
-	STRLEN len  = OFFUNISKIP(uv);
-	U8 *p = d+len-1;
-	while (p > d) {
-	    *p-- = I8_TO_NATIVE_UTF8((uv & MASK) | MARK);
-	    uv >>= SHIFT;
-	}
-	*p = I8_TO_NATIVE_UTF8((uv & UTF_START_MASK(len)) | UTF_START_MARK(len));
-	return d+len;
+        STRLEN len  = OFFUNISKIP(uv);
+        U8 *p = d+len-1;
+        while (p > d) {
+            *p-- = I8_TO_NATIVE_UTF8((uv & MASK) | MARK);
+            uv >>= SHIFT;
+        }
+        *p = I8_TO_NATIVE_UTF8((uv & UTF_START_MASK(len)) | UTF_START_MARK(len));
+        return d+len;
     }
 }
 
@@ -1659,7 +1659,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
         possible_problems |= UTF8_GOT_EMPTY;
         curlen = 0;
         uv = UNICODE_REPLACEMENT;
-	goto ready_to_handle_errors;
+        goto ready_to_handle_errors;
     }
 
     expectlen = UTF8SKIP(s);
@@ -1669,15 +1669,15 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
      * things up here to return it.  It will be overriden only in those rare
      * cases where a malformation is found */
     if (retlen) {
-	*retlen = expectlen;
+        *retlen = expectlen;
     }
 
     /* A continuation character can't start a valid sequence */
     if (UNLIKELY(UTF8_IS_CONTINUATION(uv))) {
-	possible_problems |= UTF8_GOT_CONTINUATION;
+        possible_problems |= UTF8_GOT_CONTINUATION;
         curlen = 1;
         uv = UNICODE_REPLACEMENT;
-	goto ready_to_handle_errors;
+        goto ready_to_handle_errors;
     }
 
     /* Here is not a continuation byte, nor an invariant.  The only thing left
@@ -1703,8 +1703,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
     /* Now, loop through the remaining bytes in the character's sequence,
      * accumulating each into the working value as we go. */
     for (s = s0 + 1; s < send; s++) {
-	if (LIKELY(UTF8_IS_CONTINUATION(*s))) {
-	    uv = UTF8_ACCUMULATE(uv, *s);
+        if (LIKELY(UTF8_IS_CONTINUATION(*s))) {
+            uv = UTF8_ACCUMULATE(uv, *s);
             continue;
         }
 
@@ -1808,11 +1808,11 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
                 && LIKELY(! (possible_problems & UTF8_GOT_OVERFLOW))
                 && (   isUTF8_POSSIBLY_PROBLEMATIC(*adjusted_s0)
                     || UNLIKELY(isUTF8_PERL_EXTENDED(s0)))))
-	&& ((flags & ( UTF8_DISALLOW_NONCHAR
+        && ((flags & ( UTF8_DISALLOW_NONCHAR
                       |UTF8_DISALLOW_SURROGATE
                       |UTF8_DISALLOW_SUPER
                       |UTF8_DISALLOW_PERL_EXTENDED
-	              |UTF8_WARN_NONCHAR
+                      |UTF8_WARN_NONCHAR
                       |UTF8_WARN_SURROGATE
                       |UTF8_WARN_SUPER
                       |UTF8_WARN_PERL_EXTENDED))))
@@ -2373,20 +2373,20 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e)
      * In other words: in Perl UTF-8 is not just for Unicode. */
 
     if (UNLIKELY(e < s))
-	goto warn_and_return;
+        goto warn_and_return;
     while (s < e) {
         s += UTF8SKIP(s);
-	len++;
+        len++;
     }
 
     if (UNLIKELY(e != s)) {
-	len--;
+        len--;
         warn_and_return:
-	if (PL_op)
-	    Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
-			     "%s in %s", unees, OP_DESC(PL_op));
-	else
-	    Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
+        if (PL_op)
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+                             "%s in %s", unees, OP_DESC(PL_op));
+        else
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
     }
 
     return len;
@@ -2419,41 +2419,41 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen)
 
     while (b < bend && u < uend) {
         U8 c = *u++;
-	if (!UTF8_IS_INVARIANT(c)) {
-	    if (UTF8_IS_DOWNGRADEABLE_START(c)) {
-		if (u < uend) {
-		    U8 c1 = *u++;
-		    if (UTF8_IS_CONTINUATION(c1)) {
-			c = EIGHT_BIT_UTF8_TO_NATIVE(c, c1);
-		    } else {
+        if (!UTF8_IS_INVARIANT(c)) {
+            if (UTF8_IS_DOWNGRADEABLE_START(c)) {
+                if (u < uend) {
+                    U8 c1 = *u++;
+                    if (UTF8_IS_CONTINUATION(c1)) {
+                        c = EIGHT_BIT_UTF8_TO_NATIVE(c, c1);
+                    } else {
                         /* diag_listed_as: Malformed UTF-8 character%s */
-			Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+                        Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
                               "%s %s%s",
                               unexpected_non_continuation_text(u - 2, 2, 1, 2),
                               PL_op ? " in " : "",
                               PL_op ? OP_DESC(PL_op) : "");
-			return -2;
-		    }
-		} else {
-		    if (PL_op)
-			Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
-					 "%s in %s", unees, OP_DESC(PL_op));
-		    else
-			Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
-		    return -2; /* Really want to return undef :-)  */
-		}
-	    } else {
-		return -2;
-	    }
-	}
-	if (*b != c) {
-	    return *b < c ? -2 : +2;
-	}
-	++b;
+                        return -2;
+                    }
+                } else {
+                    if (PL_op)
+                        Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+                                         "%s in %s", unees, OP_DESC(PL_op));
+                    else
+                        Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
+                    return -2; /* Really want to return undef :-)  */
+                }
+            } else {
+                return -2;
+            }
+        }
+        if (*b != c) {
+            return *b < c ? -2 : +2;
+        }
+        ++b;
     }
 
     if (b == bend && u == uend)
-	return 0;
+        return 0;
 
     return b < bend ? +1 : -1;
 }
@@ -2737,23 +2737,23 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, Size_t bytelen, Size_t *newlen)
     PERL_ARGS_ASSERT_UTF16_TO_UTF8;
 
     if (bytelen & 1)
-	Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %" UVuf,
+        Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %" UVuf,
                                                                (UV)bytelen);
 
     pend = p + bytelen;
 
     while (p < pend) {
-	UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
-	p += 2;
-	if (OFFUNI_IS_INVARIANT(uv)) {
-	    *d++ = LATIN1_TO_NATIVE((U8) uv);
-	    continue;
-	}
-	if (uv <= MAX_UTF8_TWO_BYTE) {
-	    *d++ = UTF8_TWO_BYTE_HI(UNI_TO_NATIVE(uv));
-	    *d++ = UTF8_TWO_BYTE_LO(UNI_TO_NATIVE(uv));
-	    continue;
-	}
+        UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
+        p += 2;
+        if (OFFUNI_IS_INVARIANT(uv)) {
+            *d++ = LATIN1_TO_NATIVE((U8) uv);
+            continue;
+        }
+        if (uv <= MAX_UTF8_TWO_BYTE) {
+            *d++ = UTF8_TWO_BYTE_HI(UNI_TO_NATIVE(uv));
+            *d++ = UTF8_TWO_BYTE_LO(UNI_TO_NATIVE(uv));
+            continue;
+        }
 
 #define FIRST_HIGH_SURROGATE UNICODE_SURROGATE_FIRST
 #define LAST_HIGH_SURROGATE  0xDBFF
@@ -2763,40 +2763,40 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, Size_t bytelen, Size_t *newlen)
 
         /* This assumes that most uses will be in the first Unicode plane, not
          * needing surrogates */
-	if (UNLIKELY(inRANGE(uv, UNICODE_SURROGATE_FIRST,
+        if (UNLIKELY(inRANGE(uv, UNICODE_SURROGATE_FIRST,
                                  UNICODE_SURROGATE_LAST)))
         {
             if (UNLIKELY(p >= pend) || UNLIKELY(uv > LAST_HIGH_SURROGATE)) {
                 Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
             }
-	    else {
-		UV low = (p[0] << 8) + p[1];
-		if (UNLIKELY(! inRANGE(low, FIRST_LOW_SURROGATE,
+            else {
+                UV low = (p[0] << 8) + p[1];
+                if (UNLIKELY(! inRANGE(low, FIRST_LOW_SURROGATE,
                                             LAST_LOW_SURROGATE)))
                 {
-		    Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+                    Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
                 }
-		p += 2;
-		uv = ((uv - FIRST_HIGH_SURROGATE) << 10)
+                p += 2;
+                uv = ((uv - FIRST_HIGH_SURROGATE) << 10)
                                 + (low - FIRST_LOW_SURROGATE) + FIRST_IN_PLANE1;
-	    }
-	}
+            }
+        }
 #ifdef EBCDIC
         d = uvoffuni_to_utf8_flags(d, uv, 0);
 #else
-	if (uv < FIRST_IN_PLANE1) {
-	    *d++ = (U8)(( uv >> 12)         | 0xe0);
-	    *d++ = (U8)(((uv >>  6) & 0x3f) | 0x80);
-	    *d++ = (U8)(( uv        & 0x3f) | 0x80);
-	    continue;
-	}
-	else {
-	    *d++ = (U8)(( uv >> 18)         | 0xf0);
-	    *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80);
-	    *d++ = (U8)(((uv >>  6) & 0x3f) | 0x80);
-	    *d++ = (U8)(( uv        & 0x3f) | 0x80);
-	    continue;
-	}
+        if (uv < FIRST_IN_PLANE1) {
+            *d++ = (U8)(( uv >> 12)         | 0xe0);
+            *d++ = (U8)(((uv >>  6) & 0x3f) | 0x80);
+            *d++ = (U8)(( uv        & 0x3f) | 0x80);
+            continue;
+        }
+        else {
+            *d++ = (U8)(( uv >> 18)         | 0xf0);
+            *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80);
+            *d++ = (U8)(((uv >>  6) & 0x3f) | 0x80);
+            *d++ = (U8)(( uv        & 0x3f) | 0x80);
+            continue;
+        }
 #endif
     }
     *newlen = d - dstart;
@@ -2814,14 +2814,14 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, Size_t bytelen, Size_t *newlen)
     PERL_ARGS_ASSERT_UTF16_TO_UTF8_REVERSED;
 
     if (bytelen & 1)
-	Perl_croak(aTHX_ "panic: utf16_to_utf8_reversed: odd bytelen %" UVuf,
-		   (UV)bytelen);
+        Perl_croak(aTHX_ "panic: utf16_to_utf8_reversed: odd bytelen %" UVuf,
+                   (UV)bytelen);
 
     while (s < send) {
-	const U8 tmp = s[0];
-	s[0] = s[1];
-	s[1] = tmp;
-	s += 2;
+        const U8 tmp = s[0];
+        s[0] = s[1];
+        s[1] = tmp;
+        s += 2;
     }
     return utf16_to_utf8(p, d, bytelen, newlen);
 }
@@ -2861,38 +2861,38 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp,
     assert(S_or_s == 'S' || S_or_s == 's');
 
     if (UVCHR_IS_INVARIANT(converted)) { /* No difference between the two for
-					     characters in this range */
-	*p = (U8) converted;
-	*lenp = 1;
-	return converted;
+                                             characters in this range */
+        *p = (U8) converted;
+        *lenp = 1;
+        return converted;
     }
 
     /* toUPPER_LATIN1_MOD gives the correct results except for three outliers,
      * which it maps to one of them, so as to only have to have one check for
      * it in the main case */
     if (UNLIKELY(converted == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
-	switch (c) {
-	    case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
-		converted = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
-		break;
-	    case MICRO_SIGN:
-		converted = GREEK_CAPITAL_LETTER_MU;
-		break;
+        switch (c) {
+            case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
+                converted = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
+                break;
+            case MICRO_SIGN:
+                converted = GREEK_CAPITAL_LETTER_MU;
+                break;
 #if    UNICODE_MAJOR_VERSION > 2                                        \
    || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1           \
                                   && UNICODE_DOT_DOT_VERSION >= 8)
-	    case LATIN_SMALL_LETTER_SHARP_S:
-		*(p)++ = 'S';
-		*p = S_or_s;
-		*lenp = 2;
-		return 'S';
+            case LATIN_SMALL_LETTER_SHARP_S:
+                *(p)++ = 'S';
+                *p = S_or_s;
+                *lenp = 2;
+                return 'S';
 #endif
-	    default:
-		Perl_croak(aTHX_ "panic: to_upper_title_latin1 did not expect"
+            default:
+                Perl_croak(aTHX_ "panic: to_upper_title_latin1 did not expect"
                                  " '%c' to map to '%c'",
                                  c, LATIN_SMALL_LETTER_Y_WITH_DIAERESIS);
-		NOT_REACHED; /* NOTREACHED */
-	}
+                NOT_REACHED; /* NOTREACHED */
+        }
     }
 
     *(p)++ = UTF8_TWO_BYTE_HI(converted);
@@ -2983,7 +2983,7 @@ Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)
     PERL_ARGS_ASSERT_TO_UNI_UPPER;
 
     if (c < 256) {
-	return _to_upper_title_latin1((U8) c, p, lenp, 'S');
+        return _to_upper_title_latin1((U8) c, p, lenp, 'S');
     }
 
     return CALL_UPPER_CASE(c, NULL, p, lenp);
@@ -2995,7 +2995,7 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
     PERL_ARGS_ASSERT_TO_UNI_TITLE;
 
     if (c < 256) {
-	return _to_upper_title_latin1((U8) c, p, lenp, 's');
+        return _to_upper_title_latin1((U8) c, p, lenp, 's');
     }
 
     return CALL_TITLE_CASE(c, NULL, p, lenp);
@@ -3013,17 +3013,17 @@ S_to_lower_latin1(const U8 c, U8* p, STRLEN *lenp, const char dummy)
     PERL_UNUSED_ARG(dummy);
 
     if (p != NULL) {
-	if (NATIVE_BYTE_IS_INVARIANT(converted)) {
-	    *p = converted;
-	    *lenp = 1;
-	}
-	else {
+        if (NATIVE_BYTE_IS_INVARIANT(converted)) {
+            *p = converted;
+            *lenp = 1;
+        }
+        else {
             /* Result is known to always be < 256, so can use the EIGHT_BIT
              * macros */
-	    *p = UTF8_EIGHT_BIT_HI(converted);
-	    *(p+1) = UTF8_EIGHT_BIT_LO(converted);
-	    *lenp = 2;
-	}
+            *p = UTF8_EIGHT_BIT_HI(converted);
+            *(p+1) = UTF8_EIGHT_BIT_LO(converted);
+            *lenp = 2;
+        }
     }
     return converted;
 }
@@ -3034,7 +3034,7 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
     PERL_ARGS_ASSERT_TO_UNI_LOWER;
 
     if (c < 256) {
-	return to_lower_latin1((U8) c, p, lenp, 0 /* 0 is a dummy arg */ );
+        return to_lower_latin1((U8) c, p, lenp, 0 /* 0 is a dummy arg */ );
     }
 
     return CALL_LOWER_CASE(c, NULL, p, lenp);
@@ -3057,7 +3057,7 @@ Perl__to_fold_latin1(const U8 c, U8* p, STRLEN *lenp, const unsigned int flags)
     assert (! (flags & FOLD_FLAGS_LOCALE));
 
     if (UNLIKELY(c == MICRO_SIGN)) {
-	converted = GREEK_SMALL_LETTER_MU;
+        converted = GREEK_SMALL_LETTER_MU;
     }
 #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
    || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
@@ -3084,17 +3084,17 @@ Perl__to_fold_latin1(const U8 c, U8* p, STRLEN *lenp, const unsigned int flags)
 #endif
     else { /* In this range the fold of all other characters is their lower
               case */
-	converted = toLOWER_LATIN1(c);
+        converted = toLOWER_LATIN1(c);
     }
 
     if (UVCHR_IS_INVARIANT(converted)) {
-	*p = (U8) converted;
-	*lenp = 1;
+        *p = (U8) converted;
+        *lenp = 1;
     }
     else {
-	*(p)++ = UTF8_TWO_BYTE_HI(converted);
-	*p = UTF8_TWO_BYTE_LO(converted);
-	*lenp = 2;
+        *(p)++ = UTF8_TWO_BYTE_HI(converted);
+        *p = UTF8_TWO_BYTE_LO(converted);
+        *lenp = 2;
     }
 
     return converted;
@@ -3128,20 +3128,20 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
 
     if (c < 256) {
         return _to_fold_latin1((U8) c, p, lenp,
-			    flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
+                            flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
     }
 
     /* Here, above 255.  If no special needs, just use the macro */
     if ( ! (flags & (FOLD_FLAGS_LOCALE|FOLD_FLAGS_NOMIX_ASCII))) {
-	return CALL_FOLD_CASE(c, NULL, p, lenp, flags & FOLD_FLAGS_FULL);
+        return CALL_FOLD_CASE(c, NULL, p, lenp, flags & FOLD_FLAGS_FULL);
     }
     else {  /* Otherwise, _toFOLD_utf8_flags has the intelligence to deal with
-	       the special flags. */
-	U8 utf8_c[UTF8_MAXBYTES + 1];
+               the special flags. */
+        U8 utf8_c[UTF8_MAXBYTES + 1];
 
       needs_full_generality:
-	uvchr_to_utf8(utf8_c, c);
-	return _toFOLD_utf8_flags(utf8_c, utf8_c + sizeof(utf8_c),
+        uvchr_to_utf8(utf8_c, c);
+        return _toFOLD_utf8_flags(utf8_c, utf8_c + sizeof(utf8_c),
                                   p, lenp, flags);
     }
 }
@@ -3184,14 +3184,14 @@ S_warn_on_first_deprecated_use(pTHX_ const char * const name,
     if (ckWARN_d(WARN_DEPRECATED)) {
 
         key = Perl_form(aTHX_ "%s;%d;%s;%d", name, use_locale, file, line);
-	if (! hv_fetch(PL_seen_deprecated_macro, key, strlen(key), 0)) {
+        if (! hv_fetch(PL_seen_deprecated_macro, key, strlen(key), 0)) {
             if (! PL_seen_deprecated_macro) {
                 PL_seen_deprecated_macro = newHV();
             }
             if (! hv_store(PL_seen_deprecated_macro, key,
                            strlen(key), &PL_sv_undef, 0))
             {
-		Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
+                Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
             }
 
             if (instr(file, "mathoms.c")) {
@@ -3344,7 +3344,7 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p,
             }
         }
 
-	/* Note that non-characters are perfectly legal, so no warning should
+        /* Note that non-characters are perfectly legal, so no warning should
          * be given. */
     }
 
@@ -3401,7 +3401,7 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p,
         *lenp = len;
     }
     else {
-	*lenp = uvchr_to_utf8(ustrp, uv1) - ustrp;
+        *lenp = uvchr_to_utf8(ustrp, uv1) - ustrp;
     }
 
     return uv1;
@@ -3503,20 +3503,20 @@ S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result,
      * boundary, so can skip testing */
     if (result > 255) {
 
-	/* Look at every character in the result; if any cross the
-	* boundary, the whole thing is disallowed */
-	U8* s = ustrp + UTF8SKIP(ustrp);
-	U8* e = ustrp + *lenp;
-	while (s < e) {
-	    if (! UTF8_IS_ABOVE_LATIN1(*s)) {
-		goto bad_crossing;
-	    }
-	    s += UTF8SKIP(s);
-	}
+        /* Look at every character in the result; if any cross the
+        * boundary, the whole thing is disallowed */
+        U8* s = ustrp + UTF8SKIP(ustrp);
+        U8* e = ustrp + *lenp;
+        while (s < e) {
+            if (! UTF8_IS_ABOVE_LATIN1(*s)) {
+                goto bad_crossing;
+            }
+            s += UTF8SKIP(s);
+        }
 
         /* Here, no characters crossed, result is ok as-is, but we warn. */
         _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(p, p + UTF8SKIP(p));
-	return result;
+        return result;
     }
 
   bad_crossing:
@@ -3838,9 +3838,9 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
                  ((flags) & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII)),
                                                                     turkic_fc);
 
-	result = CALL_FOLD_CASE(result, p, ustrp, lenp, flags & FOLD_FLAGS_FULL);
+        result = CALL_FOLD_CASE(result, p, ustrp, lenp, flags & FOLD_FLAGS_FULL);
 
-	if (flags & FOLD_FLAGS_LOCALE) {
+        if (flags & FOLD_FLAGS_LOCALE) {
 
 #           define LONG_S_T      LATIN_SMALL_LIGATURE_LONG_S_T_UTF8
 #         ifdef LATIN_CAPITAL_LETTER_SHARP_S_UTF8
@@ -3886,26 +3886,26 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
             }
 #endif
 
-	    return check_locale_boundary_crossing(p, result, ustrp, lenp);
-	}
-	else if (! (flags & FOLD_FLAGS_NOMIX_ASCII)) {
-	    return result;
-	}
-	else {
-	    /* This is called when changing the case of a UTF-8-encoded
+            return check_locale_boundary_crossing(p, result, ustrp, lenp);
+        }
+        else if (! (flags & FOLD_FLAGS_NOMIX_ASCII)) {
+            return result;
+        }
+        else {
+            /* This is called when changing the case of a UTF-8-encoded
              * character above the ASCII range, and the result should not
              * contain an ASCII character. */
 
-	    UV original;    /* To store the first code point of <p> */
+            UV original;    /* To store the first code point of <p> */
 
-	    /* Look at every character in the result; if any cross the
-	    * boundary, the whole thing is disallowed */
-	    U8* s = ustrp;
-	    U8* send = ustrp + *lenp;
-	    while (s < send) {
-		if (isASCII(*s)) {
-		    /* Crossed, have to return the original */
-		    original = valid_utf8_to_uvchr(p, lenp);
+            /* Look at every character in the result; if any cross the
+            * boundary, the whole thing is disallowed */
+            U8* s = ustrp;
+            U8* send = ustrp + *lenp;
+            while (s < send) {
+                if (isASCII(*s)) {
+                    /* Crossed, have to return the original */
+                    original = valid_utf8_to_uvchr(p, lenp);
 
                     /* But in these instances, there is an alternative we can
                      * return that is valid */
@@ -3927,26 +3927,26 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
                         goto return_dotless_i;
                     }
 #endif
-		    Copy(p, ustrp, *lenp, char);
-		    return original;
-		}
-		s += UTF8SKIP(s);
-	    }
-
-	    /* Here, no characters crossed, result is ok as-is */
-	    return result;
-	}
+                    Copy(p, ustrp, *lenp, char);
+                    return original;
+                }
+                s += UTF8SKIP(s);
+            }
+
+            /* Here, no characters crossed, result is ok as-is */
+            return result;
+        }
     }
 
     /* Here, used locale rules.  Convert back to UTF-8 */
     if (UTF8_IS_INVARIANT(result)) {
-	*ustrp = (U8) result;
-	*lenp = 1;
+        *ustrp = (U8) result;
+        *lenp = 1;
     }
     else {
-	*ustrp = UTF8_EIGHT_BIT_HI((U8) result);
-	*(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
-	*lenp = 2;
+        *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+        *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
+        *lenp = 2;
     }
 
     return result;
@@ -3999,13 +3999,13 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
     PERL_ARGS_ASSERT_CHECK_UTF8_PRINT;
 
     while (s < e) {
-	if (UTF8SKIP(s) > len) {
-	    Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
-			   "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print");
-	    return FALSE;
-	}
-	if (UNLIKELY(isUTF8_POSSIBLY_PROBLEMATIC(*s))) {
-	    if (UNLIKELY(UTF8_IS_SUPER(s, e))) {
+        if (UTF8SKIP(s) > len) {
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+                           "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print");
+            return FALSE;
+        }
+        if (UNLIKELY(isUTF8_POSSIBLY_PROBLEMATIC(*s))) {
+            if (UNLIKELY(UTF8_IS_SUPER(s, e))) {
                 if (   ckWARN_d(WARN_NON_UNICODE)
                     || UNLIKELY(0 < does_utf8_overflow(s, s + len,
                                                0 /* Don't consider overlongs */
@@ -4015,28 +4015,28 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
                     (void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_SUPER);
                     ok = FALSE;
                 }
-	    }
-	    else if (UNLIKELY(UTF8_IS_SURROGATE(s, e))) {
-		if (ckWARN_d(WARN_SURROGATE)) {
+            }
+            else if (UNLIKELY(UTF8_IS_SURROGATE(s, e))) {
+                if (ckWARN_d(WARN_SURROGATE)) {
                     /* This has a different warning than the one the called
                      * function would output, so can't just call it, unlike we
                      * do for the non-chars and above-unicodes */
-		    UV uv = utf8_to_uvchr_buf(s, e, NULL);
-		    Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
-			"Unicode surrogate U+%04" UVXf " is illegal in UTF-8",
+                    UV uv = utf8_to_uvchr_buf(s, e, NULL);
+                    Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
+                        "Unicode surrogate U+%04" UVXf " is illegal in UTF-8",
                                              uv);
-		    ok = FALSE;
-		}
-	    }
-	    else if (   UNLIKELY(UTF8_IS_NONCHAR(s, e))
+                    ok = FALSE;
+                }
+            }
+            else if (   UNLIKELY(UTF8_IS_NONCHAR(s, e))
                      && (ckWARN_d(WARN_NONCHAR)))
             {
                 /* A side effect of this function will be to warn */
                 (void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_NONCHAR);
-		ok = FALSE;
-	    }
-	}
-	s += UTF8SKIP(s);
+                ok = FALSE;
+            }
+        }
+        s += UTF8SKIP(s);
     }
 
     return ok;
@@ -4082,17 +4082,17 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
     SvPVCLEAR(dsv);
     SvUTF8_off(dsv);
     for (s = (const char *)spv, e = s + len; s < e; s += UTF8SKIP(s)) {
-	 UV u;
-	 bool ok = 0;
-
-	 if (pvlim && SvCUR(dsv) >= pvlim) {
-	      truncated++;
-	      break;
-	 }
-	 u = utf8_to_uvchr_buf((U8*)s, (U8*)e, 0);
-	 if (u < 256) {
-	     const unsigned char c = (unsigned char)u & 0xFF;
-	     if (flags & UNI_DISPLAY_BACKSLASH) {
+         UV u;
+         bool ok = 0;
+
+         if (pvlim && SvCUR(dsv) >= pvlim) {
+              truncated++;
+              break;
+         }
+         u = utf8_to_uvchr_buf((U8*)s, (U8*)e, 0);
+         if (u < 256) {
+             const unsigned char c = (unsigned char)u & 0xFF;
+             if (flags & UNI_DISPLAY_BACKSLASH) {
                  if (    isMNEMONIC_CNTRL(c)
                      && (   c != '\b'
                          || (flags & UNI_DISPLAY_BACKSPACE)))
@@ -4106,18 +4106,18 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
                     ok = 1;
                  }
              }
-	     /* isPRINT() is the locale-blind version. */
-	     if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isPRINT(c)) {
-		 const char string = c;
-		 sv_catpvn(dsv, &string, 1);
-		 ok = 1;
-	     }
-	 }
-	 if (!ok)
-	     Perl_sv_catpvf(aTHX_ dsv, "\\x{%" UVxf "}", u);
+             /* isPRINT() is the locale-blind version. */
+             if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isPRINT(c)) {
+                 const char string = c;
+                 sv_catpvn(dsv, &string, 1);
+                 ok = 1;
+             }
+         }
+         if (!ok)
+             Perl_sv_catpvf(aTHX_ dsv, "\\x{%" UVxf "}", u);
     }
     if (truncated)
-	 sv_catpvs(dsv, "...");
+         sv_catpvs(dsv, "...");
 
     return SvPVX(dsv);
 }
@@ -4144,7 +4144,7 @@ Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags)
     PERL_ARGS_ASSERT_SV_UNI_DISPLAY;
 
     return Perl_pv_uni_display(aTHX_ dsv, (const U8*)ptr,
-				SvCUR(ssv), pvlim, flags);
+                                SvCUR(ssv), pvlim, flags);
 }
 
 /*
@@ -4202,7 +4202,7 @@ L<https://www.unicode.org/unicode/reports/tr21/> (Case Mappings).
  * externally documented.  Currently it is:
  *  0 for as-documented above
  *  FOLDEQ_UTF8_NOMIX_ASCII meaning that if a non-ASCII character folds to an
-			    ASCII one, to not match
+                            ASCII one, to not match
  *  FOLDEQ_LOCALE	    is set iff the rules from the current underlying
  *	                    locale are to be used.
  *  FOLDEQ_S1_ALREADY_FOLDED  s1 has already been folded before calling this
@@ -4308,7 +4308,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
         e1 = g1;
     }
     else {
-	assert(e1);    /* Must have an end for looking at s1 */
+        assert(e1);    /* Must have an end for looking at s1 */
     }
 
     /* Same for goal for s2 */
@@ -4317,7 +4317,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
         e2 = g2;
     }
     else {
-	assert(e2);
+        assert(e2);
     }
 
     /* If both operands are already folded, we could just do a memEQ on the
@@ -4328,14 +4328,14 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
     while (p1 < e1 && p2 < e2) {
 
         /* If at the beginning of a new character in s1, get its fold to use
-	 * and the length of the fold. */
+         * and the length of the fold. */
         if (n1 == 0) {
-	    if (flags & FOLDEQ_S1_ALREADY_FOLDED) {
-		f1 = (U8 *) p1;
+            if (flags & FOLDEQ_S1_ALREADY_FOLDED) {
+                f1 = (U8 *) p1;
                 assert(u1);
-		n1 = UTF8SKIP(f1);
-	    }
-	    else {
+                n1 = UTF8SKIP(f1);
+            }
+            else {
                 if (isASCII(*p1) && ! (flags & FOLDEQ_LOCALE)) {
 
                     /* We have to forbid mixing ASCII with non-ASCII if the
@@ -4361,11 +4361,11 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
         }
 
         if (n2 == 0) {    /* Same for s2 */
-	    if (flags & FOLDEQ_S2_ALREADY_FOLDED) {
+            if (flags & FOLDEQ_S2_ALREADY_FOLDED) {
 
                 /* Point to the already-folded character.  But for non-UTF-8
                  * variants, convert to UTF-8 for the algorithm below */
-		if (UTF8_IS_INVARIANT(*p2)) {
+                if (UTF8_IS_INVARIANT(*p2)) {
                     f2 = (U8 *) p2;
                     n2 = 1;
                 }
@@ -4379,8 +4379,8 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
                     f2 = foldbuf2;
                     n2 = 2;
                 }
-	    }
-	    else {
+            }
+            else {
                 if (isASCII(*p2) && ! (flags & FOLDEQ_LOCALE)) {
                     if ((flags & FOLDEQ_UTF8_NOMIX_ASCII) && ! isASCII(*p1)) {
                         return 0;
@@ -4395,12 +4395,12 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
                     _to_uni_fold_flags(*p2, foldbuf2, &n2, flags_for_folder);
                 }
                 f2 = foldbuf2;
-	    }
+            }
         }
 
-	/* Here f1 and f2 point to the beginning of the strings to compare.
-	 * These strings are the folds of the next character from each input
-	 * string, stored in UTF-8. */
+        /* Here f1 and f2 point to the beginning of the strings to compare.
+         * These strings are the folds of the next character from each input
+         * string, stored in UTF-8. */
 
         /* While there is more to look for in both folds, see if they
         * continue to match */
author	Michael G. Schwern <schwern@pobox.com>	2020-12-28 18:04:52 -0800
committer	Karl Williamson <khw@cpan.org>	2021-01-17 09:18:15 -0700
commit	1604cfb0273418ed479719f39def5ee559bffda2 (patch)
tree	166a5ab935a029ab86cf6295d6f3cb77da22e559 /utf8.c
parent	557ff1b2a4ecd18fe9229e7e0eb8fa123adc5670 (diff)
download	perl-1604cfb0273418ed479719f39def5ee559bffda2.tar.gz