summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorMichael G. Schwern <schwern@pobox.com>2020-12-28 18:04:52 -0800
committerKarl Williamson <khw@cpan.org>2021-01-17 09:18:15 -0700
commit1604cfb0273418ed479719f39def5ee559bffda2 (patch)
tree166a5ab935a029ab86cf6295d6f3cb77da22e559 /utf8.c
parent557ff1b2a4ecd18fe9229e7e0eb8fa123adc5670 (diff)
downloadperl-1604cfb0273418ed479719f39def5ee559bffda2.tar.gz
style: Detabify indentation of the C code maintained by the core.
This just detabifies to get rid of the mixed tab/space indentation. Applying consistent indentation and dealing with other tabs are another issue. Done with `expand -i`. * vutil.* left alone, it's part of version. * Left regen managed files alone for now.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c510
1 files changed, 255 insertions, 255 deletions
diff --git a/utf8.c b/utf8.c
index add8c093aa..72d3ac2b7a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -99,7 +99,7 @@ Perl__force_out_malformed_utf8_message(pTHX_
LEAVE;
if (! errors) {
- Perl_croak(aTHX_ "panic: _force_out_malformed_utf8_message should"
+ Perl_croak(aTHX_ "panic: _force_out_malformed_utf8_message should"
" be called only when there are errors found");
}
@@ -264,8 +264,8 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
}
if (OFFUNI_IS_INVARIANT(uv)) {
- *d++ = LATIN1_TO_NATIVE(uv);
- return d;
+ *d++ = LATIN1_TO_NATIVE(uv);
+ return d;
}
if (uv <= MAX_UTF8_TWO_BYTE) {
@@ -281,9 +281,9 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
* on ASCII; so 3 bytes covers the range 0x400-0x3FFF on EBCDIC;
* 0x800-0xFFFF on ASCII */
if (uv < (16 * (1U << (2 * SHIFT)))) {
- *d++ = I8_TO_NATIVE_UTF8(( uv >> ((3 - 1) * SHIFT)) | UTF_START_MARK(3));
- *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) | MARK);
- *d++ = I8_TO_NATIVE_UTF8(( uv /* (1 - 1) */ & MASK) | MARK);
+ *d++ = I8_TO_NATIVE_UTF8(( uv >> ((3 - 1) * SHIFT)) | UTF_START_MARK(3));
+ *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) | MARK);
+ *d++ = I8_TO_NATIVE_UTF8(( uv /* (1 - 1) */ & MASK) | MARK);
#ifndef EBCDIC /* These problematic code points are 4 bytes on EBCDIC, so
aren't tested here */
@@ -300,7 +300,7 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
}
}
#endif
- return d;
+ return d;
}
/* Not 3-byte; that means the code point is at least 0x1_0000 on ASCII
@@ -364,10 +364,10 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
* ASCII, so 4 bytes covers the range 0x4000-0x3_FFFF on EBCDIC;
* 0x1_0000-0x1F_FFFF on ASCII */
if (uv < (8 * (1U << (3 * SHIFT)))) {
- *d++ = I8_TO_NATIVE_UTF8(( uv >> ((4 - 1) * SHIFT)) | UTF_START_MARK(4));
- *d++ = I8_TO_NATIVE_UTF8(((uv >> ((3 - 1) * SHIFT)) & MASK) | MARK);
- *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) | MARK);
- *d++ = I8_TO_NATIVE_UTF8(( uv /* (1 - 1) */ & MASK) | MARK);
+ *d++ = I8_TO_NATIVE_UTF8(( uv >> ((4 - 1) * SHIFT)) | UTF_START_MARK(4));
+ *d++ = I8_TO_NATIVE_UTF8(((uv >> ((3 - 1) * SHIFT)) & MASK) | MARK);
+ *d++ = I8_TO_NATIVE_UTF8(((uv >> ((2 - 1) * SHIFT)) & MASK) | MARK);
+ *d++ = I8_TO_NATIVE_UTF8(( uv /* (1 - 1) */ & MASK) | MARK);
#ifdef EBCDIC /* These were handled on ASCII platforms in the code for 3-byte
characters. The end-plane non-characters for EBCDIC were
@@ -380,7 +380,7 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
}
#endif
- return d;
+ return d;
}
/* Not 4-byte; that means the code point is at least 0x20_0000 on ASCII
@@ -391,14 +391,14 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
* khw believes that less code outweighs slight performance gains. */
{
- STRLEN len = OFFUNISKIP(uv);
- U8 *p = d+len-1;
- while (p > d) {
- *p-- = I8_TO_NATIVE_UTF8((uv & MASK) | MARK);
- uv >>= SHIFT;
- }
- *p = I8_TO_NATIVE_UTF8((uv & UTF_START_MASK(len)) | UTF_START_MARK(len));
- return d+len;
+ STRLEN len = OFFUNISKIP(uv);
+ U8 *p = d+len-1;
+ while (p > d) {
+ *p-- = I8_TO_NATIVE_UTF8((uv & MASK) | MARK);
+ uv >>= SHIFT;
+ }
+ *p = I8_TO_NATIVE_UTF8((uv & UTF_START_MASK(len)) | UTF_START_MARK(len));
+ return d+len;
}
}
@@ -1659,7 +1659,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
possible_problems |= UTF8_GOT_EMPTY;
curlen = 0;
uv = UNICODE_REPLACEMENT;
- goto ready_to_handle_errors;
+ goto ready_to_handle_errors;
}
expectlen = UTF8SKIP(s);
@@ -1669,15 +1669,15 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
* things up here to return it. It will be overridden only in those rare
* cases where a malformation is found */
if (retlen) {
- *retlen = expectlen;
+ *retlen = expectlen;
}
/* A continuation character can't start a valid sequence */
if (UNLIKELY(UTF8_IS_CONTINUATION(uv))) {
- possible_problems |= UTF8_GOT_CONTINUATION;
+ possible_problems |= UTF8_GOT_CONTINUATION;
curlen = 1;
uv = UNICODE_REPLACEMENT;
- goto ready_to_handle_errors;
+ goto ready_to_handle_errors;
}
/* Here is not a continuation byte, nor an invariant. The only thing left
@@ -1703,8 +1703,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
/* Now, loop through the remaining bytes in the character's sequence,
* accumulating each into the working value as we go. */
for (s = s0 + 1; s < send; s++) {
- if (LIKELY(UTF8_IS_CONTINUATION(*s))) {
- uv = UTF8_ACCUMULATE(uv, *s);
+ if (LIKELY(UTF8_IS_CONTINUATION(*s))) {
+ uv = UTF8_ACCUMULATE(uv, *s);
continue;
}
@@ -1808,11 +1808,11 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
&& LIKELY(! (possible_problems & UTF8_GOT_OVERFLOW))
&& ( isUTF8_POSSIBLY_PROBLEMATIC(*adjusted_s0)
|| UNLIKELY(isUTF8_PERL_EXTENDED(s0)))))
- && ((flags & ( UTF8_DISALLOW_NONCHAR
+ && ((flags & ( UTF8_DISALLOW_NONCHAR
|UTF8_DISALLOW_SURROGATE
|UTF8_DISALLOW_SUPER
|UTF8_DISALLOW_PERL_EXTENDED
- |UTF8_WARN_NONCHAR
+ |UTF8_WARN_NONCHAR
|UTF8_WARN_SURROGATE
|UTF8_WARN_SUPER
|UTF8_WARN_PERL_EXTENDED))))
@@ -2373,20 +2373,20 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e)
* In other words: in Perl UTF-8 is not just for Unicode. */
if (UNLIKELY(e < s))
- goto warn_and_return;
+ goto warn_and_return;
while (s < e) {
s += UTF8SKIP(s);
- len++;
+ len++;
}
if (UNLIKELY(e != s)) {
- len--;
+ len--;
warn_and_return:
- if (PL_op)
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
- "%s in %s", unees, OP_DESC(PL_op));
- else
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
+ if (PL_op)
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+ "%s in %s", unees, OP_DESC(PL_op));
+ else
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
}
return len;
@@ -2419,41 +2419,41 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen)
while (b < bend && u < uend) {
U8 c = *u++;
- if (!UTF8_IS_INVARIANT(c)) {
- if (UTF8_IS_DOWNGRADEABLE_START(c)) {
- if (u < uend) {
- U8 c1 = *u++;
- if (UTF8_IS_CONTINUATION(c1)) {
- c = EIGHT_BIT_UTF8_TO_NATIVE(c, c1);
- } else {
+ if (!UTF8_IS_INVARIANT(c)) {
+ if (UTF8_IS_DOWNGRADEABLE_START(c)) {
+ if (u < uend) {
+ U8 c1 = *u++;
+ if (UTF8_IS_CONTINUATION(c1)) {
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, c1);
+ } else {
/* diag_listed_as: Malformed UTF-8 character%s */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
"%s %s%s",
unexpected_non_continuation_text(u - 2, 2, 1, 2),
PL_op ? " in " : "",
PL_op ? OP_DESC(PL_op) : "");
- return -2;
- }
- } else {
- if (PL_op)
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
- "%s in %s", unees, OP_DESC(PL_op));
- else
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
- return -2; /* Really want to return undef :-) */
- }
- } else {
- return -2;
- }
- }
- if (*b != c) {
- return *b < c ? -2 : +2;
- }
- ++b;
+ return -2;
+ }
+ } else {
+ if (PL_op)
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+ "%s in %s", unees, OP_DESC(PL_op));
+ else
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
+ return -2; /* Really want to return undef :-) */
+ }
+ } else {
+ return -2;
+ }
+ }
+ if (*b != c) {
+ return *b < c ? -2 : +2;
+ }
+ ++b;
}
if (b == bend && u == uend)
- return 0;
+ return 0;
return b < bend ? +1 : -1;
}
@@ -2737,23 +2737,23 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, Size_t bytelen, Size_t *newlen)
PERL_ARGS_ASSERT_UTF16_TO_UTF8;
if (bytelen & 1)
- Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %" UVuf,
+ Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %" UVuf,
(UV)bytelen);
pend = p + bytelen;
while (p < pend) {
- UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
- p += 2;
- if (OFFUNI_IS_INVARIANT(uv)) {
- *d++ = LATIN1_TO_NATIVE((U8) uv);
- continue;
- }
- if (uv <= MAX_UTF8_TWO_BYTE) {
- *d++ = UTF8_TWO_BYTE_HI(UNI_TO_NATIVE(uv));
- *d++ = UTF8_TWO_BYTE_LO(UNI_TO_NATIVE(uv));
- continue;
- }
+ UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
+ p += 2;
+ if (OFFUNI_IS_INVARIANT(uv)) {
+ *d++ = LATIN1_TO_NATIVE((U8) uv);
+ continue;
+ }
+ if (uv <= MAX_UTF8_TWO_BYTE) {
+ *d++ = UTF8_TWO_BYTE_HI(UNI_TO_NATIVE(uv));
+ *d++ = UTF8_TWO_BYTE_LO(UNI_TO_NATIVE(uv));
+ continue;
+ }
#define FIRST_HIGH_SURROGATE UNICODE_SURROGATE_FIRST
#define LAST_HIGH_SURROGATE 0xDBFF
@@ -2763,40 +2763,40 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, Size_t bytelen, Size_t *newlen)
/* This assumes that most uses will be in the first Unicode plane, not
* needing surrogates */
- if (UNLIKELY(inRANGE(uv, UNICODE_SURROGATE_FIRST,
+ if (UNLIKELY(inRANGE(uv, UNICODE_SURROGATE_FIRST,
UNICODE_SURROGATE_LAST)))
{
if (UNLIKELY(p >= pend) || UNLIKELY(uv > LAST_HIGH_SURROGATE)) {
Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
}
- else {
- UV low = (p[0] << 8) + p[1];
- if (UNLIKELY(! inRANGE(low, FIRST_LOW_SURROGATE,
+ else {
+ UV low = (p[0] << 8) + p[1];
+ if (UNLIKELY(! inRANGE(low, FIRST_LOW_SURROGATE,
LAST_LOW_SURROGATE)))
{
- Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
+ Perl_croak(aTHX_ "Malformed UTF-16 surrogate");
}
- p += 2;
- uv = ((uv - FIRST_HIGH_SURROGATE) << 10)
+ p += 2;
+ uv = ((uv - FIRST_HIGH_SURROGATE) << 10)
+ (low - FIRST_LOW_SURROGATE) + FIRST_IN_PLANE1;
- }
- }
+ }
+ }
#ifdef EBCDIC
d = uvoffuni_to_utf8_flags(d, uv, 0);
#else
- if (uv < FIRST_IN_PLANE1) {
- *d++ = (U8)(( uv >> 12) | 0xe0);
- *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80);
- *d++ = (U8)(( uv & 0x3f) | 0x80);
- continue;
- }
- else {
- *d++ = (U8)(( uv >> 18) | 0xf0);
- *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80);
- *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80);
- *d++ = (U8)(( uv & 0x3f) | 0x80);
- continue;
- }
+ if (uv < FIRST_IN_PLANE1) {
+ *d++ = (U8)(( uv >> 12) | 0xe0);
+ *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80);
+ *d++ = (U8)(( uv & 0x3f) | 0x80);
+ continue;
+ }
+ else {
+ *d++ = (U8)(( uv >> 18) | 0xf0);
+ *d++ = (U8)(((uv >> 12) & 0x3f) | 0x80);
+ *d++ = (U8)(((uv >> 6) & 0x3f) | 0x80);
+ *d++ = (U8)(( uv & 0x3f) | 0x80);
+ continue;
+ }
#endif
}
*newlen = d - dstart;
@@ -2814,14 +2814,14 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, Size_t bytelen, Size_t *newlen)
PERL_ARGS_ASSERT_UTF16_TO_UTF8_REVERSED;
if (bytelen & 1)
- Perl_croak(aTHX_ "panic: utf16_to_utf8_reversed: odd bytelen %" UVuf,
- (UV)bytelen);
+ Perl_croak(aTHX_ "panic: utf16_to_utf8_reversed: odd bytelen %" UVuf,
+ (UV)bytelen);
while (s < send) {
- const U8 tmp = s[0];
- s[0] = s[1];
- s[1] = tmp;
- s += 2;
+ const U8 tmp = s[0];
+ s[0] = s[1];
+ s[1] = tmp;
+ s += 2;
}
return utf16_to_utf8(p, d, bytelen, newlen);
}
@@ -2861,38 +2861,38 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp,
assert(S_or_s == 'S' || S_or_s == 's');
if (UVCHR_IS_INVARIANT(converted)) { /* No difference between the two for
- characters in this range */
- *p = (U8) converted;
- *lenp = 1;
- return converted;
+ characters in this range */
+ *p = (U8) converted;
+ *lenp = 1;
+ return converted;
}
/* toUPPER_LATIN1_MOD gives the correct results except for three outliers,
* which it maps to one of them, so as to only have to have one check for
* it in the main case */
if (UNLIKELY(converted == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
- switch (c) {
- case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
- converted = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
- break;
- case MICRO_SIGN:
- converted = GREEK_CAPITAL_LETTER_MU;
- break;
+ switch (c) {
+ case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
+ converted = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
+ break;
+ case MICRO_SIGN:
+ converted = GREEK_CAPITAL_LETTER_MU;
+ break;
#if UNICODE_MAJOR_VERSION > 2 \
|| (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
&& UNICODE_DOT_DOT_VERSION >= 8)
- case LATIN_SMALL_LETTER_SHARP_S:
- *(p)++ = 'S';
- *p = S_or_s;
- *lenp = 2;
- return 'S';
+ case LATIN_SMALL_LETTER_SHARP_S:
+ *(p)++ = 'S';
+ *p = S_or_s;
+ *lenp = 2;
+ return 'S';
#endif
- default:
- Perl_croak(aTHX_ "panic: to_upper_title_latin1 did not expect"
+ default:
+ Perl_croak(aTHX_ "panic: to_upper_title_latin1 did not expect"
" '%c' to map to '%c'",
c, LATIN_SMALL_LETTER_Y_WITH_DIAERESIS);
- NOT_REACHED; /* NOTREACHED */
- }
+ NOT_REACHED; /* NOTREACHED */
+ }
}
*(p)++ = UTF8_TWO_BYTE_HI(converted);
@@ -2983,7 +2983,7 @@ Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)
PERL_ARGS_ASSERT_TO_UNI_UPPER;
if (c < 256) {
- return _to_upper_title_latin1((U8) c, p, lenp, 'S');
+ return _to_upper_title_latin1((U8) c, p, lenp, 'S');
}
return CALL_UPPER_CASE(c, NULL, p, lenp);
@@ -2995,7 +2995,7 @@ Perl_to_uni_title(pTHX_ UV c, U8* p, STRLEN *lenp)
PERL_ARGS_ASSERT_TO_UNI_TITLE;
if (c < 256) {
- return _to_upper_title_latin1((U8) c, p, lenp, 's');
+ return _to_upper_title_latin1((U8) c, p, lenp, 's');
}
return CALL_TITLE_CASE(c, NULL, p, lenp);
@@ -3013,17 +3013,17 @@ S_to_lower_latin1(const U8 c, U8* p, STRLEN *lenp, const char dummy)
PERL_UNUSED_ARG(dummy);
if (p != NULL) {
- if (NATIVE_BYTE_IS_INVARIANT(converted)) {
- *p = converted;
- *lenp = 1;
- }
- else {
+ if (NATIVE_BYTE_IS_INVARIANT(converted)) {
+ *p = converted;
+ *lenp = 1;
+ }
+ else {
/* Result is known to always be < 256, so can use the EIGHT_BIT
* macros */
- *p = UTF8_EIGHT_BIT_HI(converted);
- *(p+1) = UTF8_EIGHT_BIT_LO(converted);
- *lenp = 2;
- }
+ *p = UTF8_EIGHT_BIT_HI(converted);
+ *(p+1) = UTF8_EIGHT_BIT_LO(converted);
+ *lenp = 2;
+ }
}
return converted;
}
@@ -3034,7 +3034,7 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
PERL_ARGS_ASSERT_TO_UNI_LOWER;
if (c < 256) {
- return to_lower_latin1((U8) c, p, lenp, 0 /* 0 is a dummy arg */ );
+ return to_lower_latin1((U8) c, p, lenp, 0 /* 0 is a dummy arg */ );
}
return CALL_LOWER_CASE(c, NULL, p, lenp);
@@ -3057,7 +3057,7 @@ Perl__to_fold_latin1(const U8 c, U8* p, STRLEN *lenp, const unsigned int flags)
assert (! (flags & FOLD_FLAGS_LOCALE));
if (UNLIKELY(c == MICRO_SIGN)) {
- converted = GREEK_SMALL_LETTER_MU;
+ converted = GREEK_SMALL_LETTER_MU;
}
#if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
|| (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
@@ -3084,17 +3084,17 @@ Perl__to_fold_latin1(const U8 c, U8* p, STRLEN *lenp, const unsigned int flags)
#endif
else { /* In this range the fold of all other characters is their lower
case */
- converted = toLOWER_LATIN1(c);
+ converted = toLOWER_LATIN1(c);
}
if (UVCHR_IS_INVARIANT(converted)) {
- *p = (U8) converted;
- *lenp = 1;
+ *p = (U8) converted;
+ *lenp = 1;
}
else {
- *(p)++ = UTF8_TWO_BYTE_HI(converted);
- *p = UTF8_TWO_BYTE_LO(converted);
- *lenp = 2;
+ *(p)++ = UTF8_TWO_BYTE_HI(converted);
+ *p = UTF8_TWO_BYTE_LO(converted);
+ *lenp = 2;
}
return converted;
@@ -3128,20 +3128,20 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
if (c < 256) {
return _to_fold_latin1((U8) c, p, lenp,
- flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
+ flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
}
/* Here, above 255. If no special needs, just use the macro */
if ( ! (flags & (FOLD_FLAGS_LOCALE|FOLD_FLAGS_NOMIX_ASCII))) {
- return CALL_FOLD_CASE(c, NULL, p, lenp, flags & FOLD_FLAGS_FULL);
+ return CALL_FOLD_CASE(c, NULL, p, lenp, flags & FOLD_FLAGS_FULL);
}
else { /* Otherwise, _toFOLD_utf8_flags has the intelligence to deal with
- the special flags. */
- U8 utf8_c[UTF8_MAXBYTES + 1];
+ the special flags. */
+ U8 utf8_c[UTF8_MAXBYTES + 1];
needs_full_generality:
- uvchr_to_utf8(utf8_c, c);
- return _toFOLD_utf8_flags(utf8_c, utf8_c + sizeof(utf8_c),
+ uvchr_to_utf8(utf8_c, c);
+ return _toFOLD_utf8_flags(utf8_c, utf8_c + sizeof(utf8_c),
p, lenp, flags);
}
}
@@ -3184,14 +3184,14 @@ S_warn_on_first_deprecated_use(pTHX_ const char * const name,
if (ckWARN_d(WARN_DEPRECATED)) {
key = Perl_form(aTHX_ "%s;%d;%s;%d", name, use_locale, file, line);
- if (! hv_fetch(PL_seen_deprecated_macro, key, strlen(key), 0)) {
+ if (! hv_fetch(PL_seen_deprecated_macro, key, strlen(key), 0)) {
if (! PL_seen_deprecated_macro) {
PL_seen_deprecated_macro = newHV();
}
if (! hv_store(PL_seen_deprecated_macro, key,
strlen(key), &PL_sv_undef, 0))
{
- Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
+ Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
}
if (instr(file, "mathoms.c")) {
@@ -3344,7 +3344,7 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p,
}
}
- /* Note that non-characters are perfectly legal, so no warning should
+ /* Note that non-characters are perfectly legal, so no warning should
* be given. */
}
@@ -3401,7 +3401,7 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p,
*lenp = len;
}
else {
- *lenp = uvchr_to_utf8(ustrp, uv1) - ustrp;
+ *lenp = uvchr_to_utf8(ustrp, uv1) - ustrp;
}
return uv1;
@@ -3503,20 +3503,20 @@ S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result,
* boundary, so can skip testing */
if (result > 255) {
- /* Look at every character in the result; if any cross the
- * boundary, the whole thing is disallowed */
- U8* s = ustrp + UTF8SKIP(ustrp);
- U8* e = ustrp + *lenp;
- while (s < e) {
- if (! UTF8_IS_ABOVE_LATIN1(*s)) {
- goto bad_crossing;
- }
- s += UTF8SKIP(s);
- }
+ /* Look at every character in the result; if any cross the
+ * boundary, the whole thing is disallowed */
+ U8* s = ustrp + UTF8SKIP(ustrp);
+ U8* e = ustrp + *lenp;
+ while (s < e) {
+ if (! UTF8_IS_ABOVE_LATIN1(*s)) {
+ goto bad_crossing;
+ }
+ s += UTF8SKIP(s);
+ }
/* Here, no characters crossed, result is ok as-is, but we warn. */
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(p, p + UTF8SKIP(p));
- return result;
+ return result;
}
bad_crossing:
@@ -3838,9 +3838,9 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
((flags) & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII)),
turkic_fc);
- result = CALL_FOLD_CASE(result, p, ustrp, lenp, flags & FOLD_FLAGS_FULL);
+ result = CALL_FOLD_CASE(result, p, ustrp, lenp, flags & FOLD_FLAGS_FULL);
- if (flags & FOLD_FLAGS_LOCALE) {
+ if (flags & FOLD_FLAGS_LOCALE) {
# define LONG_S_T LATIN_SMALL_LIGATURE_LONG_S_T_UTF8
# ifdef LATIN_CAPITAL_LETTER_SHARP_S_UTF8
@@ -3886,26 +3886,26 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
}
#endif
- return check_locale_boundary_crossing(p, result, ustrp, lenp);
- }
- else if (! (flags & FOLD_FLAGS_NOMIX_ASCII)) {
- return result;
- }
- else {
- /* This is called when changing the case of a UTF-8-encoded
+ return check_locale_boundary_crossing(p, result, ustrp, lenp);
+ }
+ else if (! (flags & FOLD_FLAGS_NOMIX_ASCII)) {
+ return result;
+ }
+ else {
+ /* This is called when changing the case of a UTF-8-encoded
* character above the ASCII range, and the result should not
* contain an ASCII character. */
- UV original; /* To store the first code point of <p> */
+ UV original; /* To store the first code point of <p> */
- /* Look at every character in the result; if any cross the
- * boundary, the whole thing is disallowed */
- U8* s = ustrp;
- U8* send = ustrp + *lenp;
- while (s < send) {
- if (isASCII(*s)) {
- /* Crossed, have to return the original */
- original = valid_utf8_to_uvchr(p, lenp);
+ /* Look at every character in the result; if any cross the
+ * boundary, the whole thing is disallowed */
+ U8* s = ustrp;
+ U8* send = ustrp + *lenp;
+ while (s < send) {
+ if (isASCII(*s)) {
+ /* Crossed, have to return the original */
+ original = valid_utf8_to_uvchr(p, lenp);
/* But in these instances, there is an alternative we can
* return that is valid */
@@ -3927,26 +3927,26 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
goto return_dotless_i;
}
#endif
- Copy(p, ustrp, *lenp, char);
- return original;
- }
- s += UTF8SKIP(s);
- }
-
- /* Here, no characters crossed, result is ok as-is */
- return result;
- }
+ Copy(p, ustrp, *lenp, char);
+ return original;
+ }
+ s += UTF8SKIP(s);
+ }
+
+ /* Here, no characters crossed, result is ok as-is */
+ return result;
+ }
}
/* Here, used locale rules. Convert back to UTF-8 */
if (UTF8_IS_INVARIANT(result)) {
- *ustrp = (U8) result;
- *lenp = 1;
+ *ustrp = (U8) result;
+ *lenp = 1;
}
else {
- *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
- *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
- *lenp = 2;
+ *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+ *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
+ *lenp = 2;
}
return result;
@@ -3999,13 +3999,13 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
PERL_ARGS_ASSERT_CHECK_UTF8_PRINT;
while (s < e) {
- if (UTF8SKIP(s) > len) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
- "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print");
- return FALSE;
- }
- if (UNLIKELY(isUTF8_POSSIBLY_PROBLEMATIC(*s))) {
- if (UNLIKELY(UTF8_IS_SUPER(s, e))) {
+ if (UTF8SKIP(s) > len) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+ "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print");
+ return FALSE;
+ }
+ if (UNLIKELY(isUTF8_POSSIBLY_PROBLEMATIC(*s))) {
+ if (UNLIKELY(UTF8_IS_SUPER(s, e))) {
if ( ckWARN_d(WARN_NON_UNICODE)
|| UNLIKELY(0 < does_utf8_overflow(s, s + len,
0 /* Don't consider overlongs */
@@ -4015,28 +4015,28 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
(void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_SUPER);
ok = FALSE;
}
- }
- else if (UNLIKELY(UTF8_IS_SURROGATE(s, e))) {
- if (ckWARN_d(WARN_SURROGATE)) {
+ }
+ else if (UNLIKELY(UTF8_IS_SURROGATE(s, e))) {
+ if (ckWARN_d(WARN_SURROGATE)) {
/* This has a different warning than the one the called
* function would output, so can't just call it, unlike what we
* do for the non-chars and above-unicodes */
- UV uv = utf8_to_uvchr_buf(s, e, NULL);
- Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
- "Unicode surrogate U+%04" UVXf " is illegal in UTF-8",
+ UV uv = utf8_to_uvchr_buf(s, e, NULL);
+ Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
+ "Unicode surrogate U+%04" UVXf " is illegal in UTF-8",
uv);
- ok = FALSE;
- }
- }
- else if ( UNLIKELY(UTF8_IS_NONCHAR(s, e))
+ ok = FALSE;
+ }
+ }
+ else if ( UNLIKELY(UTF8_IS_NONCHAR(s, e))
&& (ckWARN_d(WARN_NONCHAR)))
{
/* A side effect of this function will be to warn */
(void) utf8n_to_uvchr(s, e - s, NULL, UTF8_WARN_NONCHAR);
- ok = FALSE;
- }
- }
- s += UTF8SKIP(s);
+ ok = FALSE;
+ }
+ }
+ s += UTF8SKIP(s);
}
return ok;
@@ -4082,17 +4082,17 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
SvPVCLEAR(dsv);
SvUTF8_off(dsv);
for (s = (const char *)spv, e = s + len; s < e; s += UTF8SKIP(s)) {
- UV u;
- bool ok = 0;
-
- if (pvlim && SvCUR(dsv) >= pvlim) {
- truncated++;
- break;
- }
- u = utf8_to_uvchr_buf((U8*)s, (U8*)e, 0);
- if (u < 256) {
- const unsigned char c = (unsigned char)u & 0xFF;
- if (flags & UNI_DISPLAY_BACKSLASH) {
+ UV u;
+ bool ok = 0;
+
+ if (pvlim && SvCUR(dsv) >= pvlim) {
+ truncated++;
+ break;
+ }
+ u = utf8_to_uvchr_buf((U8*)s, (U8*)e, 0);
+ if (u < 256) {
+ const unsigned char c = (unsigned char)u & 0xFF;
+ if (flags & UNI_DISPLAY_BACKSLASH) {
if ( isMNEMONIC_CNTRL(c)
&& ( c != '\b'
|| (flags & UNI_DISPLAY_BACKSPACE)))
@@ -4106,18 +4106,18 @@ Perl_pv_uni_display(pTHX_ SV *dsv, const U8 *spv, STRLEN len, STRLEN pvlim,
ok = 1;
}
}
- /* isPRINT() is the locale-blind version. */
- if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isPRINT(c)) {
- const char string = c;
- sv_catpvn(dsv, &string, 1);
- ok = 1;
- }
- }
- if (!ok)
- Perl_sv_catpvf(aTHX_ dsv, "\\x{%" UVxf "}", u);
+ /* isPRINT() is the locale-blind version. */
+ if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isPRINT(c)) {
+ const char string = c;
+ sv_catpvn(dsv, &string, 1);
+ ok = 1;
+ }
+ }
+ if (!ok)
+ Perl_sv_catpvf(aTHX_ dsv, "\\x{%" UVxf "}", u);
}
if (truncated)
- sv_catpvs(dsv, "...");
+ sv_catpvs(dsv, "...");
return SvPVX(dsv);
}
@@ -4144,7 +4144,7 @@ Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags)
PERL_ARGS_ASSERT_SV_UNI_DISPLAY;
return Perl_pv_uni_display(aTHX_ dsv, (const U8*)ptr,
- SvCUR(ssv), pvlim, flags);
+ SvCUR(ssv), pvlim, flags);
}
/*
@@ -4202,7 +4202,7 @@ L<https://www.unicode.org/unicode/reports/tr21/> (Case Mappings).
* externally documented. Currently it is:
* 0 for as-documented above
* FOLDEQ_UTF8_NOMIX_ASCII meaning that if a non-ASCII character folds to an
- ASCII one, to not match
+ ASCII one, to not match
* FOLDEQ_LOCALE is set iff the rules from the current underlying
* locale are to be used.
* FOLDEQ_S1_ALREADY_FOLDED s1 has already been folded before calling this
@@ -4308,7 +4308,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
e1 = g1;
}
else {
- assert(e1); /* Must have an end for looking at s1 */
+ assert(e1); /* Must have an end for looking at s1 */
}
/* Same for goal for s2 */
@@ -4317,7 +4317,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
e2 = g2;
}
else {
- assert(e2);
+ assert(e2);
}
/* If both operands are already folded, we could just do a memEQ on the
@@ -4328,14 +4328,14 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
while (p1 < e1 && p2 < e2) {
/* If at the beginning of a new character in s1, get its fold to use
- * and the length of the fold. */
+ * and the length of the fold. */
if (n1 == 0) {
- if (flags & FOLDEQ_S1_ALREADY_FOLDED) {
- f1 = (U8 *) p1;
+ if (flags & FOLDEQ_S1_ALREADY_FOLDED) {
+ f1 = (U8 *) p1;
assert(u1);
- n1 = UTF8SKIP(f1);
- }
- else {
+ n1 = UTF8SKIP(f1);
+ }
+ else {
if (isASCII(*p1) && ! (flags & FOLDEQ_LOCALE)) {
/* We have to forbid mixing ASCII with non-ASCII if the
@@ -4361,11 +4361,11 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
}
if (n2 == 0) { /* Same for s2 */
- if (flags & FOLDEQ_S2_ALREADY_FOLDED) {
+ if (flags & FOLDEQ_S2_ALREADY_FOLDED) {
/* Point to the already-folded character. But for non-UTF-8
* variants, convert to UTF-8 for the algorithm below */
- if (UTF8_IS_INVARIANT(*p2)) {
+ if (UTF8_IS_INVARIANT(*p2)) {
f2 = (U8 *) p2;
n2 = 1;
}
@@ -4379,8 +4379,8 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
f2 = foldbuf2;
n2 = 2;
}
- }
- else {
+ }
+ else {
if (isASCII(*p2) && ! (flags & FOLDEQ_LOCALE)) {
if ((flags & FOLDEQ_UTF8_NOMIX_ASCII) && ! isASCII(*p1)) {
return 0;
@@ -4395,12 +4395,12 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
_to_uni_fold_flags(*p2, foldbuf2, &n2, flags_for_folder);
}
f2 = foldbuf2;
- }
+ }
}
- /* Here f1 and f2 point to the beginning of the strings to compare.
- * These strings are the folds of the next character from each input
- * string, stored in UTF-8. */
+ /* Here f1 and f2 point to the beginning of the strings to compare.
+ * These strings are the folds of the next character from each input
+ * string, stored in UTF-8. */
/* While there is more to look for in both folds, see if they
* continue to match */