summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc2
-rw-r--r--embed.h1
-rw-r--r--inline.h22
-rw-r--r--pp_pack.c16
-rw-r--r--proto.h5
-rw-r--r--sv.c18
-rw-r--r--toke.c8
-rw-r--r--utf8.c9
8 files changed, 37 insertions, 44 deletions
diff --git a/embed.fnc b/embed.fnc
index bebed86a28..7747453a57 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2316,6 +2316,8 @@ iR |bool |is_utf8_common |NN const U8 *const p|NN SV **swash|NN const char * con
sR |SV* |swatch_get |NN SV* swash|UV start|UV span
#endif
+AiMn |void |append_utf8_from_native_byte|const U8 byte|NN U8** dest
+
Apd |void |sv_setsv_flags |NN SV *dstr|NULLOK SV *sstr|const I32 flags
Apd |void |sv_catpvn_flags|NN SV *const dstr|NN const char *sstr|const STRLEN len \
|const I32 flags
diff --git a/embed.h b/embed.h
index 132993787e..9d31874a30 100644
--- a/embed.h
+++ b/embed.h
@@ -41,6 +41,7 @@
#define _to_utf8_upper_flags(a,b,c,d,e) Perl__to_utf8_upper_flags(aTHX_ a,b,c,d,e)
#define amagic_call(a,b,c,d) Perl_amagic_call(aTHX_ a,b,c,d)
#define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b)
+#define append_utf8_from_native_byte S_append_utf8_from_native_byte
#define apply_attrs_string(a,b,c,d) Perl_apply_attrs_string(aTHX_ a,b,c,d)
#define atfork_lock Perl_atfork_lock
#define atfork_unlock Perl_atfork_unlock
diff --git a/inline.h b/inline.h
index 066edf1bda..63a5e064d1 100644
--- a/inline.h
+++ b/inline.h
@@ -203,8 +203,26 @@ S_croak_memory_wrap(void)
/* ------------------------------- utf8.h ------------------------------- */
-/* These exist only to replace the macros they formerly were so that their use
- * can be deprecated */
+PERL_STATIC_INLINE void
+S_append_utf8_from_native_byte(const U8 byte, U8** dest)
+{
+ /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
+ * encoded string at '*dest', updating '*dest' to include it */
+
+ const U8 uv = NATIVE_TO_LATIN1(byte);
+
+ PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
+
+ if (UNI_IS_INVARIANT(uv))
+ *(*dest)++ = UNI_TO_NATIVE(uv);
+ else {
+ *(*dest)++ = UTF8_EIGHT_BIT_HI(uv);
+ *(*dest)++ = UTF8_EIGHT_BIT_LO(uv);
+ }
+}
+
+/* These two exist only to replace the macros they formerly were so that their
+ * use can be deprecated */
PERL_STATIC_INLINE bool
S_isIDFIRST_lazy(pTHX_ const char* p)
diff --git a/pp_pack.c b/pp_pack.c
index 3cfc03c6f1..39f862e0a5 100644
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -357,24 +357,12 @@ S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
if (UNLIKELY(needs_swap)) {
const U8 *p = start + len;
while (p-- > start) {
- const UV uv = NATIVE_TO_ASCII(*p);
- if (UNI_IS_INVARIANT(uv))
- *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
- else {
- *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
- *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
- }
+ append_utf8_from_native_byte(*p, (U8 **) & dest);
}
} else {
const U8 * const end = start + len;
while (start < end) {
- const UV uv = NATIVE_TO_ASCII(*start);
- if (UNI_IS_INVARIANT(uv))
- *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
- else {
- *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
- *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
- }
+ append_utf8_from_native_byte(*start, (U8 **) & dest);
start++;
}
}
diff --git a/proto.h b/proto.h
index 30fcba5ef5..0da17ec963 100644
--- a/proto.h
+++ b/proto.h
@@ -112,6 +112,11 @@ PERL_CALLCONV SV * Perl_amagic_deref_call(pTHX_ SV *ref, int method)
assert(ref)
PERL_CALLCONV bool Perl_amagic_is_enabled(pTHX_ int method);
+PERL_STATIC_INLINE void S_append_utf8_from_native_byte(const U8 byte, U8** dest)
+ __attribute__nonnull__(2);
+#define PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE \
+ assert(dest)
+
PERL_CALLCONV I32 Perl_apply(pTHX_ I32 type, SV** mark, SV** sp)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3);
diff --git a/sv.c b/sv.c
index 3945ab991a..8ba05050ae 100644
--- a/sv.c
+++ b/sv.c
@@ -3422,13 +3422,8 @@ must_be_utf8:
}
while (t < e) {
- const UV uv = NATIVE8_TO_UNI(*t++);
- if (UNI_IS_INVARIANT(uv))
- *d++ = (U8)UNI_TO_NATIVE(uv);
- else {
- *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
- *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
- }
+ append_utf8_from_native_byte(*t, &d);
+ t++;
}
*d = '\0';
SvPV_free(sv); /* No longer using pre-existing string */
@@ -5200,13 +5195,8 @@ Perl_sv_catpvn_flags(pTHX_ SV *const dsv, const char *sstr, const STRLEN slen, c
d = (U8 *)SvPVX(dsv) + dlen;
while (sstr < send) {
- const UV uv = NATIVE_TO_ASCII((U8)*sstr++);
- if (UNI_IS_INVARIANT(uv))
- *d++ = (U8)UTF_TO_NATIVE(uv);
- else {
- *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
- *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
- }
+ append_utf8_from_native_byte(*sstr, &d);
+ sstr++;
}
SvCUR_set(dsv, d-(const U8 *)SvPVX(dsv));
}
diff --git a/toke.c b/toke.c
index 96b0274abf..3ce12e28b6 100644
--- a/toke.c
+++ b/toke.c
@@ -3176,13 +3176,7 @@ S_scan_const(pTHX_ char *start)
for (i = min; i <= max; i++)
#ifdef EBCDIC
if (has_utf8) {
- const U8 ch = (U8)NATIVE_TO_UTF(i);
- if (UNI_IS_INVARIANT(ch))
- *d++ = (U8)i;
- else {
- *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
- *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
- }
+ append_utf8_from_native_byte(i, &d);
}
else
#endif
diff --git a/utf8.c b/utf8.c
index 1bdad1b3d2..b445a2efbc 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1376,13 +1376,8 @@ Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *len)
dst = d;
while (s < send) {
- const UV uv = NATIVE_TO_ASCII(*s++);
- if (UNI_IS_INVARIANT(uv))
- *d++ = (U8)UTF_TO_NATIVE(uv);
- else {
- *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
- *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
- }
+ append_utf8_from_native_byte(*s, &d);
+ s++;
}
*d = '\0';
*len = d-dst;