summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2019-06-28 23:57:31 -0600
committerKarl Williamson <khw@cpan.org>2019-07-01 19:38:00 -0600
commit82651abe60a8c1ca1f9eb6aae0202ecfd34bfecf (patch)
treec6d1ca001a6040c9e60c9a3260ce8323a2d99985
parent34b633732d69b4cd1ac2f7d38cbcb76fa2e2c7ca (diff)
downloadperl-82651abe60a8c1ca1f9eb6aae0202ecfd34bfecf.tar.gz
utf8_to_uvchr_buf() make behavior match docs
For well formed input, there is no change. But for malformed it wasn't returning the documented length when warnings were enabled, and not always the documented value when they were disabled. This is implemented as an inline function, called from both the macro and the Perl_ form. Devel::PPPort has sufficient tests for this.
-rw-r--r--embed.fnc1
-rw-r--r--embed.h1
-rw-r--r--inline.h21
-rw-r--r--proto.h5
-rw-r--r--utf8.c5
-rw-r--r--utf8.h4
6 files changed, 30 insertions, 7 deletions
diff --git a/embed.fnc b/embed.fnc
index 0cd09172b4..bfc9dca241 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1928,6 +1928,7 @@ ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
AbpdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
AbpxD |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
AMpd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
+Ai |UV |_utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
ApdD |UV |utf8_to_uvuni_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
px |bool |check_utf8_print |NN const U8 *s|const STRLEN len
diff --git a/embed.h b/embed.h
index 506327f78e..f72a7eec80 100644
--- a/embed.h
+++ b/embed.h
@@ -46,6 +46,7 @@
#define _to_utf8_lower_flags(a,b,c,d,e,f,g) Perl__to_utf8_lower_flags(aTHX_ a,b,c,d,e,f,g)
#define _to_utf8_title_flags(a,b,c,d,e,f,g) Perl__to_utf8_title_flags(aTHX_ a,b,c,d,e,f,g)
#define _to_utf8_upper_flags(a,b,c,d,e,f,g) Perl__to_utf8_upper_flags(aTHX_ a,b,c,d,e,f,g)
+#define _utf8_to_uvchr_buf(a,b,c) S__utf8_to_uvchr_buf(aTHX_ a,b,c)
#define _utf8n_to_uvchr_msgs_helper Perl__utf8n_to_uvchr_msgs_helper
#define amagic_call(a,b,c,d) Perl_amagic_call(aTHX_ a,b,c,d)
#define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b)
diff --git a/inline.h b/inline.h
index 442c08e036..7d81da634f 100644
--- a/inline.h
+++ b/inline.h
@@ -1842,6 +1842,27 @@ S_utf8n_to_uvchr_msgs(const U8 *s,
return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs);
}
+PERL_STATIC_INLINE UV
+S__utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT__UTF8_TO_UVCHR_BUF;
+
+ assert(s < send);
+
+ if (! ckWARN_d(WARN_UTF8)) {
+ return utf8n_to_uvchr(s, send - s, retlen,
+ (UTF8_ALLOW_ANY & ~UTF8_ALLOW_EMPTY));
+ }
+ else {
+ UV ret = utf8n_to_uvchr(s, send - s, retlen, 0);
+ if (retlen && ret == 0 && *s != '\0') {
+ *retlen = (STRLEN) -1;
+ }
+
+ return ret;
+ }
+}
+
/* ------------------------------- perl.h ----------------------------- */
/*
diff --git a/proto.h b/proto.h
index 9359016ea7..a708e142cd 100644
--- a/proto.h
+++ b/proto.h
@@ -143,6 +143,11 @@ PERL_CALLCONV UV Perl__to_utf8_title_flags(pTHX_ const U8 *p, const U8* e, U8* u
PERL_CALLCONV UV Perl__to_utf8_upper_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags, const char * const file, const int line);
#define PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS \
assert(p); assert(ustrp); assert(file)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE UV S__utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen);
+#define PERL_ARGS_ASSERT__UTF8_TO_UVCHR_BUF \
+ assert(s); assert(send)
+#endif
PERL_CALLCONV UV Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors, AV ** msgs);
#define PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER \
assert(s)
diff --git a/utf8.c b/utf8.c
index 84db2f6aee..07e4df72dd 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2271,10 +2271,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
{
PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
- assert(s < send);
-
- return utf8n_to_uvchr(s, send - s, retlen,
- ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+ return _utf8_to_uvchr_buf(s, send, retlen);
}
/* This is marked as deprecated
diff --git a/utf8.h b/utf8.h
index d0b8742d5a..88e74de746 100644
--- a/utf8.h
+++ b/utf8.h
@@ -69,9 +69,7 @@ the string is invariant.
#define uvchr_to_utf8_flags_msgs(d,uv,flags,msgs) \
uvoffuni_to_utf8_flags_msgs(d,NATIVE_TO_UNI(uv),flags, msgs)
#define utf8_to_uvchr_buf(s, e, lenp) \
- (__ASSERT_((U8*) (e) > (U8*) (s)) \
- utf8n_to_uvchr(s, (U8*)(e) - (U8*)(s), lenp, \
- ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY))
+ _utf8_to_uvchr_buf((const U8 *) (s), (const U8 *) e, lenp)
#define utf8n_to_uvchr(s, len, lenp, flags) \
utf8n_to_uvchr_error(s, len, lenp, flags, 0)
#define utf8n_to_uvchr_error(s, len, lenp, flags, errors) \