diff options
author | Karl Williamson <khw@cpan.org> | 2019-06-28 23:57:31 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-07-01 19:38:00 -0600 |
commit | 82651abe60a8c1ca1f9eb6aae0202ecfd34bfecf (patch) | |
tree | c6d1ca001a6040c9e60c9a3260ce8323a2d99985 | |
parent | 34b633732d69b4cd1ac2f7d38cbcb76fa2e2c7ca (diff) | |
download | perl-82651abe60a8c1ca1f9eb6aae0202ecfd34bfecf.tar.gz |
utf8_to_uvchr_buf() make behavior match docs
For well formed input, there is no change. But for malformed it wasn't
returning the documented length when warnings were enabled, and not
always the documented value when they were disabled.
This is implemented as an inline function, called from both the macro
and the Perl_ form.
Devel::PPPort has sufficient tests for this.
-rw-r--r-- | embed.fnc | 1 | ||||
-rw-r--r-- | embed.h | 1 | ||||
-rw-r--r-- | inline.h | 21 | ||||
-rw-r--r-- | proto.h | 5 | ||||
-rw-r--r-- | utf8.c | 5 | ||||
-rw-r--r-- | utf8.h | 4 |
6 files changed, 30 insertions, 7 deletions
@@ -1928,6 +1928,7 @@ ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen AbpdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen AbpxD |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen AMpd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen +Ai |UV |_utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen ApdD |UV |utf8_to_uvuni_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen px |bool |check_utf8_print |NN const U8 *s|const STRLEN len @@ -46,6 +46,7 @@ #define _to_utf8_lower_flags(a,b,c,d,e,f,g) Perl__to_utf8_lower_flags(aTHX_ a,b,c,d,e,f,g) #define _to_utf8_title_flags(a,b,c,d,e,f,g) Perl__to_utf8_title_flags(aTHX_ a,b,c,d,e,f,g) #define _to_utf8_upper_flags(a,b,c,d,e,f,g) Perl__to_utf8_upper_flags(aTHX_ a,b,c,d,e,f,g) +#define _utf8_to_uvchr_buf(a,b,c) S__utf8_to_uvchr_buf(aTHX_ a,b,c) #define _utf8n_to_uvchr_msgs_helper Perl__utf8n_to_uvchr_msgs_helper #define amagic_call(a,b,c,d) Perl_amagic_call(aTHX_ a,b,c,d) #define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b) @@ -1842,6 +1842,27 @@ S_utf8n_to_uvchr_msgs(const U8 *s, return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs); } +PERL_STATIC_INLINE UV +S__utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen) +{ + PERL_ARGS_ASSERT__UTF8_TO_UVCHR_BUF; + + assert(s < send); + + if (! ckWARN_d(WARN_UTF8)) { + return utf8n_to_uvchr(s, send - s, retlen, + (UTF8_ALLOW_ANY & ~UTF8_ALLOW_EMPTY)); + } + else { + UV ret = utf8n_to_uvchr(s, send - s, retlen, 0); + if (retlen && ret == 0 && *s != '\0') { + *retlen = (STRLEN) -1; + } + + return ret; + } +} + /* ------------------------------- perl.h ----------------------------- */ /* @@ -143,6 +143,11 @@ PERL_CALLCONV UV Perl__to_utf8_title_flags(pTHX_ const U8 *p, const U8* e, U8* u PERL_CALLCONV UV Perl__to_utf8_upper_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags, const char * const file, const int line); #define PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS \ assert(p); assert(ustrp); assert(file) +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE UV S__utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen); +#define PERL_ARGS_ASSERT__UTF8_TO_UVCHR_BUF \ + assert(s); assert(send) +#endif PERL_CALLCONV UV Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors, AV ** msgs); #define PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER \ assert(s) @@ -2271,10 +2271,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen) { PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF; - assert(s < send); - - return utf8n_to_uvchr(s, send - s, retlen, - ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); + return _utf8_to_uvchr_buf(s, send, retlen); } /* This is marked as deprecated @@ -69,9 +69,7 @@ the string is invariant. #define uvchr_to_utf8_flags_msgs(d,uv,flags,msgs) \ uvoffuni_to_utf8_flags_msgs(d,NATIVE_TO_UNI(uv),flags, msgs) #define utf8_to_uvchr_buf(s, e, lenp) \ - (__ASSERT_((U8*) (e) > (U8*) (s)) \ - utf8n_to_uvchr(s, (U8*)(e) - (U8*)(s), lenp, \ - ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY)) + _utf8_to_uvchr_buf((const U8 *) (s), (const U8 *) e, lenp) #define utf8n_to_uvchr(s, len, lenp, flags) \ utf8n_to_uvchr_error(s, len, lenp, flags, 0) #define utf8n_to_uvchr_error(s, len, lenp, flags, errors) \ |