diff options
-rw-r--r-- | embed.fnc | 16 | ||||
-rw-r--r-- | embed.h | 8 | ||||
-rw-r--r-- | ext/XS-APItest/APItest.xs | 32 | ||||
-rw-r--r-- | ext/XS-APItest/t/handy.t | 21 | ||||
-rw-r--r-- | handy.h | 82 | ||||
-rw-r--r-- | pod/perldelta.pod | 7 | ||||
-rw-r--r-- | proto.h | 8 | ||||
-rw-r--r-- | utf8.c | 29 | ||||
-rw-r--r-- | utf8.h | 13 |
9 files changed, 152 insertions, 64 deletions
@@ -1738,17 +1738,17 @@ s |UV |_to_utf8_case |const UV uv1 \ |NULLOK const char *special #endif Apbmd |UV |to_utf8_lower |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp -AMp |UV |_to_utf8_lower_flags |NN const U8 *p|NN U8* ustrp \ - |NULLOK STRLEN *lenp|bool flags +AMp |UV |_to_utf8_lower_flags|NN const U8 *p|NULLOK const U8* e \ + |NN U8* ustrp|NULLOK STRLEN *lenp|bool flags Apbmd |UV |to_utf8_upper |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp -AMp |UV |_to_utf8_upper_flags |NN const U8 *p|NN U8* ustrp \ - |NULLOK STRLEN *lenp|bool flags +AMp |UV |_to_utf8_upper_flags |NN const U8 *p|NULLOK const U8 *e \ + |NN U8* ustrp|NULLOK STRLEN *lenp|bool flags Apbmd |UV |to_utf8_title |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp -AMp |UV |_to_utf8_title_flags |NN const U8 *p|NN U8* ustrp \ - |NULLOK STRLEN *lenp|bool flags +AMp |UV |_to_utf8_title_flags |NN const U8 *p|NULLOK const U8* e \ + |NN U8* ustrp|NULLOK STRLEN *lenp|bool flags Apbmd |UV |to_utf8_fold |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp -AMp |UV |_to_utf8_fold_flags|NN const U8 *p|NN U8* ustrp \ - |NULLOK STRLEN *lenp|U8 flags +AMp |UV |_to_utf8_fold_flags|NN const U8 *p|NULLOK const U8 *e \ + |NN U8* ustrp|NULLOK STRLEN *lenp|U8 flags #if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C) pn |bool |translate_substr_offsets|STRLEN curlen|IV pos1_iv \ |bool pos1_is_uv|IV len_iv \ @@ -42,10 +42,10 @@ #define _is_utf8_xidcont(a) Perl__is_utf8_xidcont(aTHX_ a) #define _is_utf8_xidstart(a) Perl__is_utf8_xidstart(aTHX_ a) #define _to_uni_fold_flags(a,b,c,d) Perl__to_uni_fold_flags(aTHX_ a,b,c,d) -#define _to_utf8_fold_flags(a,b,c,d) Perl__to_utf8_fold_flags(aTHX_ a,b,c,d) -#define _to_utf8_lower_flags(a,b,c,d) Perl__to_utf8_lower_flags(aTHX_ a,b,c,d) -#define _to_utf8_title_flags(a,b,c,d) Perl__to_utf8_title_flags(aTHX_ a,b,c,d) -#define _to_utf8_upper_flags(a,b,c,d) Perl__to_utf8_upper_flags(aTHX_ a,b,c,d) +#define _to_utf8_fold_flags(a,b,c,d,e) Perl__to_utf8_fold_flags(aTHX_ a,b,c,d,e) +#define 
_to_utf8_lower_flags(a,b,c,d,e) Perl__to_utf8_lower_flags(aTHX_ a,b,c,d,e) +#define _to_utf8_title_flags(a,b,c,d,e) Perl__to_utf8_title_flags(aTHX_ a,b,c,d,e) +#define _to_utf8_upper_flags(a,b,c,d,e) Perl__to_utf8_upper_flags(aTHX_ a,b,c,d,e) #define amagic_call(a,b,c,d) Perl_amagic_call(aTHX_ a,b,c,d) #define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b) #define apply_attrs_string(a,b,c,d) Perl_apply_attrs_string(aTHX_ a,b,c,d) diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index e9d28c8d49..9c0fd1930a 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -6197,17 +6197,21 @@ test_toLOWER_uvchr(UV ord) RETVAL AV * -test_toLOWER_utf8(SV * p) +test_toLOWER_utf8(SV * p, int type) PREINIT: U8 *input; U8 s[UTF8_MAXBYTES_CASE + 1]; STRLEN len; AV *av; SV *utf8; + const unsigned char * e; + UV resultant_cp; CODE: input = (U8 *) SvPV(p, len); av = newAV(); - av_push(av, newSVuv(toLOWER_utf8(input, s, &len))); + e = input + UTF8SKIP(input) - type; + resultant_cp = toLOWER_utf8_safe(input, e, s, &len); + av_push(av, newSVuv(resultant_cp)); utf8 = newSVpvn((char *) s, len); SvUTF8_on(utf8); @@ -6273,17 +6277,21 @@ test_toFOLD_uvchr(UV ord) RETVAL AV * -test_toFOLD_utf8(SV * p) +test_toFOLD_utf8(SV * p, int type) PREINIT: U8 *input; U8 s[UTF8_MAXBYTES_CASE + 1]; STRLEN len; AV *av; SV *utf8; + const unsigned char * e; + UV resultant_cp; CODE: input = (U8 *) SvPV(p, len); av = newAV(); - av_push(av, newSVuv(toFOLD_utf8(input, s, &len))); + e = input + UTF8SKIP(input) - type; + resultant_cp = toFOLD_utf8_safe(input, e, s, &len); + av_push(av, newSVuv(resultant_cp)); utf8 = newSVpvn((char *) s, len); SvUTF8_on(utf8); @@ -6349,17 +6357,21 @@ test_toUPPER_uvchr(UV ord) RETVAL AV * -test_toUPPER_utf8(SV * p) +test_toUPPER_utf8(SV * p, int type) PREINIT: U8 *input; U8 s[UTF8_MAXBYTES_CASE + 1]; STRLEN len; AV *av; SV *utf8; + const unsigned char * e; + UV resultant_cp; CODE: input = (U8 *) SvPV(p, len); av = newAV(); - av_push(av, 
newSVuv(toUPPER_utf8(input, s, &len))); + e = input + UTF8SKIP(input) - type; + resultant_cp = toUPPER_utf8_safe(input, e, s, &len); + av_push(av, newSVuv(resultant_cp)); utf8 = newSVpvn((char *) s, len); SvUTF8_on(utf8); @@ -6418,17 +6430,21 @@ test_toTITLE_uvchr(UV ord) RETVAL AV * -test_toTITLE_utf8(SV * p) +test_toTITLE_utf8(SV * p, int type) PREINIT: U8 *input; U8 s[UTF8_MAXBYTES_CASE + 1]; STRLEN len; AV *av; SV *utf8; + const unsigned char * e; + UV resultant_cp; CODE: input = (U8 *) SvPV(p, len); av = newAV(); - av_push(av, newSVuv(toTITLE_utf8(input, s, &len))); + e = input + UTF8SKIP(input) - type; + resultant_cp = toTITLE_utf8_safe(input, e, s, &len); + av_push(av, newSVuv(resultant_cp)); utf8 = newSVpvn((char *) s, len); SvUTF8_on(utf8); diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t index 81e4c7c75b..f21a39d3bd 100644 --- a/ext/XS-APItest/t/handy.t +++ b/ext/XS-APItest/t/handy.t @@ -534,10 +534,25 @@ foreach my $name (sort keys %to_properties) { my $char = chr($j); utf8::upgrade($char); $char = quotemeta $char if $char eq '\\' || $char eq "'"; + foreach my $utf8_param("_safe", + "_safe, malformed", + ) { - my $display_call = "to${function}_utf8($display_name )"; - $ret = eval "test_to${function}_utf8('$char')"; - if (is ($@, "", "$display_call didn't give error")) { + my $utf8_param_code = $utf8_param_code{$utf8_param}; + my $expect_error = $utf8_param_code > 0; + + # Skip if can't malform (because is a UTF-8 invariant) + next if $expect_error && $i < ((ord "A" == 65) ? 
128 : 160); + + my $display_call = "to${function}_utf8($display_name, $utf8_param )"; + $ret = eval "test_to${function}_utf8('$char', $utf8_param_code)"; + if ($expect_error) { + isnt ($@, "", "expected and got error in $display_call"); + like($@, qr/Malformed UTF-8 character/, + "${tab}And got expected message"); + undef @warnings; + } + elsif (is ($@, "", "$display_call didn't give error")) { is ($ret->[0], $first_ord_should_be, sprintf("${tab}And correctly returned 0x%02X", $first_ord_should_be)); @@ -849,8 +849,9 @@ The first code point of the uppercased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more.) -=for apidoc Am|UV|toUPPER_utf8|U8* p|U8* s|STRLEN* lenp -Converts the UTF-8 encoded character at C<p> to its uppercase version, and +=for apidoc Am|UV|toUPPER_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp +Converts the first UTF-8 encoded character in the sequence starting at C<p> and +extending no further than S<C<e - 1>> to its uppercase version, and stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1> bytes since the uppercase version may be longer than the original character. @@ -859,7 +860,17 @@ The first code point of the uppercased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more). -The input character at C<p> is assumed to be well-formed. +The suffix C<_safe> in the function's name indicates that it will not attempt +to read beyond S<C<e - 1>>, provided that the constraint S<C<s E<lt> e>> is +true (this is asserted for in C<-DDEBUGGING> builds). If the UTF-8 for the +input character is malformed in some way, the program may croak, or the +function may return the REPLACEMENT CHARACTER, at the discretion of the +implementation, and subject to change in future releases. 
+ +=for apidoc Am|UV|toUPPER_utf8|U8* p|U8* s|STRLEN* lenp +This is like C<L</toUPPER_utf8_safe>>, but doesn't have the C<e> +parameter. The function therefore can't check if it is reading +beyond the end of the string. =for apidoc Am|U8|toFOLD|U8 ch Converts the specified character to foldcase. If the input is anything but an @@ -878,8 +889,9 @@ The first code point of the foldcased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more). -=for apidoc Am|UV|toFOLD_utf8|U8* p|U8* s|STRLEN* lenp -Converts the UTF-8 encoded character at C<p> to its foldcase version, and +=for apidoc Am|UV|toFOLD_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp +Converts the first UTF-8 encoded character in the sequence starting at C<p> and +extending no further than S<C<e - 1>> to its foldcase version, and stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1> bytes since the foldcase version may be longer than the original character. @@ -888,7 +900,17 @@ The first code point of the foldcased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more). -The input character at C<p> is assumed to be well-formed. +The suffix C<_safe> in the function's name indicates that it will not attempt +to read beyond S<C<e - 1>>, provided that the constraint S<C<p E<lt> e>> is +true (this is asserted in C<-DDEBUGGING> builds). If the UTF-8 for the +input character is malformed in some way, the program may croak, or the +function may return the REPLACEMENT CHARACTER, at the discretion of the +implementation, and subject to change in future releases. + +=for apidoc Am|UV|toFOLD_utf8|U8* p|U8* s|STRLEN* lenp +This is like C<L</toFOLD_utf8_safe>>, but doesn't have the C<e> +parameter. The function therefore can't check if it is reading +beyond the end of the string.
=for apidoc Am|U8|toLOWER|U8 ch Converts the specified character to lowercase. If the input is anything but an @@ -914,8 +936,10 @@ The first code point of the lowercased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more). -=for apidoc Am|UV|toLOWER_utf8|U8* p|U8* s|STRLEN* lenp -Converts the UTF-8 encoded character at C<p> to its lowercase version, and + +=for apidoc Am|UV|toLOWER_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp +Converts the first UTF-8 encoded character in the sequence starting at C<p> and +extending no further than S<C<e - 1>> to its lowercase version, and stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1> bytes since the lowercase version may be longer than the original character. @@ -924,7 +948,17 @@ The first code point of the lowercased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more). -The input character at C<p> is assumed to be well-formed. +The suffix C<_safe> in the function's name indicates that it will not attempt +to read beyond S<C<e - 1>>, provided that the constraint S<C<p E<lt> e>> is +true (this is asserted in C<-DDEBUGGING> builds). If the UTF-8 for the +input character is malformed in some way, the program may croak, or the +function may return the REPLACEMENT CHARACTER, at the discretion of the +implementation, and subject to change in future releases. + +=for apidoc Am|UV|toLOWER_utf8|U8* p|U8* s|STRLEN* lenp +This is like C<L</toLOWER_utf8_safe>>, but doesn't have the C<e> +parameter. The function therefore can't check if it is reading +beyond the end of the string. =for apidoc Am|U8|toTITLE|U8 ch Converts the specified character to titlecase.
If the input is anything but an @@ -944,8 +978,9 @@ The first code point of the titlecased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more). -=for apidoc Am|UV|toTITLE_utf8|U8* p|U8* s|STRLEN* lenp -Converts the UTF-8 encoded character at C<p> to its titlecase version, and +=for apidoc Am|UV|toTITLE_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp +Converts the first UTF-8 encoded character in the sequence starting at C<p> and +extending no further than S<C<e - 1>> to its titlecase version, and stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1> bytes since the titlecase version may be longer than the original character. @@ -954,7 +989,17 @@ The first code point of the titlecased version is returned (but note, as explained at L<the top of this section|/Character case changing>, that there may be more). -The input character at C<p> is assumed to be well-formed. +The suffix C<_safe> in the function's name indicates that it will not attempt +to read beyond S<C<e - 1>>, provided that the constraint S<C<p E<lt> e>> is +true (this is asserted in C<-DDEBUGGING> builds). If the UTF-8 for the +input character is malformed in some way, the program may croak, or the +function may return the REPLACEMENT CHARACTER, at the discretion of the +implementation, and subject to change in future releases. + +=for apidoc Am|UV|toTITLE_utf8|U8* p|U8* s|STRLEN* lenp +This is like C<L</toTITLE_utf8_safe>>, but doesn't have the C<e> +parameter. The function therefore can't check if it is reading +beyond the end of the string.
=cut @@ -1881,10 +1926,15 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e)) #define toUPPER_utf8(p,s,l) to_utf8_upper(p,s,l) /* For internal core use only, subject to change */ -#define _toFOLD_utf8_flags(p,s,l,f) _to_utf8_fold_flags (p,s,l,f) -#define _toLOWER_utf8_flags(p,s,l,f) _to_utf8_lower_flags(p,s,l,f) -#define _toTITLE_utf8_flags(p,s,l,f) _to_utf8_title_flags(p,s,l,f) -#define _toUPPER_utf8_flags(p,s,l,f) _to_utf8_upper_flags(p,s,l,f) +#define _toFOLD_utf8_flags(p,s,l,f) _to_utf8_fold_flags (p,NULL,s,l,f) +#define _toLOWER_utf8_flags(p,s,l,f) _to_utf8_lower_flags(p,NULL,s,l,f) +#define _toTITLE_utf8_flags(p,s,l,f) _to_utf8_title_flags(p,NULL,s,l,f) +#define _toUPPER_utf8_flags(p,s,l,f) _to_utf8_upper_flags(p,NULL,s,l,f) + +#define toFOLD_utf8_safe(p,e,s,l) _to_utf8_fold_flags(p,e,s,l, FOLD_FLAGS_FULL) +#define toLOWER_utf8_safe(p,e,s,l) _to_utf8_lower_flags(p,e,s,l, 0) +#define toTITLE_utf8_safe(p,e,s,l) _to_utf8_title_flags(p,e,s,l, 0) +#define toUPPER_utf8_safe(p,e,s,l) _to_utf8_upper_flags(p,e,s,l, 0) /* For internal core Perl use only: the base macros for defining macros like * isALPHA_LC_utf8. These are like _generic_utf8, but if the first code point diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 6d0b5cf1d2..fce4786a2e 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -327,12 +327,13 @@ well. =item * -New versions of macros like C<isALPHA_utf8> have been added, each with the +New versions of macros like C<isALPHA_utf8> and C<toLOWER_utf8> have +been added, each with the suffix C<_safe>, like C<isSPACE_utf8_safe>. These take an extra parameter, giving an upper limit of how far into the string it is safe to read. Using the old versions could cause attempts to read beyond the -end of the input buffer if the UTF-8 is not well-formed, and their use -now raises a deprecation warning. 
Details are at +end of the input buffer if the UTF-8 is not well-formed, and the use +of the C<isI<FOO>_utf8> ones now raises a deprecation warning. Details are at L<perlapi/Character classification>. =item * @@ -102,16 +102,16 @@ PERL_CALLCONV bool Perl__is_utf8_xidstart(pTHX_ const U8 *p) PERL_CALLCONV UV Perl__to_uni_fold_flags(pTHX_ UV c, U8 *p, STRLEN *lenp, U8 flags); #define PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS \ assert(p); assert(lenp) -PERL_CALLCONV UV Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags); +PERL_CALLCONV UV Perl__to_utf8_fold_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, U8 flags); #define PERL_ARGS_ASSERT__TO_UTF8_FOLD_FLAGS \ assert(p); assert(ustrp) -PERL_CALLCONV UV Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags); +PERL_CALLCONV UV Perl__to_utf8_lower_flags(pTHX_ const U8 *p, const U8* e, U8* ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT__TO_UTF8_LOWER_FLAGS \ assert(p); assert(ustrp) -PERL_CALLCONV UV Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags); +PERL_CALLCONV UV Perl__to_utf8_title_flags(pTHX_ const U8 *p, const U8* e, U8* ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT__TO_UTF8_TITLE_FLAGS \ assert(p); assert(ustrp) -PERL_CALLCONV UV Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags); +PERL_CALLCONV UV Perl__to_utf8_upper_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS \ assert(p); assert(ustrp) PERL_CALLCONV void Perl__warn_problematic_locale(void); @@ -2718,10 +2718,10 @@ Perl__is_utf8_mark(pTHX_ const U8 *p) /* =for apidoc to_utf8_case -Instead use the appropriate one of L</toUPPER_utf8>, -L</toTITLE_utf8>, -L</toLOWER_utf8>, -or L</toFOLD_utf8>. +Instead use the appropriate one of L</toUPPER_utf8_safe>, +L</toTITLE_utf8_safe>, +L</toLOWER_utf8_safe>, +or L</toFOLD_utf8_safe>. 
C<p> contains the pointer to the UTF-8 string encoding the character that is being converted. This routine assumes that the character @@ -3019,6 +3019,8 @@ S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* c * going on. */ #define CASE_CHANGE_BODY_START(locale_flags, LC_L1_change_macro, L1_func, \ L1_func_extra_param) \ + if (e == NULL) e = p + UTF8SKIP(p); \ + \ if (flags & (locale_flags)) { \ /* Treat a UTF-8 locale as not being in locale at all */ \ if (IN_UTF8_CTYPE_LOCALE) { \ @@ -3037,7 +3039,7 @@ S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* c return L1_func(*p, ustrp, lenp, L1_func_extra_param); \ } \ } \ - else if UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, p + UTF8SKIP(p)) { \ + else if UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e) { \ if (flags & (locale_flags)) { \ result = LC_L1_change_macro(EIGHT_BIT_UTF8_TO_NATIVE(*p, \ *(p+1))); \ @@ -3049,7 +3051,6 @@ S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* c } \ else { /* malformed UTF-8 or ord above 255 */ \ STRLEN len_result; \ - const U8 * e = p + UTF8SKIP(p); /* Have to assume len is valid */ \ result = utf8n_to_uvchr(p, e - p, &len_result, UTF8_CHECK_ONLY); \ if (len_result == (STRLEN) -1) { \ _force_out_malformed_utf8_message(p, e, \ @@ -3082,7 +3083,7 @@ S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* c /* =for apidoc to_utf8_upper -Instead use L</toUPPER_utf8>. +Instead use L</toUPPER_utf8_safe>. =cut */ @@ -3091,7 +3092,7 @@ Instead use L</toUPPER_utf8>. * be used. */ UV -Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags) +Perl__to_utf8_upper_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags) { UV result; @@ -3106,7 +3107,7 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags /* =for apidoc to_utf8_title -Instead use L</toTITLE_utf8>. +Instead use L</toTITLE_utf8_safe>. 
=cut */ @@ -3117,7 +3118,7 @@ Instead use L</toTITLE_utf8>. */ UV -Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags) +Perl__to_utf8_title_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags) { UV result; @@ -3131,7 +3132,7 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags /* =for apidoc to_utf8_lower -Instead use L</toLOWER_utf8>. +Instead use L</toLOWER_utf8_safe>. =cut */ @@ -3141,7 +3142,7 @@ Instead use L</toLOWER_utf8>. */ UV -Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags) +Perl__to_utf8_lower_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags) { UV result; @@ -3154,7 +3155,7 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags /* =for apidoc to_utf8_fold -Instead use L</toFOLD_utf8>. +Instead use L</toFOLD_utf8_safe>. =cut */ @@ -3169,7 +3170,7 @@ Instead use L</toFOLD_utf8>. */ UV -Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags) +Perl__to_utf8_fold_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, U8 flags) { UV result; @@ -76,10 +76,15 @@ the string is invariant. 
utf8n_to_uvchr_error(s, len, lenp, flags, 0) #define to_uni_fold(c, p, lenp) _to_uni_fold_flags(c, p, lenp, FOLD_FLAGS_FULL) -#define to_utf8_fold(c, p, lenp) _to_utf8_fold_flags(c, p, lenp, FOLD_FLAGS_FULL) -#define to_utf8_lower(a,b,c) _to_utf8_lower_flags(a,b,c,0) -#define to_utf8_upper(a,b,c) _to_utf8_upper_flags(a,b,c,0) -#define to_utf8_title(a,b,c) _to_utf8_title_flags(a,b,c,0) + +#define to_utf8_fold(s, r, lenr) \ + _to_utf8_fold_flags (s, NULL, r, lenr, FOLD_FLAGS_FULL) +#define to_utf8_lower(s, r, lenr) \ + _to_utf8_lower_flags(s, NULL, r ,lenr, 0) +#define to_utf8_upper(s, r, lenr) \ + _to_utf8_upper_flags(s, NULL, r, lenr, 0) +#define to_utf8_title(s, r, lenr) \ + _to_utf8_title_flags(s, NULL, r, lenr ,0) #define foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \ foldEQ_utf8_flags(s1, pe1, l1, u1, s2, pe2, l2, u2, 0) |