diff options
-rw-r--r-- | embed.fnc | 1 | ||||
-rw-r--r-- | ext/POSIX/POSIX.xs | 23 | ||||
-rw-r--r-- | ext/POSIX/lib/POSIX.pm | 2 | ||||
-rw-r--r-- | ext/POSIX/lib/POSIX.pod | 11 | ||||
-rw-r--r-- | locale.c | 45 | ||||
-rw-r--r-- | pod/perldelta.pod | 12 | ||||
-rw-r--r-- | proto.h | 3 |
7 files changed, 74 insertions, 23 deletions
@@ -1382,6 +1382,7 @@ Cp |I32 * |markstack_grow #if defined(USE_LOCALE_COLLATE) p |int |magic_setcollxfrm|NN SV* sv|NN MAGIC* mg p |int |magic_freecollxfrm|NN SV* sv|NN MAGIC* mg +EXop |SV * |strxfrm |NN SV * src : Defined in locale.c, used only in sv.c # if defined(PERL_IN_LOCALE_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_MATHOMS_C) Ep |char* |mem_collxfrm_ |NN const char* input_string \ diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index cd8447e825..ec409c06fd 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -3437,24 +3437,11 @@ void strxfrm(src) SV * src CODE: - { - STRLEN srclen; - STRLEN dstlen; - STRLEN buflen; - char *p = SvPV(src,srclen); - srclen++; - buflen = srclen * 4 + 1; - ST(0) = sv_2mortal(newSV(buflen)); - dstlen = strxfrm(SvPVX(ST(0)), p, (size_t)buflen); - if (dstlen >= buflen) { - dstlen++; - SvGROW(ST(0), dstlen); - strxfrm(SvPVX(ST(0)), p, (size_t)dstlen); - dstlen--; - } - SvCUR_set(ST(0), dstlen); - SvPOK_only(ST(0)); - } +#ifdef USE_LOCALE_COLLATE + ST(0) = Perl_strxfrm(aTHX_ src); +#else + ST(0) = src; +#endif SysRet mkfifo(filename, mode) diff --git a/ext/POSIX/lib/POSIX.pm b/ext/POSIX/lib/POSIX.pm index aabb03cd2e..c99bb35f11 100644 --- a/ext/POSIX/lib/POSIX.pm +++ b/ext/POSIX/lib/POSIX.pm @@ -4,7 +4,7 @@ use warnings; our ($AUTOLOAD, %SIGRT); -our $VERSION = '2.08'; +our $VERSION = '2.09'; require XSLoader; diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod index d14f53247e..6ef6dc66d5 100644 --- a/ext/POSIX/lib/POSIX.pod +++ b/ext/POSIX/lib/POSIX.pod @@ -1972,9 +1972,14 @@ Used with C<eq> or C<cmp> as an alternative to C<L</strcoll>>. Not really needed since Perl can do this transparently, see L<perllocale>. -Beware that in a UTF-8 locale, anything you pass to this function must -be in UTF-8; and when not in a UTF-8 locale, anything passed must not be -UTF-8 encoded. +Unlike the libc C<strxfrm>, this allows NUL characters in the input +C<$src>. + +It doesn't make sense for a string to be encoded in one locale (say, +ISO-8859-6, Arabic) and to collate it based on another (like ISO-8859-7, +Greek). Perl assumes that the current C<LC_CTYPE> locale correctly +represents the encoding of C<$src>, and ignores the value of +C<LC_COLLATE>. =item C<sysconf> @@ -5223,7 +5223,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn) STATIC void S_compute_collxfrm_coefficients(pTHX) { - PL_in_utf8_COLLATE_locale = (PL_collation_standard) ? 0 : is_locale_utf8(PL_collation_name); @@ -5982,6 +5981,50 @@ S_print_collxfrm_input_and_return(pTHX_ } # endif /* DEBUGGING */ + +SV * +Perl_strxfrm(pTHX_ SV * src) +{ + PERL_ARGS_ASSERT_STRXFRM; + + /* For use by POSIX::strxfrm(). The PV in an SV is controlled by LC_CTYPE, + * not LC_COLLATE. If the locales for the two categories differ, LC_CTYPE + * should win out. + * + * If we can't calculate a collation, 'src' is instead returned, so that + * future comparisons will be by code point order */ + +# ifdef USE_LOCALE_CTYPE + + const char * orig_ctype = toggle_locale_c(LC_CTYPE, + querylocale_c(LC_COLLATE)); +# endif + + SV * dst = src; + STRLEN dstlen; + STRLEN srclen; + const char *p = SvPV_const(src,srclen); + const U32 utf8_flag = SvUTF8(src); + char *d = mem_collxfrm_(p, srclen, &dstlen, cBOOL(utf8_flag)); + + assert(utf8_flag == 0 || utf8_flag == SVf_UTF8); + + if (d != NULL) { + assert(dstlen > 0); + dst =newSVpvn_flags(d + COLLXFRM_HDR_LEN, + dstlen, SVs_TEMP|utf8_flag); + Safefree(d); + } + +# ifdef USE_LOCALE_CTYPE + + restore_toggled_locale_c(LC_CTYPE, orig_ctype); + +# endif + + return dst; +} + #endif /* USE_LOCALE_COLLATE */ #if defined(DEBUGGING) || defined(USE_POSIX_2008_LOCALE) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index b8493d670c..30d11b0fe5 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -381,6 +381,18 @@ Correctly handle C<OP_ANONCODE> ops generated by CPAN modules that don't include the OPf_REF flag when propagating lvalue context. [github #20532] +L<POSIX::strxfrm|POSIX/strxfrm> now uses the C<LC_CTYPE> locale category +to specify its collation, ignoring any differing C<LC_COLLATE>. It +doesn't make sense for a string to be encoded in one locale (say, +ISO-8859-6, Arabic) and to collate it based on another (like ISO-8859-7, +Greek). Perl assumes that the current C<LC_CTYPE> locale correctly +represents the encoding, and collates accordingly. + +Also, embedded C<NUL> characters are now allowed in the input. + +If locale collation is not enabled on the platform (C<LC_COLLATE>), the +input is returned unchanged. + =back =head1 Known Problems @@ -7624,6 +7624,9 @@ PERL_CALLCONV int Perl_magic_setcollxfrm(pTHX_ SV* sv, MAGIC* mg) #define PERL_ARGS_ASSERT_MAGIC_SETCOLLXFRM \ assert(sv); assert(mg) +PERL_CALLCONV SV * Perl_strxfrm(pTHX_ SV * src); +#define PERL_ARGS_ASSERT_STRXFRM \ + assert(src) #ifndef NO_MATHOMS PERL_CALLCONV char* Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp); #define PERL_ARGS_ASSERT_SV_COLLXFRM \ |