summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc1
-rw-r--r--ext/POSIX/POSIX.xs23
-rw-r--r--ext/POSIX/lib/POSIX.pm2
-rw-r--r--ext/POSIX/lib/POSIX.pod11
-rw-r--r--locale.c45
-rw-r--r--pod/perldelta.pod12
-rw-r--r--proto.h3
7 files changed, 74 insertions, 23 deletions
diff --git a/embed.fnc b/embed.fnc
index 70656e6dfe..81e3952cad 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1382,6 +1382,7 @@ Cp |I32 * |markstack_grow
#if defined(USE_LOCALE_COLLATE)
p |int |magic_setcollxfrm|NN SV* sv|NN MAGIC* mg
p |int |magic_freecollxfrm|NN SV* sv|NN MAGIC* mg
+EXop |SV * |strxfrm |NN SV * src
: Defined in locale.c, used only in sv.c
# if defined(PERL_IN_LOCALE_C) || defined(PERL_IN_SV_C) || defined(PERL_IN_MATHOMS_C)
Ep |char* |mem_collxfrm_ |NN const char* input_string \
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index cd8447e825..ec409c06fd 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -3437,24 +3437,11 @@ void
strxfrm(src)
SV * src
CODE:
- {
- STRLEN srclen;
- STRLEN dstlen;
- STRLEN buflen;
- char *p = SvPV(src,srclen);
- srclen++;
- buflen = srclen * 4 + 1;
- ST(0) = sv_2mortal(newSV(buflen));
- dstlen = strxfrm(SvPVX(ST(0)), p, (size_t)buflen);
- if (dstlen >= buflen) {
- dstlen++;
- SvGROW(ST(0), dstlen);
- strxfrm(SvPVX(ST(0)), p, (size_t)dstlen);
- dstlen--;
- }
- SvCUR_set(ST(0), dstlen);
- SvPOK_only(ST(0));
- }
+#ifdef USE_LOCALE_COLLATE
+ ST(0) = Perl_strxfrm(aTHX_ src);
+#else
+ ST(0) = src;
+#endif
SysRet
mkfifo(filename, mode)
diff --git a/ext/POSIX/lib/POSIX.pm b/ext/POSIX/lib/POSIX.pm
index aabb03cd2e..c99bb35f11 100644
--- a/ext/POSIX/lib/POSIX.pm
+++ b/ext/POSIX/lib/POSIX.pm
@@ -4,7 +4,7 @@ use warnings;
our ($AUTOLOAD, %SIGRT);
-our $VERSION = '2.08';
+our $VERSION = '2.09';
require XSLoader;
diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod
index d14f53247e..6ef6dc66d5 100644
--- a/ext/POSIX/lib/POSIX.pod
+++ b/ext/POSIX/lib/POSIX.pod
@@ -1972,9 +1972,14 @@ Used with C<eq> or C<cmp> as an alternative to C<L</strcoll>>.
Not really needed since Perl can do this transparently, see
L<perllocale>.
-Beware that in a UTF-8 locale, anything you pass to this function must
-be in UTF-8; and when not in a UTF-8 locale, anything passed must not be
-UTF-8 encoded.
+Unlike the libc C<strxfrm>, this allows NUL characters in the input
+C<$src>.
+
+It doesn't make sense for a string to be encoded in one locale (say,
+ISO-8859-6, Arabic) and to collate it based on another (like ISO-8859-7,
+Greek). Perl assumes that the current C<LC_CTYPE> locale correctly
+represents the encoding of C<$src>, and ignores the value of
+C<LC_COLLATE>.
=item C<sysconf>
diff --git a/locale.c b/locale.c
index 3735726ac6..b68f8ec6ed 100644
--- a/locale.c
+++ b/locale.c
@@ -5223,7 +5223,6 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
STATIC void
S_compute_collxfrm_coefficients(pTHX)
{
-
PL_in_utf8_COLLATE_locale = (PL_collation_standard)
? 0
: is_locale_utf8(PL_collation_name);
@@ -5982,6 +5981,50 @@ S_print_collxfrm_input_and_return(pTHX_
}
# endif /* DEBUGGING */
+
+SV *
+Perl_strxfrm(pTHX_ SV * src)
+{
+ PERL_ARGS_ASSERT_STRXFRM;
+
+ /* For use by POSIX::strxfrm(). The PV in an SV is controlled by LC_CTYPE,
+ * not LC_COLLATE. If the locales for the two categories differ, LC_CTYPE
+ * should win out.
+ *
+ * If we can't calculate a collation, 'src' is instead returned, so that
+ * future comparisons will be by code point order */
+
+# ifdef USE_LOCALE_CTYPE
+
+ const char * orig_ctype = toggle_locale_c(LC_CTYPE,
+ querylocale_c(LC_COLLATE));
+# endif
+
+ SV * dst = src;
+ STRLEN dstlen;
+ STRLEN srclen;
+ const char *p = SvPV_const(src,srclen);
+ const U32 utf8_flag = SvUTF8(src);
+ char *d = mem_collxfrm_(p, srclen, &dstlen, cBOOL(utf8_flag));
+
+ assert(utf8_flag == 0 || utf8_flag == SVf_UTF8);
+
+ if (d != NULL) {
+ assert(dstlen > 0);
+ dst =newSVpvn_flags(d + COLLXFRM_HDR_LEN,
+ dstlen, SVs_TEMP|utf8_flag);
+ Safefree(d);
+ }
+
+# ifdef USE_LOCALE_CTYPE
+
+ restore_toggled_locale_c(LC_CTYPE, orig_ctype);
+
+# endif
+
+ return dst;
+}
+
#endif /* USE_LOCALE_COLLATE */
#if defined(DEBUGGING) || defined(USE_POSIX_2008_LOCALE)
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index b8493d670c..30d11b0fe5 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -381,6 +381,18 @@ Correctly handle C<OP_ANONCODE> ops generated by CPAN modules that
don't include the OPf_REF flag when propagating lvalue context.
[github #20532]
+L<POSIX::strxfrm|POSIX/strxfrm> now uses the C<LC_CTYPE> locale category
+to specify its collation, ignoring any differing C<LC_COLLATE>. It
+doesn't make sense for a string to be encoded in one locale (say,
+ISO-8859-6, Arabic) and to collate it based on another (like ISO-8859-7,
+Greek). Perl assumes that the current C<LC_CTYPE> locale correctly
+represents the encoding, and collates accordingly.
+
+Also, embedded C<NUL> characters are now allowed in the input.
+
+If locale collation is not enabled on the platform (C<LC_COLLATE>), the
+input is returned unchanged.
+
=back
=head1 Known Problems
diff --git a/proto.h b/proto.h
index 11fe86c481..50464710d5 100644
--- a/proto.h
+++ b/proto.h
@@ -7624,6 +7624,9 @@ PERL_CALLCONV int Perl_magic_setcollxfrm(pTHX_ SV* sv, MAGIC* mg)
#define PERL_ARGS_ASSERT_MAGIC_SETCOLLXFRM \
assert(sv); assert(mg)
+PERL_CALLCONV SV * Perl_strxfrm(pTHX_ SV * src);
+#define PERL_ARGS_ASSERT_STRXFRM \
+ assert(src)
#ifndef NO_MATHOMS
PERL_CALLCONV char* Perl_sv_collxfrm(pTHX_ SV *const sv, STRLEN *const nxp);
#define PERL_ARGS_ASSERT_SV_COLLXFRM \