summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/utf8.pm 4
-rw-r--r-- lib/utf8.t 3
-rw-r--r-- pod/perldelta.pod 6
-rw-r--r-- universal.c 16
4 files changed, 25 insertions, 4 deletions
diff --git a/lib/utf8.pm b/lib/utf8.pm
index 823193b8c1..c1f1680336 100644
--- a/lib/utf8.pm
+++ b/lib/utf8.pm
@@ -5,7 +5,7 @@ use warnings;
our $hint_bits = 0x00800000;
-our $VERSION = '1.24';
+our $VERSION = '1.25';
our $AUTOLOAD;
sub import {
@@ -114,6 +114,8 @@ sequence in the native encoding (Latin-1 or EBCDIC) to UTF-8. The
logical character sequence itself is unchanged. If I<$string> is already
upgraded, then this is a no-op. Returns the
number of octets necessary to represent the string as UTF-8.
+Since Perl v5.38, if I<$string> is C<undef> it is left unchanged and C<undef>
+is returned; prior to that, it was converted to a defined, zero-length string.
If your code needs to be compatible with versions of perl without
C<use feature 'unicode_strings';>, you can force Unicode semantics on
diff --git a/lib/utf8.t b/lib/utf8.t
index d35110baee..fa69fcbb88 100644
--- a/lib/utf8.t
+++ b/lib/utf8.t
@@ -669,6 +669,9 @@ for(__PACKAGE__) {
eval { utf8::upgrade($_) };
is $@, "", 'no error with utf8::upgrade on read-only COW';
}
+
+is(utf8::upgrade(undef), undef, "Returns undef for undef input"); # GH #20419
+
# This one croaks, but not because the scalar is read-only
eval "package \x{100};\n" . <<'END'
for(__PACKAGE__) {
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index b6f50ca471..35730c6bd4 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -45,6 +45,12 @@ XXX For a release on a stable branch, this section aspires to be:
[ List each incompatible change as a =head2 entry ]
+=head2 L<C<utf8::upgrade()>|utf8/Utility functions>
+
+Starting in this release, if the input string is C<undef>, it remains
+C<undef> and C<undef> is returned. Previously it would be changed into
+a defined, zero-length string.
+
=head1 Deprecations
XXX Any deprecated features, syntax, modules etc. should be listed here.
diff --git a/universal.c b/universal.c
index 119117e818..20a36fae85 100644
--- a/universal.c
+++ b/universal.c
@@ -593,11 +593,21 @@ XS(XS_utf8_upgrade)
croak_xs_usage(cv, "sv");
else {
SV * const sv = ST(0);
- STRLEN RETVAL;
+ STRLEN RETVAL = 0;
dXSTARG;
- RETVAL = sv_utf8_upgrade(sv);
- XSprePUSH; PUSHi((IV)RETVAL);
+ XSprePUSH;
+ if (UNLIKELY(! sv)) {
+ XSRETURN_UNDEF;
+ }
+
+ SvGETMAGIC(sv);
+ if (UNLIKELY(! SvOK(sv))) {
+ XSRETURN_UNDEF;
+ }
+
+ RETVAL = sv_utf8_upgrade_nomg(sv);
+ PUSHi( (IV) RETVAL);
}
XSRETURN(1);
}