Make sprintf %c and chr() on inf/nan return the U+FFFD.

%c was made to produce "Inf"/"NaN" earlier, but let's keep with the Unicode way, and make chr() agree with %c.
author: Jarkko Hietaniemi <jhi@iki.fi> 2014-08-27 07:45:00 -0400
committer: Jarkko Hietaniemi <jhi@iki.fi> 2014-08-27 18:21:40 -0400
commit: 1cd88304d705aae8d2b32c6e925fedd52980a122 (patch)
tree: c763a9bd4606e0f77650aaa19d17f16f86b4609f
parent: 88cb850087cc0ad53c82068a153d89273c31675e (diff)
download: perl-1cd88304d705aae8d2b32c6e925fedd52980a122.tar.gz
8 files changed, 85 insertions, 40 deletions
diff --git a/embed.fnc b/embed.fnc
index 0689d25439..0bb6522522 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2663,6 +2663,8 @@ Apnod	|Size_t	|my_strlcat	|NULLOK char *dst|NULLOK const char *src|Size_t size
 Apnod     |Size_t |my_strlcpy     |NULLOK char *dst|NULLOK const char *src|Size_t size
 #endif
 
+Apdn	|bool	|Perl_isinfnan	|NV nv
+
 #if !defined(HAS_SIGNBIT)
 AMdnoP	|int	|Perl_signbit	|NV f
 #endif
diff --git a/embed.h b/embed.h
index 3b398531a4..1e5698c44d 100644
--- a/embed.h
+++ b/embed.h
@@ -27,6 +27,7 @@
 /* Hide global symbols */
 
 #define Gv_AMupdate(a,b)	Perl_Gv_AMupdate(aTHX_ a,b)
+#define Perl_isinfnan		Perl_Perl_isinfnan
 #define _is_in_locale_category(a,b)	Perl__is_in_locale_category(aTHX_ a,b)
 #define _is_uni_FOO(a,b)	Perl__is_uni_FOO(aTHX_ a,b)
 #define _is_uni_perl_idcont(a)	Perl__is_uni_perl_idcont(aTHX_ a)
diff --git a/numeric.c b/numeric.c
index 6fc9279ea7..4b066b2760 100644
--- a/numeric.c
+++ b/numeric.c
@@ -1324,6 +1324,22 @@ Perl_my_atof2(pTHX_ const char* orig, NV* value)
     return (char *)s;
 }
 
+/* Perl_isinfnan() is utility function that returns true if the NV
+ * argument is either an infinity or a NaN, false otherwise. */
+bool
+Perl_isinfnan(NV nv)
+{
+#ifdef Perl_isinf
+    if (Perl_isinf(nv))
+        return TRUE;
+#endif
+#ifdef Perl_isnan
+    if (Perl_isnan(nv))
+        return TRUE;
+#endif
+    return FALSE;
+}
+
 #if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
 long double
 Perl_my_modfl(long double x, long double *ip)
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index ffd8b16fa2..f3adc8278b 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2591,9 +2591,15 @@ a module that is a MRO plugin.  See L<mro> and L<perlmroapi>.
 =item Invalid negative number (%s) in chr
 
 (W utf8) You passed a negative number to C<chr>.  Negative numbers are
-not valid characters numbers, so it return the Unicode replacement
+not valid character numbers, so it return the Unicode replacement
 character (U+FFFD).
 
+=item Invalid number (%f) in chr
+
+(W utf8) You passed an invalid number (like an infinity or
+not-a-number) to C<chr>.  Those are not valid character numbers,
+so it return the Unicode replacement character (U+FFFD).
+
 =item invalid option -D%c, use -D'' to see choices
 
 (S debugging) Perl was called with invalid debugger flags.  Call perl
diff --git a/pp.c b/pp.c
index 67bf36bc32..9b8dd9058c 100644
--- a/pp.c
+++ b/pp.c
@@ -3356,23 +3356,32 @@ PP(pp_chr)
     SV *top = POPs;
 
     SvGETMAGIC(top);
-    if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
-     && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
-	 ||
-	 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
-	  && SvNV_nomg(top) < 0.0))) {
+    if (SvNOK(top) && Perl_isinfnan(SvNV(top))) {
+        if (ckWARN(WARN_UTF8)) {
+            Perl_warner(aTHX_ packWARN(WARN_UTF8),
+                        "Invalid number (%"NVgf") in chr", SvNV(top));
+        }
+        value = UNICODE_REPLACEMENT;
+    }
+    else {
+        if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
+            && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
+                ||
+                ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
+                 && SvNV_nomg(top) < 0.0))) {
 	    if (ckWARN(WARN_UTF8)) {
 		if (SvGMAGICAL(top)) {
 		    SV *top2 = sv_newmortal();
 		    sv_setsv_nomg(top2, top);
 		    top = top2;
 		}
-		Perl_warner(aTHX_ packWARN(WARN_UTF8),
-			   "Invalid negative number (%"SVf") in chr", SVfARG(top));
-	    }
-	    value = UNICODE_REPLACEMENT;
-    } else {
-	value = SvUV_nomg(top);
+                Perl_warner(aTHX_ packWARN(WARN_UTF8),
+                            "Invalid negative number (%"SVf") in chr", SVfARG(top));
+            }
+            value = UNICODE_REPLACEMENT;
+        } else {
+            value = SvUV_nomg(top);
+        }
     }
 
     SvUPGRADE(TARG,SVt_PV);
diff --git a/proto.h b/proto.h
index 347ce7ea25..cb1d14161e 100644
--- a/proto.h
+++ b/proto.h
@@ -33,6 +33,7 @@ PERL_CALLCONV UV	NATIVE_TO_NEED(const UV enc, const UV ch)
 			__attribute__pure__;
 
 PERL_CALLCONV const char *	Perl_PerlIO_context_layers(pTHX_ const char *mode);
+PERL_CALLCONV bool	Perl_Perl_isinfnan(NV nv);
 PERL_CALLCONV void*	Perl_Slab_Alloc(pTHX_ size_t sz)
 			__attribute__malloc__
 			__attribute__warn_unused_result__;
diff --git a/sv.c b/sv.c
index 9982717480..5a77d6bd43 100644
--- a/sv.c
+++ b/sv.c
@@ -11002,6 +11002,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 	I32 epix = 0; /* explicit precision index */
 	I32 evix = 0; /* explicit vector index */
 	bool asterisk = FALSE;
+        bool infnan = FALSE;
 
 	/* echo everything up to the next format specification */
 	for (q = p; q < patend && *q != '%'; ++q) ;
@@ -11349,21 +11350,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 
         if (argsv && SvNOK(argsv)) {
             /* XXX va_arg(*args) case? */
-            NV nv = SvNV(argsv);
-            char g = 0;
-#ifdef Perl_isinf
-            if (Perl_isinf(nv))
-                g = 'g';
-#endif
-#ifdef Perl_isnan
-            if (Perl_isnan(nv))
-                g = 'g';
-#endif
-            if (g) {
-                c = g;
-                q++;
-                goto floating_point;
-            }
+            infnan = Perl_isinfnan(SvNV(argsv));
         }
 
 	switch (c = *q++) {
@@ -11373,7 +11360,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 	case 'c':
 	    if (vectorize)
 		goto unknown;
-	    uv = (args) ? va_arg(*args, int) : SvIV(argsv);
+	    uv = (args) ? va_arg(*args, int) :
+                infnan ? UNICODE_REPLACEMENT : SvIV(argsv);
 	    if ((uv > 255 ||
 		 (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
 		&& !IN_BYTES) {
@@ -11429,6 +11417,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 	    /* INTEGERS */
 
 	case 'p':
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
 	    if (alt || vectorize)
 		goto unknown;
 	    uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
@@ -11443,14 +11435,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 #endif
 	    /* FALLTHROUGH */
 	case 'd':
-            /* XXX printf Inf/NaN for %[ducp], now produces quite
-             * surprising results: 1, 0, 18446744073709551615,
-             * 9223372036854775808, -9223372036854775807, bogus
-             * Unicode code points, random heap addresses in hex.
-             *
-             * For the argsv() doable (Perl_isinf, Perl_isnan), but
-             * how to do that for the va_arg(*args, ...)? */
 	case 'i':
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
 	    if (vectorize) {
 		STRLEN ulen;
 		if (!veclen)
@@ -11552,6 +11541,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 	    base = 16;
 
 	uns_integer:
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
 	    if (vectorize) {
 		STRLEN ulen;
 	vector:
diff --git a/t/op/infnan.t b/t/op/infnan.t
index 5ef8f24e1b..50dbeda0df 100644
--- a/t/op/infnan.t
+++ b/t/op/infnan.t
@@ -22,10 +22,10 @@ my @NaN = ("NAN", "nan", "qnan", "SNAN", "NanQ", "NANS",
            "NaN123", "NAN(123)", "nan%",
            "nanonano"); # RIP, Robin Williams.
 
-my @fmt = qw(e f g a d x c p);
+my @num_fmt = qw(e f g a d u o b x p);
 
-my $inf_tests = 11 + @fmt + 3 * @PInf + 3 * @NInf + 5 + 3;
-my $nan_tests =  7 + @fmt + 2 * @NaN + 3;
+my $inf_tests = 11 + @num_fmt + 4 + 3 * @PInf + 3 * @NInf + 5 + 3;
+my $nan_tests =  7 + @num_fmt + 2 + 2 * @NaN + 3;
 
 my $infnan_tests = 4;
 
@@ -57,10 +57,20 @@ SKIP: {
   is(sprintf("%g", $PInf), "Inf", "$PInf sprintf %g is Inf");
   is(sprintf("%a", $PInf), "Inf", "$PInf sprintf %a is Inf");
 
-  for my $f (@fmt) {
+  for my $f (@num_fmt) {
       is(sprintf("%$f", $PInf), "Inf", "$PInf sprintf %$f is Inf");
   }
 
+  {
+      local $^W = 0;
+
+      is(sprintf("%c", $PInf), chr(0xFFFD), "$PInf sprintf %c is Inf");
+      is(chr($PInf), chr(0xFFFD), "$PInf chr() is U+FFFD");
+
+      is(sprintf("%c", $NInf), chr(0xFFFD), "$NInf sprintf %c is Inf");
+      is(chr($NInf), chr(0xFFFD), "$NInf chr() is U+FFFD");
+  }
+
   for my $i (@PInf) {
     cmp_ok($i + 0 , '==', $PInf, "$i is +Inf");
     cmp_ok($i, '>', 0, "$i is positive");
@@ -108,10 +118,17 @@ SKIP: {
   is($NaN * 2, $NaN, "twice NaN is NaN");
   is($NaN / 2, $NaN, "half of NaN is NaN");
 
-  for my $f (@fmt) {
+  for my $f (@num_fmt) {
       is(sprintf("%$f", $NaN), "NaN", "$NaN sprintf %$f is NaN");
   }
 
+  {
+      local $^W = 0;
+
+      is(sprintf("%c", $NaN), chr(0xFFFD), "$NaN sprintf %c is Inf");
+      is(chr($NaN), chr(0xFFFD), "$NaN chr() is U+FFFD");
+  }
+
   for my $i (@NaN) {
     cmp_ok($i + 0, '!=', $i + 0, "$i is NaN numerically (by not being NaN)");
     is("@{[$i+0]}", "NaN", "$i value stringifies as NaN");
author	Jarkko Hietaniemi <jhi@iki.fi>	2014-08-27 07:45:00 -0400
committer	Jarkko Hietaniemi <jhi@iki.fi>	2014-08-27 18:21:40 -0400
commit	1cd88304d705aae8d2b32c6e925fedd52980a122 (patch)
tree	c763a9bd4606e0f77650aaa19d17f16f86b4609f
parent	88cb850087cc0ad53c82068a153d89273c31675e (diff)
download	perl-1cd88304d705aae8d2b32c6e925fedd52980a122.tar.gz