diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2014-08-27 07:45:00 -0400 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2014-08-27 18:21:40 -0400 |
commit | 1cd88304d705aae8d2b32c6e925fedd52980a122 (patch) | |
tree | c763a9bd4606e0f77650aaa19d17f16f86b4609f | |
parent | 88cb850087cc0ad53c82068a153d89273c31675e (diff) | |
download | perl-1cd88304d705aae8d2b32c6e925fedd52980a122.tar.gz |
Make sprintf %c and chr() on inf/nan return the U+FFFD.
%c was made to produce "Inf"/"NaN" earlier, but let's
keep with the Unicode way, and make chr() agree with %c.
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 1 | ||||
-rw-r--r-- | numeric.c | 16 | ||||
-rw-r--r-- | pod/perldiag.pod | 8 | ||||
-rw-r--r-- | pp.c | 31 | ||||
-rw-r--r-- | proto.h | 1 | ||||
-rw-r--r-- | sv.c | 39 | ||||
-rw-r--r-- | t/op/infnan.t | 27 |
8 files changed, 85 insertions, 40 deletions
@@ -2663,6 +2663,8 @@ Apnod |Size_t |my_strlcat |NULLOK char *dst|NULLOK const char *src|Size_t size Apnod |Size_t |my_strlcpy |NULLOK char *dst|NULLOK const char *src|Size_t size #endif +Apdn |bool |Perl_isinfnan |NV nv + #if !defined(HAS_SIGNBIT) AMdnoP |int |Perl_signbit |NV f #endif @@ -27,6 +27,7 @@ /* Hide global symbols */ #define Gv_AMupdate(a,b) Perl_Gv_AMupdate(aTHX_ a,b) +#define Perl_isinfnan Perl_Perl_isinfnan #define _is_in_locale_category(a,b) Perl__is_in_locale_category(aTHX_ a,b) #define _is_uni_FOO(a,b) Perl__is_uni_FOO(aTHX_ a,b) #define _is_uni_perl_idcont(a) Perl__is_uni_perl_idcont(aTHX_ a) @@ -1324,6 +1324,22 @@ Perl_my_atof2(pTHX_ const char* orig, NV* value) return (char *)s; } +/* Perl_isinfnan() is utility function that returns true if the NV + * argument is either an infinity or a NaN, false otherwise. */ +bool +Perl_isinfnan(NV nv) +{ +#ifdef Perl_isinf + if (Perl_isinf(nv)) + return TRUE; +#endif +#ifdef Perl_isnan + if (Perl_isnan(nv)) + return TRUE; +#endif + return FALSE; +} + #if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL) long double Perl_my_modfl(long double x, long double *ip) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index ffd8b16fa2..f3adc8278b 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2591,9 +2591,15 @@ a module that is a MRO plugin. See L<mro> and L<perlmroapi>. =item Invalid negative number (%s) in chr (W utf8) You passed a negative number to C<chr>. Negative numbers are -not valid characters numbers, so it return the Unicode replacement +not valid character numbers, so it return the Unicode replacement character (U+FFFD). +=item Invalid number (%f) in chr + +(W utf8) You passed an invalid number (like an infinity or +not-a-number) to C<chr>. Those are not valid character numbers, +so it return the Unicode replacement character (U+FFFD). + =item invalid option -D%c, use -D'' to see choices (S debugging) Perl was called with invalid debugger flags. Call perl @@ -3356,23 +3356,32 @@ PP(pp_chr) SV *top = POPs; SvGETMAGIC(top); - if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */ - && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0) - || - ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top))) - && SvNV_nomg(top) < 0.0))) { + if (SvNOK(top) && Perl_isinfnan(SvNV(top))) { + if (ckWARN(WARN_UTF8)) { + Perl_warner(aTHX_ packWARN(WARN_UTF8), + "Invalid number (%"NVgf") in chr", SvNV(top)); + } + value = UNICODE_REPLACEMENT; + } + else { + if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */ + && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0) + || + ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top))) + && SvNV_nomg(top) < 0.0))) { if (ckWARN(WARN_UTF8)) { if (SvGMAGICAL(top)) { SV *top2 = sv_newmortal(); sv_setsv_nomg(top2, top); top = top2; } - Perl_warner(aTHX_ packWARN(WARN_UTF8), - "Invalid negative number (%"SVf") in chr", SVfARG(top)); - } - value = UNICODE_REPLACEMENT; - } else { - value = SvUV_nomg(top); + Perl_warner(aTHX_ packWARN(WARN_UTF8), + "Invalid negative number (%"SVf") in chr", SVfARG(top)); + } + value = UNICODE_REPLACEMENT; + } else { + value = SvUV_nomg(top); + } } SvUPGRADE(TARG,SVt_PV); @@ -33,6 +33,7 @@ PERL_CALLCONV UV NATIVE_TO_NEED(const UV enc, const UV ch) __attribute__pure__; PERL_CALLCONV const char * Perl_PerlIO_context_layers(pTHX_ const char *mode); +PERL_CALLCONV bool Perl_Perl_isinfnan(NV nv); PERL_CALLCONV void* Perl_Slab_Alloc(pTHX_ size_t sz) __attribute__malloc__ __attribute__warn_unused_result__; @@ -11002,6 +11002,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p I32 epix = 0; /* explicit precision index */ I32 evix = 0; /* explicit vector index */ bool asterisk = FALSE; + bool infnan = FALSE; /* echo everything up to the next format specification */ for (q = p; q < patend && *q != '%'; ++q) ; @@ -11349,21 +11350,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (argsv && SvNOK(argsv)) { /* XXX va_arg(*args) case? */ - NV nv = SvNV(argsv); - char g = 0; -#ifdef Perl_isinf - if (Perl_isinf(nv)) - g = 'g'; -#endif -#ifdef Perl_isnan - if (Perl_isnan(nv)) - g = 'g'; -#endif - if (g) { - c = g; - q++; - goto floating_point; - } + infnan = Perl_isinfnan(SvNV(argsv)); } switch (c = *q++) { @@ -11373,7 +11360,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 'c': if (vectorize) goto unknown; - uv = (args) ? va_arg(*args, int) : SvIV(argsv); + uv = (args) ? va_arg(*args, int) : + infnan ? UNICODE_REPLACEMENT : SvIV(argsv); if ((uv > 255 || (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv))) && !IN_BYTES) { @@ -11429,6 +11417,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* INTEGERS */ case 'p': + if (infnan) { + c = 'g'; + goto floating_point; + } if (alt || vectorize) goto unknown; uv = PTR2UV(args ? va_arg(*args, void*) : argsv); @@ -11443,14 +11435,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p #endif /* FALLTHROUGH */ case 'd': - /* XXX printf Inf/NaN for %[ducp], now produces quite - * surprising results: 1, 0, 18446744073709551615, - * 9223372036854775808, -9223372036854775807, bogus - * Unicode code points, random heap addresses in hex. - * - * For the argsv() doable (Perl_isinf, Perl_isnan), but - * how to do that for the va_arg(*args, ...)? */ case 'i': + if (infnan) { + c = 'g'; + goto floating_point; + } if (vectorize) { STRLEN ulen; if (!veclen) @@ -11552,6 +11541,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p base = 16; uns_integer: + if (infnan) { + c = 'g'; + goto floating_point; + } if (vectorize) { STRLEN ulen; vector: diff --git a/t/op/infnan.t b/t/op/infnan.t index 5ef8f24e1b..50dbeda0df 100644 --- a/t/op/infnan.t +++ b/t/op/infnan.t @@ -22,10 +22,10 @@ my @NaN = ("NAN", "nan", "qnan", "SNAN", "NanQ", "NANS", "NaN123", "NAN(123)", "nan%", "nanonano"); # RIP, Robin Williams. -my @fmt = qw(e f g a d x c p); +my @num_fmt = qw(e f g a d u o b x p); -my $inf_tests = 11 + @fmt + 3 * @PInf + 3 * @NInf + 5 + 3; -my $nan_tests = 7 + @fmt + 2 * @NaN + 3; +my $inf_tests = 11 + @num_fmt + 4 + 3 * @PInf + 3 * @NInf + 5 + 3; +my $nan_tests = 7 + @num_fmt + 2 + 2 * @NaN + 3; my $infnan_tests = 4; @@ -57,10 +57,20 @@ SKIP: { is(sprintf("%g", $PInf), "Inf", "$PInf sprintf %g is Inf"); is(sprintf("%a", $PInf), "Inf", "$PInf sprintf %a is Inf"); - for my $f (@fmt) { + for my $f (@num_fmt) { is(sprintf("%$f", $PInf), "Inf", "$PInf sprintf %$f is Inf"); } + { + local $^W = 0; + + is(sprintf("%c", $PInf), chr(0xFFFD), "$PInf sprintf %c is Inf"); + is(chr($PInf), chr(0xFFFD), "$PInf chr() is U+FFFD"); + + is(sprintf("%c", $NInf), chr(0xFFFD), "$NInf sprintf %c is Inf"); + is(chr($NInf), chr(0xFFFD), "$NInf chr() is U+FFFD"); + } + for my $i (@PInf) { cmp_ok($i + 0 , '==', $PInf, "$i is +Inf"); cmp_ok($i, '>', 0, "$i is positive"); @@ -108,10 +118,17 @@ SKIP: { is($NaN * 2, $NaN, "twice NaN is NaN"); is($NaN / 2, $NaN, "half of NaN is NaN"); - for my $f (@fmt) { + for my $f (@num_fmt) { is(sprintf("%$f", $NaN), "NaN", "$NaN sprintf %$f is NaN"); } + { + local $^W = 0; + + is(sprintf("%c", $NaN), chr(0xFFFD), "$NaN sprintf %c is Inf"); + is(chr($NaN), chr(0xFFFD), "$NaN chr() is U+FFFD"); + } + for my $i (@NaN) { cmp_ok($i + 0, '!=', $i + 0, "$i is NaN numerically (by not being NaN)"); is("@{[$i+0]}", "NaN", "$i value stringifies as NaN"); |