hexfp: IEEE 754 subnormals printf %a

(cherry picked from commit b6d9b423fab1963346eb79e83b356114396b1f2e)
author: Jarkko Hietaniemi <jhi@iki.fi> 2017-08-09 13:19:16 +0100
committer: Steve Hay <steve.m.hay@googlemail.com> 2017-08-09 13:19:33 +0100
commit: bc87b110ad0060342a7ad96338aebe75e7e10574 (patch)
tree: efcd3eef3a2ca9ef3362460dce703909191aa0f2
parent: 2f530c475e4ce18290dd29b16212b698f17e469f (diff)
download: perl-bc87b110ad0060342a7ad96338aebe75e7e10574.tar.gz
5 files changed, 129 insertions, 26 deletions
diff --git a/perl.h b/perl.h
index 7080620489..e19af5b27c 100644
--- a/perl.h
+++ b/perl.h
@@ -6758,7 +6758,9 @@ extern void moncontrol(int);
 #endif
 
 /* All the basic IEEE formats have the implicit bit,
- * except for the 80-bit extended formats, which will undef this. */
+ * except for the x86 80-bit extended formats, which will undef this.
+ * Also note that the IEEE 754 subnormals (formerly known as denormals)
+ * do not have the implicit bit of one. */
 #define NV_IMPLICIT_BIT
 
 #ifdef LONG_DOUBLEKIND
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 78aeb164e3..737d3633f6 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2479,7 +2479,9 @@ than the floating point supports.
 =item Hexadecimal float: exponent underflow
 
 (W overflow) The hexadecimal floating point has a smaller exponent
-than the floating point supports.
+than the floating point supports.  With the IEEE 754 floating point,
+this may also mean that the subnormals (formerly known as denormals)
+are being used, which may or may not be an error.
 
 =item Hexadecimal float: internal error (%s)
 
diff --git a/sv.c b/sv.c
index ec5b344127..0132ac0c38 100644
--- a/sv.c
+++ b/sv.c
@@ -10985,8 +10985,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
  * the hexadecimal values (for %a/%A).  The nv is the NV where the value
  * are being extracted from (either directly from the long double in-memory
  * presentation, or from the uquad computed via frexp+ldexp).  frexp also
- * is used to update the exponent.  vhex is the pointer to the beginning
- * of the output buffer (of VHEX_SIZE).
+ * is used to update the exponent.  The subnormal is set to true
+ * for IEEE 754 subnormals/denormals.  The vhex is the pointer to
+ * the beginning of the output buffer (of VHEX_SIZE).
  *
  * The tricky part is that S_hextract() needs to be called twice:
  * the first time with vend as NULL, and the second time with vend as
@@ -10996,14 +10997,15 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
  * (the extraction of the hexadecimal values) takes place.
  * Sanity failures cause fatal failures during both rounds. */
 STATIC U8*
-S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
+S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal,
+           U8* vhex, U8* vend)
 {
     U8* v = vhex;
     int ix;
     int ixmin = 0, ixmax = 0;
 
-    /* XXX Inf/NaN/denormal handling in the HEXTRACT_IMPLICIT_BIT,
-     * and elsewhere. */
+    /* XXX Inf/NaN are not handled here, since it is
+     * assumed they are to be output as "Inf" and "NaN". */
 
     /* These macros are just to reduce typos, they have multiple
      * repetitions below, but usually only one (or sometimes two)
@@ -11038,11 +11040,17 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
     for (ix = a; ix <= b; ix++) { HEXTRACT_BYTE(ix); }
 #define HEXTRACT_IMPLICIT_BIT(nv) \
     STMT_START { \
-        if (vend) *v++ = ((nv) == 0.0) ? 0 : 1; else v++; \
+        if (!(*subnormal = (HEXTRACT_EXPONENT_BITS() == 0))) { \
+            if (vend) *v++ = ((nv) == 0.0) ? 0 : 1; else v++; \
+        } \
    } STMT_END
 
-/* Most formats do.  Those which don't should undef this. */
+/* Most formats do.  Those which don't should undef this.
+ *
+ * But also note that IEEE 754 subnormals do not have it, or,
+ * expressed alternatively, their implicit bit is zero. */
 #define HEXTRACT_HAS_IMPLICIT_BIT
+
 /* Many formats do.  Those which don't should undef this. */
 #define HEXTRACT_HAS_TOP_NYBBLE
 
@@ -11056,6 +11064,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
     const U8* vmaxend = vhex + HEXTRACTSIZE;
     PERL_UNUSED_VAR(ix); /* might happen */
     (void)Perl_frexp(PERL_ABS(nv), exponent);
+    *subnormal = FALSE;
     if (vend && (vend <= vhex || vend > vmaxend)) {
         /* diag_listed_as: Hexadecimal float: internal error (%s) */
         Perl_croak(aTHX_ "Hexadecimal float: internal error (entry)");
@@ -11065,10 +11074,11 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
 #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE)
 #  if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN
         /* Used in e.g. VMS and HP-UX IA-64, e.g. -0.1L:
-         * 9a 99 99 99 99 99 99 99 99 99 99 99 99 99 fb 3f */
+         * 9a 99 99 99 99 99 99 99 99 99 99 99 99 99 fb bf */
         /* The bytes 13..0 are the mantissa/fraction,
          * the 15,14 are the sign+exponent. */
         const U8* nvp = (const U8*)(&nv);
+#   define HEXTRACT_EXPONENT_BITS() (nvp[14] | (nvp[15] & 0x7F) << 8)
         HEXTRACT_IMPLICIT_BIT(nv);
 #   undef HEXTRACT_HAS_TOP_NYBBLE
         HEXTRACT_BYTES_LE(13, 0);
@@ -11078,14 +11088,15 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
         /* The bytes 2..15 are the mantissa/fraction,
          * the 0,1 are the sign+exponent. */
         const U8* nvp = (const U8*)(&nv);
+#   define HEXTRACT_EXPONENT_BITS() ((nvp[0] & 0x7F) << 8 | nvp[1])
         HEXTRACT_IMPLICIT_BIT(nv);
 #   undef HEXTRACT_HAS_TOP_NYBBLE
         HEXTRACT_BYTES_BE(2, 15);
 #  elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
         /* x86 80-bit "extended precision", 64 bits of mantissa / fraction /
-         * significand, 15 bits of exponent, 1 bit of sign.  NVSIZE can
-         * be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux and OS X),
-         * meaning that 2 or 6 bytes are empty padding. */
+         * significand, 15 bits of exponent, 1 bit of sign.  No implicit bit.
+         * NVSIZE can be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux
+         * and OS X), meaning that 2 or 6 bytes are empty padding. */
         /* The bytes 7..0 are the mantissa/fraction */
         const U8* nvp = (const U8*)(&nv);
 #    undef HEXTRACT_HAS_IMPLICIT_BIT
@@ -11134,18 +11145,21 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
 #    ifdef HEXTRACT_LITTLE_ENDIAN
         /* 0 1 2 3 4 5 6 7 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */
         const U8* nvp = (const U8*)(&nv);
+#   define HEXTRACT_EXPONENT_BITS() (nvp[6] | (nvp[7] & 0x7F) << 4)
         HEXTRACT_IMPLICIT_BIT(nv);
         HEXTRACT_TOP_NYBBLE(6);
         HEXTRACT_BYTES_LE(5, 0);
 #    elif defined(HEXTRACT_BIG_ENDIAN)
         /* 7 6 5 4 3 2 1 0 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */
         const U8* nvp = (const U8*)(&nv);
+#   define HEXTRACT_EXPONENT_BITS() (nvp[1] | (nvp[0] & 0x7F) << 4)
         HEXTRACT_IMPLICIT_BIT(nv);
         HEXTRACT_TOP_NYBBLE(1);
         HEXTRACT_BYTES_BE(2, 7);
 #    elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_LE_BE
         /* 4 5 6 7 0 1 2 3 (MSB = 7, LSB = 0, 6:7 = nybble:exponent:sign) */
         const U8* nvp = (const U8*)(&nv);
+#   define HEXTRACT_EXPONENT_BITS() (nvp[2] | (nvp[3] & 0x7F) << 4)
         HEXTRACT_IMPLICIT_BIT(nv);
         HEXTRACT_TOP_NYBBLE(2); /* 6 */
         HEXTRACT_BYTE(1); /* 5 */
@@ -11157,6 +11171,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
 #    elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_BE_LE
         /* 3 2 1 0 7 6 5 4 (MSB = 7, LSB = 0, 7:6 = sign:exponent:nybble) */
         const U8* nvp = (const U8*)(&nv);
+#   define HEXTRACT_EXPONENT_BITS() (nvp[5] | (nvp[4] & 0x7F) << 4)
         HEXTRACT_IMPLICIT_BIT(nv);
         HEXTRACT_TOP_NYBBLE(5); /* 6 */
         HEXTRACT_BYTE(6); /* 5 */
@@ -12404,6 +12419,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 U8* vend; /* pointer to one beyond last digit of vhex */
                 U8* vfnz = NULL; /* first non-zero */
                 U8* vlnz = NULL; /* last non-zero */
+                U8* v0 = NULL; /* first output */
                 const bool lower = (c == 'a');
                 /* At output the values of vhex (up to vend) will
                  * be mapped through the xdig to get the actual
@@ -12412,6 +12428,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 int zerotail = 0; /* how many extra zeros to append */
                 int exponent = 0; /* exponent of the floating point input */
                 bool hexradix = FALSE; /* should we output the radix */
+                bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
 
                 /* XXX: denormals, NaN, Inf.
                  *
@@ -12421,14 +12438,17 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  * should be output as 0x0.0000000000001p-1022 to
                  * match its internal structure. */
 
-                vend = S_hextract(aTHX_ nv, &exponent, vhex, NULL);
-                S_hextract(aTHX_ nv, &exponent, vhex, vend);
+                vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL);
+                S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend);
 
 #if NVSIZE > DOUBLESIZE
 #  ifdef HEXTRACT_HAS_IMPLICIT_BIT
                 /* In this case there is an implicit bit,
-                 * and therefore the exponent is shifted shift by one. */
-                exponent--;
+                 * and therefore the exponent is shifted by one,
+                 * unless this is a subnormal/denormal. */
+                if (!subnormal) {
+                    exponent--;
+                }
 #  else
                 /* In this case there is no implicit bit,
                  * and the exponent is shifted by the first xdigit. */
@@ -12473,17 +12493,44 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                         exponent--;
 #endif
 
+                    if (subnormal) {
+                      if (vfnz[0] > 1) {
+                        /* We need to right shift the hex nybbles so
+                         * that the output of the subnormal starts
+                         * from the first true bit. */
+                        int i, n;
+                        U8 *vshr;
+                        /* Find the ceil(log2(v[0])) of
+                         * the top non-zero nybble. */
+                        for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { }
+                        assert(n < 4);
+                        vlnz[1] = 0;
+                        for (vshr = vlnz; vshr >= vfnz; vshr--) {
+                          vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n);
+                          vshr[0] >>= n;
+                        }
+                        if (vlnz[1]) {
+                          vlnz++;
+                        }
+                      }
+                      v0 = vfnz;
+                    } else {
+                      v0 = vhex;
+                    }
+
                     if (precis > 0) {
-                        if ((SSize_t)(precis + 1) < vend - vhex) {
+                        U8* ve = (subnormal ? vlnz + 1 : vend);
+                        SSize_t vn = ve - (subnormal ? vfnz : vhex);
+                        if ((SSize_t)(precis + 1) < vn) {
                             bool round;
 
-                            v = vhex + precis + 1;
+                            v = v0 + precis + 1;
                             /* Round away from zero: if the tail
                              * beyond the precis xdigits is equal to
                              * or greater than 0x8000... */
                             round = *v > 0x8;
                             if (!round && *v == 0x8) {
-                                for (v++; v < vend; v++) {
+                                for (v++; v < ve; v++) {
                                     if (*v) {
                                         round = TRUE;
                                         break;
@@ -12491,13 +12538,13 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                                 }
                             }
                             if (round) {
-                                for (v = vhex + precis; v >= vhex; v--) {
+                                for (v = v0 + precis; v >= v0; v--) {
                                     if (*v < 0xF) {
                                         (*v)++;
                                         break;
                                     }
                                     *v = 0;
-                                    if (v == vhex) {
+                                    if (v == v0) {
                                         /* If the carry goes all the way to
                                          * the front, we need to output
                                          * a single '1'. This goes against
@@ -12509,14 +12556,16 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                                 }
                             }
                             /* The new effective "last non zero". */
-                            vlnz = vhex + precis;
+                            vlnz = v0 + precis;
                         }
                         else {
-                            zerotail = precis - (vlnz - vhex);
+                            zerotail =
+                              subnormal ? precis - vn + 1 :
+                              precis - (vlnz - vhex);
                         }
                     }
 
-                    v = vhex;
+                    v = v0;
                     *p++ = xdig[*v++];
 
                     /* If there are non-zero xdigits, the radix
diff --git a/t/op/sprintf2.t b/t/op/sprintf2.t
index 43ed919be5..8c852bbbaa 100644
--- a/t/op/sprintf2.t
+++ b/t/op/sprintf2.t
@@ -262,7 +262,7 @@ if ($Config{nvsize} == 8 &&
     print "# no hexfloat tests\n";
 }
 
-plan tests => 1408 + ($Q ? 0 : 12) + @hexfloat + 12;
+plan tests => 1408 + ($Q ? 0 : 12) + @hexfloat + 37;
 
 use strict;
 use Config;
@@ -721,6 +721,7 @@ SKIP: {
 SKIP: {
     # [perl #127183] Non-canonical hexadecimal floats are parsed prematurely
 
+    # IEEE 754 64-bit
     skip("nv_preserves_uv_bits is $Config{nv_preserves_uv_bits}, not 53", 3)
         unless $Config{nv_preserves_uv_bits} == 53;
 
@@ -749,3 +750,44 @@ SKIP: {
            "non-canonical form");
     }
 }
+
+SKIP: {
+    my @subnormals = (
+	# Keep these as strings so that non-IEEE-754 don't trip over them.
+	[ '1e-320', '%a', '0x1.fap-1064' ],
+	[ '1e-321', '%a', '0x1.94p-1067' ],
+	[ '1e-322', '%a', '0x1.4p-1070' ],
+	[ '1e-323', '%a', '0x1p-1073' ],
+	[ '1e-324', '%a', '0x0p+0' ],  # underflow
+	[ '3e-320', '%a', '0x1.7b8p-1062' ],
+	[ '3e-321', '%a', '0x1.2f8p-1065' ],
+	[ '3e-322', '%a', '0x1.e8p-1069' ],
+	[ '3e-323', '%a', '0x1.8p-1072' ],
+	[ '3e-324', '%a', '0x1p-1074' ], # the smallest possible value
+	[ '7e-320', '%a', '0x1.bacp-1061' ],
+	[ '7e-321', '%a', '0x1.624p-1064' ],
+	[ '7e-322', '%a', '0x1.1cp-1067' ],
+	[ '7e-323', '%a', '0x1.cp-1071' ],
+	[ '7e-324', '%a', '0x1p-1074' ], # the smallest possible value, again
+	[ '3e-320', '%.4a', '0x1.7b80p-1062' ],
+	[ '3e-321', '%.4a', '0x1.2f80p-1065' ],
+	[ '3e-322', '%.4a', '0x1.e800p-1069' ],
+	[ '3e-323', '%.4a', '0x1.8000p-1072' ],
+	[ '3e-324', '%.4a', '0x1.0000p-1074' ],
+	[ '3e-320', '%.1a', '0x1.8p-1062' ],
+	[ '3e-321', '%.1a', '0x1.3p-1065' ],
+	[ '3e-322', '%.1a', '0x1.ep-1069' ],
+	[ '3e-323', '%.1a', '0x1.8p-1072' ],
+	[ '3e-324', '%.1a', '0x1.0p-1074' ],
+	);
+
+    # IEEE 754 64-bit
+    skip("nv_preserves_uv_bits is $Config{nv_preserves_uv_bits}, not 53",
+         scalar @subnormals)
+        unless $Config{nv_preserves_uv_bits} == 53;
+
+    for my $t (@subnormals) {
+        my $s = sprintf($t->[1], $t->[0]);
+        is($s, $t->[2], "subnormal @$t got $s");
+    }
+}
diff --git a/toke.c b/toke.c
index f5f7fc3537..8cefd4d1a1 100644
--- a/toke.c
+++ b/toke.c
@@ -10628,6 +10628,14 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 #ifdef NV_MIN_EXP
                                 if (negexp
                                     && -hexfp_exp < NV_MIN_EXP - 1) {
+                                    /* NOTE: this means that the exponent
+                                     * underflow warning happens for
+                                     * the IEEE 754 subnormals (denormals),
+                                     * because DBL_MIN_EXP etc are the lowest
+                                     * possible binary (or, rather, DBL_RADIX-base)
+                                     * exponent for normals, not subnormals.
+                                     *
+                                     * This may or may not be a good thing. */
                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                                    "Hexadecimal float: exponent underflow");
                                     break;
author	Jarkko Hietaniemi <jhi@iki.fi>	2017-08-09 13:19:16 +0100
committer	Steve Hay <steve.m.hay@googlemail.com>	2017-08-09 13:19:33 +0100
commit	bc87b110ad0060342a7ad96338aebe75e7e10574 (patch)
tree	efcd3eef3a2ca9ef3362460dce703909191aa0f2
parent	2f530c475e4ce18290dd29b16212b698f17e469f (diff)
download	perl-bc87b110ad0060342a7ad96338aebe75e7e10574.tar.gz