infnan: Implement NaN payload APIs.

Based on the latest ISO/IEC WG draft: http://www.open-std.org/JTC1/sc22/wg14/www/docs/n1778.pdf (section 14.10, pp 42,45-47). There isn't yet an official C1X effort (these weren't part of C11) so there's no C1X to refer to.
author: Jarkko Hietaniemi <jhi@iki.fi> 2015-06-02 18:09:54 -0400
committer: Jarkko Hietaniemi <jhi@iki.fi> 2015-06-12 09:57:41 -0400
commit: 07bb61ac4e127de8c4d5c8a40adf7cd36baa6253 (patch)
tree: 0326aa311dba0c71eabd3c85080e9249288ba336 /ext
parent: 453b60f134ff4079e06d0a4adde0c3f90b71fb2f (diff)
download: perl-07bb61ac4e127de8c4d5c8a40adf7cd36baa6253.tar.gz
5 files changed, 383 insertions, 52 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index 535fccf328..670d8ecb5a 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -1118,6 +1118,167 @@ static NV my_trunc(NV x)
 #  define c99_trunc my_trunc
 #endif
 
+#undef NV_PAYLOAD_DEBUG
+
+/* NOTE: the NaN payload API implementation is hand-rolled, since the
+ * APIs are only proposed ones as of June 2015, so very few, if any,
+ * platforms have implementations yet, so HAS_SETPAYLOAD and such are
+ * unlikely to be helpful.
+ *
+ * XXX - if the core numification wants to actually generate
+ * the nan payload in "nan(123)", and maybe "nans(456)", for
+ * signaling payload", this needs to be moved to e.g. numeric.c
+ * (look for grok_infnan)
+ *
+ * Conversely, if the core stringification wants the nan payload
+ * and/or the nan quiet/signaling distinction, S_getpayload()
+ * from this file needs to be moved, to e.g. sv.c (look for S_infnan_2pv),
+ * and the (trivial) functionality of issignaling() copied
+ * (for generating "NaNS", or maybe even "NaNQ") -- or maybe there
+ * are too many formatting parameters for simple stringification?
+ */
+
+/* While it might make sense for the payload to be UV or IV,
+ * to avoid conversion loss, the proposed ISO interfaces use
+ * a floating point input, which is then truncated to integer,
+ * and only the integer part being used.  This is workable,
+ * except for: (1) the conversion loss (2) suboptimal for
+ * 32-bit integer platforms.  A workaround API for (2) and
+ * in general for bit-honesty would be an array of integers
+ * as the payload... but the proposed C API does nothing of
+ * the kind. */
+#if NVSIZE == UVSIZE
+#  define NV_PAYLOAD_TYPE UV
+#else
+#  define NV_PAYLOAD_TYPE NV
+#endif
+
+#ifdef LONGDOUBLE_DOUBLEDOUBLE
+#  define NV_PAYLOAD_SIZEOF_ASSERT(a) assert(sizeof(a) == NVSIZE / 2)
+#else
+#  define NV_PAYLOAD_SIZEOF_ASSERT(a) assert(sizeof(a) == NVSIZE)
+#endif
+
+static void S_setpayload(NV* nvp, NV_PAYLOAD_TYPE payload, bool signaling)
+{
+  dTHX;
+  static const U8 m[] = { NV_NAN_PAYLOAD_MASK };
+  static const U8 p[] = { NV_NAN_PAYLOAD_PERM };
+  UV a[(NVSIZE + UVSIZE - 1) / UVSIZE] = { 0 };
+  int i;
+  NV_PAYLOAD_SIZEOF_ASSERT(m);
+  NV_PAYLOAD_SIZEOF_ASSERT(p);
+  *nvp = NV_NAN;
+  /* Divide the input into the array in "base unsigned integer" in
+   * little-endian order.  Note that the integer might be smaller than
+   * an NV (if UV is U32, for example). */
+#if NVSIZE == UVSIZE
+  a[0] = payload;  /* The trivial case. */
+#else
+  {
+    NV t1 = c99_trunc(payload); /* towards zero (drop fractional) */
+#ifdef NV_PAYLOAD_DEBUG
+    Perl_warn(aTHX_ "t1 = %"NVgf" (payload %"NVgf")\n", t1, payload);
+#endif
+    if (t1 <= UV_MAX) {
+      a[0] = (UV)t1;  /* Fast path, also avoids rounding errors (right?) */
+    } else {
+      /* UVSIZE < NVSIZE or payload > UV_MAX.
+       *
+       * This may happen for example if:
+       * (1) UVSIZE == 32 and common 64-bit double NV
+       *     (32-bit system not using -Duse64bitint)
+       * (2) UVSIZE == 64 and the x86-style 80-bit long double NV
+       *     (note that here the room for payload is actually the 64 bits)
+       * (3) UVSIZE == 64 and the 128-bit IEEE 764 quadruple NV
+       *     (112 bits in mantissa, 111 bits room for payload)
+       *
+       * NOTE: this is very sensitive to correctly functioning
+       * fmod()/fmodl(), and correct casting of big-unsigned-integer to NV.
+       * If these don't work right, especially the low order bits
+       * are in danger.  For example Solaris and AIX seem to have issues
+       * here, especially if using 32-bit UVs. */
+      NV t2;
+      for (i = 0, t2 = t1; i < (int)C_ARRAY_LENGTH(a); i++) {
+        a[i] = (UV)Perl_fmod(t2, (NV)UV_MAX);
+        t2 = Perl_floor(t2 / (NV)UV_MAX);
+      }
+    }
+  }
+#endif
+#ifdef NV_PAYLOAD_DEBUG
+  for (i = 0; i < (int)C_ARRAY_LENGTH(a); i++) {
+    Perl_warn(aTHX_ "a[%d] = 0x%"UVxf"\n", i, a[i]);
+  }
+#endif
+  for (i = 0; i < (int)sizeof(p); i++) {
+    if (m[i] && p[i] < sizeof(p)) {
+      U8 s = (p[i] % UVSIZE) << 3;
+      UV u = a[p[i] / UVSIZE] & ((UV)0xFF << s);
+      U8 b = (U8)((u >> s) & m[i]);
+      ((U8 *)(nvp))[i] &= ~m[i]; /* For NaNs with non-zero payload bits. */
+      ((U8 *)(nvp))[i] |= b;
+#ifdef NV_PAYLOAD_DEBUG
+      Perl_warn(aTHX_ "set p[%2d] = %02x (i = %d, m = %02x, s = %2d, b = %02x, u = %08"UVxf")\n", i, ((U8 *)(nvp))[i], i, m[i], s, b, u);
+#endif
+      a[p[i] / UVSIZE] &= ~u;
+    }
+  }
+  if (signaling) {
+    NV_NAN_SET_SIGNALING(nvp);
+  }
+#ifdef USE_LONG_DOUBLE
+# if LONG_DOUBLEKIND == 3 || LONG_DOUBLEKIND == 4
+  memset((char *)nvp + 10, '\0', LONG_DOUBLESIZE - 10); /* x86 long double */
+# endif
+#endif
+  for (i = 0; i < (int)C_ARRAY_LENGTH(a); i++) {
+    if (a[i]) {
+      Perl_warn(aTHX_ "payload lost bits (%"UVxf")", a[i]);
+      break;
+    }
+  }
+#ifdef NV_PAYLOAD_DEBUG
+  for (i = 0; i < NVSIZE; i++) {
+    PerlIO_printf(Perl_debug_log, "%02x ", ((U8 *)(nvp))[i]);
+  }
+  PerlIO_printf(Perl_debug_log, "\n");
+#endif
+}
+
+static NV_PAYLOAD_TYPE S_getpayload(NV nv)
+{
+  dTHX;
+  static const U8 m[] = { NV_NAN_PAYLOAD_MASK };
+  static const U8 p[] = { NV_NAN_PAYLOAD_PERM };
+  UV a[(NVSIZE + UVSIZE - 1) / UVSIZE] = { 0 };
+  int i;
+  NV payload;
+  NV_PAYLOAD_SIZEOF_ASSERT(m);
+  NV_PAYLOAD_SIZEOF_ASSERT(p);
+  payload = 0;
+  for (i = 0; i < (int)sizeof(p); i++) {
+    if (m[i] && p[i] < NVSIZE) {
+      U8 s = (p[i] % UVSIZE) << 3;
+      a[p[i] / UVSIZE] |= (UV)(((U8 *)(&nv))[i] & m[i]) << s;
+    }
+  }
+  for (i = (int)C_ARRAY_LENGTH(a) - 1; i >= 0; i--) {
+#ifdef NV_PAYLOAD_DEBUG
+    Perl_warn(aTHX_ "a[%d] = %"UVxf"\n", i, a[i]);
+#endif
+    payload *= UV_MAX;
+    payload += a[i];
+  }
+#ifdef NV_PAYLOAD_DEBUG
+  for (i = 0; i < NVSIZE; i++) {
+    PerlIO_printf(Perl_debug_log, "%02x ", ((U8 *)(&nv))[i]);
+  }
+  PerlIO_printf(Perl_debug_log, "\n");
+#endif
+  return payload;
+}
+
 /* XXX This comment is just to make I_TERMIO and I_SGTTY visible to
    metaconfig for future extension writers.  We don't use them in POSIX.
    (This is really sneaky :-)  --AD
@@ -2508,6 +2669,41 @@ fpclassify(x)
 	RETVAL
 
 NV
+getpayload(nv)
+	NV nv
+    CODE:
+	RETVAL = S_getpayload(nv);
+    OUTPUT:
+	RETVAL
+
+void
+setpayload(nv, payload)
+	NV nv
+	NV payload
+    CODE:
+	S_setpayload(&nv, payload, FALSE);
+    OUTPUT:
+	nv
+
+void
+setpayloadsig(nv, payload)
+	NV nv
+	NV payload
+    CODE:
+	nv = NV_NAN;
+	S_setpayload(&nv, payload, TRUE);
+    OUTPUT:
+	nv
+
+int
+issignaling(nv)
+	NV nv
+    CODE:
+	RETVAL = Perl_isnan(nv) && NV_NAN_IS_SIGNALING(&nv);
+    OUTPUT:
+	RETVAL
+
+NV
 copysign(x,y)
 	NV		x
 	NV		y
@@ -2707,51 +2903,27 @@ fma(x,y,z)
 	RETVAL
 
 NV
-nan(s = 0)
-	char*	s;
+nan(payload = 0)
+	NV payload
     CODE:
-	PERL_UNUSED_VAR(s);
-#ifdef c99_nan
-	RETVAL = c99_nan(s ? s : "");
-#elif defined(NV_NAN)
-	/* XXX if s != NULL, warn about unused argument,
-         * or implement the nan payload setting. */
-        /* NVSIZE == 8: the NaN "header" (the exponent) is 0x7FF (the 0x800
-         * is the sign bit, which should be irrelevant for NaN, so really
-         * also 0xFFF), leaving 64 - 12 = 52 bits for the NaN payload
-         * (6.5 bytes, note about infinities below).
-         *
-         * (USE_LONG_DOUBLE and)
-         * LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN:
-         * the NaN "header" is still 0x7FF, leaving 80 - 12 = 68 bits
-         * for the payload (8.5 bytes, note about infinities below).
-         *
-         * doubledouble? aargh. Maybe like doubles, 52 + 52 = 104 bits?
-         *
-         * NVSIZE == 16:
-         * the NaN "header" is still 0x7FF, leaving 128 - 12 = 116 bits
-         * for the payload (14.5 bytes, note about infinities below)
-         *
-         * Which ones of the NaNs are 'signaling' and which are 'quiet',
-         * depends.  In the IEEE-754 1985, nothing was specified.  But the
-         * majority of companies decided that the MSB of the mantissa was
-         * the bit for 'quiet'.  (Only PA-RISC and MIPS were different,
-         * using the MSB as 'signaling'.)  The IEEE-754 2008 *recommended*
-         * (but did not dictate) the MSB as the 'quiet' bit.
-         *
-         * In other words, on most platforms, and for 64-bit doubles:
-         * [7FF8000000000000, 7FFFFFFFFFFFFFFF] quiet
-         * [FFF8000000000000, FFFFFFFFFFFFFFFF] quiet
-         * [7FF0000000000001, 7FF7FFFFFFFFFFFF] signaling
-         * [FFF0000000000001, FFF7FFFFFFFFFFFF] signaling
-         *
-         * The C99 nan() is supposed to generate *quiet* NaNs.
-         *
-         * Note the asymmetry:
-         * The 7FF0000000000000 is positive infinity,
-         * the FFF0000000000000 is negative infinity.
-         */
-	RETVAL = NV_NAN;
+#ifdef NV_NAN
+        /* If no payload given, just return the default NaN.
+         * This makes a difference in platforms where the default
+         * NaN is not all zeros. */
+	if (items == 0) {
+          RETVAL = NV_NAN;
+	} else {
+          S_setpayload(&RETVAL, payload, FALSE);
+        }
+#elif defined(c99_nan)
+	{
+	  STRLEN elen = my_snprintf(PL_efloatbuf, PL_efloatsize, "%g", nv);
+          if ((IV)elen == -1) {
+	    RETVAL = NV_NAN;
+          } else {
+            RETVAL = c99_nan(PL_efloatbuf);
+          }
+        }
 #else
 	not_here("nan");
 #endif
diff --git a/ext/POSIX/lib/POSIX.pm b/ext/POSIX/lib/POSIX.pm
index 15eb5d624c..215b1f5459 100644
--- a/ext/POSIX/lib/POSIX.pm
+++ b/ext/POSIX/lib/POSIX.pm
@@ -403,6 +403,8 @@ my %other_export_tags = (
     )],
 
     stdlib_h_c99 => [ @{$default_export_tags{stdlib_h}}, 'strtold' ],
+
+    nan_payload => [ qw(getpayload setpayload setpayloadsig issignaling) ],
 );
 
 {
diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod
index d9e84b4361..3e6f78d27d 100644
--- a/ext/POSIX/lib/POSIX.pod
+++ b/ext/POSIX/lib/POSIX.pod
@@ -626,6 +626,17 @@ This is identical to Perl's builtin C<getlogin()> function for
 returning the user name associated with the current session, see
 L<perlfunc/getlogin>.
 
+=item C<getpayload>
+
+	use POSIX ':nan_payload';
+	getpayload($var)
+
+Returns the C<NaN> payload.
+
+Note the API instability warning in L</setpayload>.
+
+See L</nan> for more discussion about C<NaN>.
+
 =item C<getpgrp>
 
 This is identical to Perl's builtin C<getpgrp()> function for
@@ -867,6 +878,17 @@ modifier is in effect?>).
 The function returns C<TRUE> if the input string is empty, or if the
 corresponding C function returns C<TRUE> for every byte in the string.
 
+=item C<issignaling>
+
+	use POSIX ':nan_payload';
+	issignaling($var, $payload)
+
+Return true if the argument is a I<signaling> NaN.
+
+Note the API instability warning in L</setpayload>.
+
+See L</nan> for more discussion about C<NaN>.
+
 =item C<isspace>
 
 Deprecated function whose use raises a warning, and which is slated to
@@ -1193,9 +1215,38 @@ See also L</round>.
 
 =item C<nan>
 
-Returns not-a-number [C99].
+   my $nan = nan();
+
+Returns C<NaN>, not-a-number [C99].
+
+The returned NaN is always a I<quiet> NaN, as opposed to I<signaling>.
+
+With an argument, can be used to generate a NaN with I<payload>.
+The argument is first interpreted as a floating point number,
+but then any fractional parts are truncated (towards zero),
+and the value is interpreted as an unsigned integer.
+The bits of this integer are stored in the unused bits of the NaN.
+
+The result has a dual nature: it is a NaN, but it also carries
+the integer inside it.  The integer can be retrieved with L</getpayload>.
+Note, though, that the payload is not propagated, not even on copies,
+and definitely not in arithmetic operations.
+
+How many bits fit in the NaN depends on what kind of floating points
+are being used, but on the most common platforms (64-bit IEEE 754,
+or the x86 80-bit long doubles) there are 51 and 61 bits available,
+respectively.  (There would be 52 and 62, but the quiet/signaling
+bit of NaNs takes away one.)  However, because of the floating-point-to-
+integer-and-back conversions, please test carefully whether you get back
+what you put in.  If your integers are only 32 bits wide, you probably
+should not rely on more than 32 bits of payload.
 
-See also L</isnan>.
+Whether a "signaling" NaN is in any way different from a "quiet" NaN,
+depends on the platform.  Also note that the payload of the default
+NaN (no argument to nan()) is not necessarily zero, use C<setpayload>
+to explicitly set the payload.
+
+See also L</isnan>, L</setpayload> and L</issignaling>.
 
 =item C<nearbyint>
 
@@ -1489,6 +1540,38 @@ out which locales are available in your system.
 
 	$loc = setlocale( LC_COLLATE, "es_AR.ISO8859-1" );
 
+=item C<setpayload>
+
+	use POSIX ':nan_payload';
+	setpayload($var, $payload);
+
+Sets the C<NaN> payload of var.
+
+NOTE: the NaN payload APIs are based on the latest (as of June 2015)
+proposed ISO C interfaces, but they are not yet a standard.  Things
+may change.
+
+See L</nan> for more discussion about C<NaN>.
+
+See also L</setpayloadsig>, L</isnan>, L</getpayload>, and L</issignaling>.
+
+=item C<setpayloadsig>
+
+	use POSIX ':nan_payload';
+	setpayloadsig($var, $payload);
+
+Like L</setpayload> but also makes the NaN I<signaling>.
+
+Depending on the platform the NaN may or may not behave differently.
+
+Note the API instability warning in L</setpayload>.
+
+Note that because how the floating point formats work out, on the most
+common platforms signaling payload of zero is best avoided,
+since it might end up being identical to C<+Inf>.
+
+See also L</nan>, L</isnan>, L</getpayload>, and L</issignaling>.
+
 =item C<setpgid>
 
 This is similar to the C function C<setpgid()> for
diff --git a/ext/POSIX/t/export.t b/ext/POSIX/t/export.t
index 91593e0152..553a8a9ebe 100644
--- a/ext/POSIX/t/export.t
+++ b/ext/POSIX/t/export.t
@@ -138,6 +138,10 @@ my %expect = (
             nearbyint nextafter nexttoward remainder remquo rint round scalbn
             signbit tgamma trunc y0 y1 yn strtold
         ),
+        # this stuff was added in 5.23
+        qw(
+            getpayload issignaling setpayload setpayloadsig
+        ),
     ],
 );
 
diff --git a/ext/POSIX/t/math.t b/ext/POSIX/t/math.t
index 7e707532d5..027f8ed544 100644
--- a/ext/POSIX/t/math.t
+++ b/ext/POSIX/t/math.t
@@ -3,6 +3,7 @@
 use strict;
 
 use POSIX ':math_h_c99';
+use POSIX ':nan_payload';
 use Test::More;
 
 use Config;
@@ -69,13 +70,11 @@ sub near {
 }
 
 SKIP: {
-    my $C99_SKIP = 59;
-
     unless ($Config{d_acosh}) {
-        skip "no acosh, suspecting no C99 math", $C99_SKIP;
+        skip "no acosh, suspecting no C99 math";
     }
     if ($^O =~ /Win32|VMS/) {
-        skip "running in $^O, C99 math support uneven", $C99_SKIP;
+        skip "running in $^O, C99 math support uneven";
     }
     near(M_SQRT2, 1.4142135623731, "M_SQRT2", 1e-9);
     near(M_E, 2.71828182845905, "M_E", 1e-9);
@@ -137,8 +136,79 @@ SKIP: {
     near(tgamma(9), 40320, "tgamma 9", 1.5e-7);
     near(lgamma(9), 10.6046029027452, "lgamma 9", 1.5e-7);
 
-    # If adding more tests here, update also the $C99_SKIP
-    # at the beginning of this SKIP block.
+    # These don't work on old mips/hppa platforms because == Inf (or == -Inf).
+    # ok(isnan(setpayload(0)), "setpayload zero");
+    # is(getpayload(setpayload(0)), 0, "setpayload + getpayload (zero)");
+    #
+    # These don't work on most platforms because == Inf (or == -Inf).
+    # ok(isnan(setpayloadsig(0)), "setpayload zero");
+    # is(getpayload(setpayloadsig(0)), 0, "setpayload + getpayload (zero)");
+
+    # Verify that the payload set be setpayload()
+    # (1) still is a nan
+    # (2) but the payload can be retrieved
+    # (3) but is not signaling
+    my $x = 0;
+    setpayload($x, 0x12345);
+    ok(isnan($x), "setpayload + isnan");
+    is(getpayload($x), 0x12345, "setpayload + getpayload");
+    ok(!issignaling($x), "setpayload + issignaling");
+
+    # Verify that the signaling payload set be setpayloadsig()
+    # (1) still is a nan
+    # (2) but the payload can be retrieved
+    # (3) and is signaling
+    setpayloadsig($x, 0x12345);
+    ok(isnan($x), "setpayloadsig + isnan");
+    is(getpayload($x), 0x12345, "setpayload + getpayload");
+    ok(issignaling($x), "setpayloadsig + issignaling");
+
+    # Try a payload more than one byte.
+    is(getpayload(nan(0x12345)), 0x12345, "nan + getpayload");
+
+    # Try payloads of 2^k, most importantly at and beyond 2^32.  These
+    # tests will fail if NV is just 32-bit float, but that Should Not
+    # Happen (tm).
+    is(getpayload(nan(2**31)), 2**31, "nan + getpayload 2**31");
+    is(getpayload(nan(2**32)), 2**32, "nan + getpayload 2**32");
+    is(getpayload(nan(2**33)), 2**33, "nan + getpayload 2**33");
+
+    # Payloads just lower than 2^k.
+    is(getpayload(nan(2**31-1)), 2**31-1, "nan + getpayload 2**31-1");
+    is(getpayload(nan(2**32-1)), 2**32-1, "nan + getpayload 2**32-1");
+
+    # Payloads not divisible by two (and larger than 2**32).
+
+    SKIP: {
+        # solaris gets 10460353202 from getpayload() when it should
+        # get 10460353203 (the 3**21). Things go wrong already in
+        # the nan() payload setting: [0x2, 0x6f7c52b4] (ivsize=4)
+        # instead [0x2, 0x6f7c52b3].  Then at getpayload() things
+        # go wrong again, now in other direction: with the (wrong)
+        # [0x2, 0x6f7c52b4] encoded in the nan we should decode into
+        # 10460353204, but we get 10460353202.  It doesn't seem to
+        # help even if we use 'unsigned long long' instead of UV/U32
+        # in the POSIX.xs:S_setpayload/S_getpayload.
+        #
+        # casting bug?  fmod() bug?  Though also broken with
+        # -Duselongdouble + fmodl(), so maybe Solaris cc bug
+        # in general?
+        #
+        # Ironically, the large prime seems to work even in Solaris,
+        # probably just by blind luck.
+        skip($^O, 1) if $^O eq 'solaris';
+        is(getpayload(nan(3**21)), 3**21, "nan + getpayload 3**21");
+    }
+    is(getpayload(nan(4294967311)), 4294967311, "nan + getpayload prime");
+
+    # Truncates towards zero.
+    is(getpayload(nan(1234.567)), 1234, "nan (trunc) + getpayload");
+
+    # Not signaling.
+    ok(!issignaling(0), "issignaling zero");
+    ok(!issignaling(+Inf), "issignaling +Inf");
+    ok(!issignaling(-Inf), "issignaling -Inf");
+    ok(!issignaling(NaN), "issignaling NaN");
 } # SKIP
 
 done_testing();
author	Jarkko Hietaniemi <jhi@iki.fi>	2015-06-02 18:09:54 -0400
committer	Jarkko Hietaniemi <jhi@iki.fi>	2015-06-12 09:57:41 -0400
commit	07bb61ac4e127de8c4d5c8a40adf7cd36baa6253 (patch)
tree	0326aa311dba0c71eabd3c85080e9249288ba336 /ext
parent	453b60f134ff4079e06d0a4adde0c3f90b71fb2f (diff)
download	perl-07bb61ac4e127de8c4d5c8a40adf7cd36baa6253.tar.gz