Improve speed of classify_float

As suggested in the discussion of GPR#272:
- Do not go through fpclassify() (speedup: a factor of 2 to 3)
- Add a 64-bit variant of the code (additional speedup: 10%-20%)
author: Xavier Leroy <xavier.leroy@inria.fr> 2015-11-15 16:43:46 +0100
committer: Xavier Leroy <xavier.leroy@inria.fr> 2015-11-15 16:43:46 +0100
commit: e7f339e6bdb34408babcbe1745c8c0ad6744d125 (patch)
tree: b7b712eb9216817e8735480f3eb502ed702908d7
parent: 50648ed2b741c21e64dcc3cb82b926f0b320338d (diff)
download: ocaml-e7f339e6bdb34408babcbe1745c8c0ad6744d125.tar.gz
2 files changed, 18 insertions, 20 deletions
diff --git a/Changes b/Changes
index b673fe96b8..f7f0bf7422 100644
--- a/Changes
+++ b/Changes
@@ -149,6 +149,8 @@ Standard library:
 - GPR#265: new implementation of Queue avoiding Obj.magic
   (Jérémie Dimino)
 - GPR#272: Switch classify_float to [@@unboxed] (Alain Frisch)
+- Improve speed of classify_float by not going through fpclassify()
+  (Alain Frisch, Xavier Leroy)
 - GPR#277: Switch the following externals to [@@unboxed]:
   * {Nativeint,Int32,Int64}.{of,to}_float
   * Int{32,64}.float_of_bits
diff --git a/byterun/floats.c b/byterun/floats.c
index 4fa575ef3b..91af6c13dd 100644
--- a/byterun/floats.c
+++ b/byterun/floats.c
@@ -463,27 +463,23 @@ enum { FP_normal, FP_subnormal, FP_zero, FP_infinite, FP_nan };
 
 value caml_classify_float_unboxed(double vd)
 {
-  /* Cygwin 1.3 has problems with fpclassify (PR#1293), so don't use it */
-  /* FIXME Cygwin 1.3 is ancient! Revisit this decision. */
-
-  /* Informal benchmarking (see GPR#272) suggests that the emulation
-     version is faster than calling the libc.  We could switch to it,
-     and also provide an even faster version for 64-bit systems as
-     suggested by XL.  -- AF */
-
-#if defined(fpclassify) && !defined(__CYGWIN__) && !defined(__MINGW32__)
-  switch (fpclassify(vd)) {
-  case FP_NAN:
-    return Val_int(FP_nan);
-  case FP_INFINITE:
-    return Val_int(FP_infinite);
-  case FP_ZERO:
-    return Val_int(FP_zero);
-  case FP_SUBNORMAL:
-    return Val_int(FP_subnormal);
-  default: /* case FP_NORMAL */
-    return Val_int(FP_normal);
+#ifdef ARCH_SIXTYFOUR
+  union { double d; uint64_t i; } u;
+  uint64_t n;
+  uint32_t e;
+
+  u.d = vd;
+  n = u.i << 1;                 /* shift sign bit off */
+  if (n == 0) return Val_int(FP_zero);
+  e = n >> 53;                  /* extract exponent */
+  if (e == 0) return Val_int(FP_subnormal);
+  if (e == 0x7FF) {
+    if (n << 11 == 0)           /* shift exponent off */
+      return Val_int(FP_infinite);
+    else
+      return Val_int(FP_nan);
   }
+  return Val_int(FP_normal);
 #else
   union double_as_two_int32 u;
   uint32_t h, l;
author	Xavier Leroy <xavier.leroy@inria.fr>	2015-11-15 16:43:46 +0100
committer	Xavier Leroy <xavier.leroy@inria.fr>	2015-11-15 16:43:46 +0100
commit	e7f339e6bdb34408babcbe1745c8c0ad6744d125 (patch)
tree	b7b712eb9216817e8735480f3eb502ed702908d7
parent	50648ed2b741c21e64dcc3cb82b926f0b320338d (diff)
download	ocaml-e7f339e6bdb34408babcbe1745c8c0ad6744d125.tar.gz