From 5279fec40385f5daeb97646409a320e2f621b321 Mon Sep 17 00:00:00 2001
From: zimmerma <zimmerma@211d60ee-9f03-0410-a15a-8952a2c7a4e4>
Date: Tue, 24 Dec 2013 09:45:16 +0000
Subject: improve speed of asin for 1+i*y with tiny y

git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@1402 211d60ee-9f03-0410-a15a-8952a2c7a4e4
---
 src/asin.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/asin.c b/src/asin.c
index e796461..a92421a 100644
--- a/src/asin.c
+++ b/src/asin.c
@@ -1,6 +1,6 @@
 /* mpc_asin -- arcsine of a complex number.
 
-Copyright (C) 2009, 2010, 2011, 2012 INRIA
+Copyright (C) 2009, 2010, 2011, 2012, 2013 INRIA
 
 This file is part of GNU MPC.
 
@@ -20,6 +20,67 @@ along with this program. If not, see http://www.gnu.org/licenses/ .
 
 #include "mpc-impl.h"
 
+/* Special case op = 1 + i*y for tiny y (see algorithms.tex).
+   Return 0 if special formula fails, otherwise put in z1 the approximate
+   value which needs to be converted to rop.
+   z1 is a temporary variable with enough precision.
+ */
+static int
+mpc_asin_special (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd, mpc_ptr z1)
+{
+  mpfr_exp_t ey = mpfr_get_exp (mpc_imagref (op));
+  mpfr_t abs_y;
+  mpfr_prec_t p;
+  int inex;
+
+  /* |Re(asin(1+i*y)) - pi/2| <= y^(1/2) */
+  if (ey >= 0 || ((-ey) / 2 < mpfr_get_prec (mpc_realref (z1))))
+    return 0;
+  if (mpfr_get_prec (mpc_imagref (z1)) > (-ey) + 2) /* p > -ey + 2 */
+    return 0;
+
+  mpfr_const_pi (mpc_realref (z1), MPFR_RNDN);
+  mpfr_div_2exp (mpc_realref (z1), mpc_realref (z1), 1, MPFR_RNDN); /* exact */
+  p = mpfr_get_prec (mpc_realref (z1));
+  /* if z1 has precision p, the error on z1 is 1/2*ulp(z1) = 2^(-p) so far,
+     and since ey <= -2p, then y^(1/2) <= 1/2*ulp(z1) too, thus the total
+     error is bounded by ulp(z1) */
+  if (!mpfr_can_round (mpc_realref(z1), p, MPFR_RNDN, MPFR_RNDZ,
+                       mpfr_get_prec (mpc_realref(rop)) +
+                       (MPC_RND_RE(rnd) == MPFR_RNDN)))
+    return 0;
+  
+  /* |Im(asin(1+i*y)) - y^(1/2)| <= (1/12) * y^(3/2) for y >= 0 (err >= 0)
+     |Im(asin(1-i*y)) + y^(1/2)| <= (1/12) * y^(3/2) for y >= 0 (err <= 0) */
+  abs_y[0] = mpc_imagref (op)[0];
+  if (mpfr_signbit (mpc_imagref (op)))
+    MPFR_CHANGE_SIGN (abs_y);
+  inex = mpfr_sqrt (mpc_imagref (z1), abs_y, MPFR_RNDN); /* error <= 1/2 ulp */
+  if (mpfr_signbit (mpc_imagref (op)))
+    MPFR_CHANGE_SIGN (mpc_imagref (z1));
+  /* If z1 has precision p, the error on z1 is 1/2*ulp(z1) = 2^(-p) so far,
+     and (1/12) * y^(3/2) <= (1/8) * y * y^(1/2) <= 2^(ey-3)*2^p*ulp(y^(1/2))
+     thus for p+ey-3 <= -1 we have (1/12) * y^(3/2) <= (1/2) * ulp(y^(1/2)),
+     and the total error is bounded by ulp(z1).
+     Note: if y^(1/2) is exactly representable, and ends with many zeroes,
+     then mpfr_can_round below will fail; however in that case we know that
+     Im(asin(1+i*y)) is away from +/-y^(1/2) wrt zero. */
+  if (inex == 0) /* enlarge im(z1) so that the inexact flag is correct */
+    {
+      if (mpfr_signbit (mpc_imagref (op)))
+        mpfr_nextbelow (mpc_imagref (z1));
+      else
+        mpfr_nextabove (mpc_imagref (z1));
+      return 1;
+    }
+  p = mpfr_get_prec (mpc_imagref (z1));
+  if (!mpfr_can_round (mpc_imagref(z1), p - 1, MPFR_RNDA, MPFR_RNDZ,
+                      mpfr_get_prec (mpc_imagref(rop)) +
+                      (MPC_RND_IM(rnd) == MPFR_RNDN)))
+    return 0;
+  return 1;
+}
+
 int
 mpc_asin (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
 {
@@ -153,6 +214,10 @@ mpc_asin (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
     mpfr_set_prec (mpc_realref(z1), p);
     mpfr_set_prec (mpc_imagref(z1), p);
 
+    /* try special code for 1+i*y with tiny y */
+    if (loop == 1 && mpc_asin_special (rop, op, rnd, z1))
+      break;
+
     /* z1 <- z^2 */
     mpc_sqr (z1, op, MPC_RNDNN);
     /* err(x) <= 1/2 ulp(x), err(y) <= 1/2 ulp(y) */
-- 
cgit v1.2.1