Diffstat (limited to 'libgcc/config/libbid/bid128_round_integral.c')
-rw-r--r-- | libgcc/config/libbid/bid128_round_integral.c | 3200
1 file changed, 1622 insertions(+), 1578 deletions(-)
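The hunks below rework the BID128 round-to-integral paths: the coefficient C1 is biased by half an ulp (midpoint64[]/midpoint128[]), multiplied by a 118-bit approximation of 10^(-x) (ten2mk128[]), and the leftover fraction f* decides ties and the inexact flag. As rough orientation only, here is a minimal standalone sketch of that round-half-to-even idea in plain 64-bit integer arithmetic; it is not the library code, and pow10_u64 / round_integral_nearest are hypothetical helpers that assume the digit count to drop fits comfortably in 64 bits.

    /* Illustrative sketch, not libbid: round C * 10^exp (exp < 0) to an
       integer by adding 1/2 * 10^x and dropping x = -exp decimal digits,
       breaking exact ties to even, as the 128-bit code does with its
       midpoint and reciprocal tables.  Assumes 1 <= x <= 19.  */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pow10_u64 (int x)
    {
      uint64_t p = 1;
      while (x-- > 0)
        p *= 10;
      return p;
    }

    static uint64_t round_integral_nearest (uint64_t C, int exp)
    {
      int x = -exp;                        /* digits to chop off */
      uint64_t kx = pow10_u64 (x);         /* 10^x */
      uint64_t Cstar = (C + kx / 2) / kx;  /* floor ((C + 1/2*10^x) / 10^x) */
      uint64_t f = (C + kx / 2) % kx;      /* remaining fraction f* */
      if (f == 0 && (Cstar & 1))           /* exact midpoint: make it even */
        Cstar--;
      return Cstar;
    }

    int main (void)
    {
      /* 12345 * 10^-2 = 123.45 -> 123;  12350 * 10^-2 = 123.5 -> 124 (even) */
      printf ("%llu\n", (unsigned long long) round_integral_nearest (12345, -2));
      printf ("%llu\n", (unsigned long long) round_integral_nearest (12350, -2));
      return 0;
    }

The real code cannot divide a 128-bit coefficient directly, so it instead multiplies by a precomputed, rounded-up reciprocal of 10^x and detects midpoints and inexactness from the fraction bits, which is what the f* comparisons against ten2mk128[] and onehalf128[] in the diff implement.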
diff --git a/libgcc/config/libbid/bid128_round_integral.c b/libgcc/config/libbid/bid128_round_integral.c index a9952a9bc8b..d517095a770 100644 --- a/libgcc/config/libbid/bid128_round_integral.c +++ b/libgcc/config/libbid/bid128_round_integral.c @@ -34,997 +34,340 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA * BID128_round_integral_exact ****************************************************************************/ -BID128_FUNCTION_ARG1(__bid128_round_integral_exact, x) +BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x) - UINT128 res = {{ 0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull }}; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} + }; +UINT64 x_sign; +UINT64 x_exp; +int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) - UINT64 tmp64; - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - UINT256 fstar; - UINT256 P256; +UINT64 tmp64; +BID_UI64DOUBLE tmp1; +unsigned int x_nr_bits; +int q, ind, shift; +UINT128 C1; +UINT256 fstar; +UINT256 P256; // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special + if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) + } else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; } + BID_RETURN (res); } +} // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis 
were eliminated already this test is reduced to - // checking for x10x) +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) || - ((C1.w[1] == 0x0001ed09bead87c0ull) && - (C1.w[0] > 0x378d8e63ffffffffull))) && - ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) || - ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 C1.w[1] = 0; C1.w[0] = 0; + } else { // canonical + ; } +} + // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} // x is not special and is not zero - switch (rnd_mode) { - case ROUNDING_TO_NEAREST: - case ROUNDING_TIES_AWAY: - // if (exp <= -(p+1)) return 0.0 - if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_DOWN: - // if (exp <= -p) return -1.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34 - if (x_sign) { - // if negative, return negative 1, because we know coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } else { - // if positive, return positive 0, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_UP: - // if (exp <= -p) return -0.0 or +1.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - if (x_sign) { - // if negative, return negative 0, because we know the coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { - // if positive, return positive 1, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - 
} - break; - case ROUNDING_TO_ZERO: - // if (exp <= -p) return -0.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - } - - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; +switch (rnd_mode) { +case ROUNDING_TO_NEAREST: +case ROUNDING_TIES_AWAY: + // if (exp <= -(p+1)) return 0.0 + if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } - // exp < 0 - switch (rnd_mode) { - case ROUNDING_TO_NEAREST: - if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // chop off ind digits from the lower part of C1 - // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits - tmp64 = C1.w[0]; - if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - } - if (C1.w[0] < tmp64) - C1.w[1]++; - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - // determine the value of res and fstar - - // determine inexactness of the rounding of C* - // if (0 < f* - 1/2 < 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 > T*) then - // the result is inexact - // Note: we are going to use __bid_ten2mk128[] instead of __bid_ten2mk128trunc[] - - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - // redundant shift = __bid_shiftright128[ind - 1]; // shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - fstar.w[1] = P256.w[1]; - 
fstar.w[0] = P256.w[0]; - // fraction f* < 10^(-x) <=> midpoint - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) - if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - ((fstar.w[1] < (__bid_ten2mk128[ind - 1].w[1])) - || ((fstar.w[1] == __bid_ten2mk128[ind - 1].w[1]) - && (fstar.w[0] < __bid_ten2mk128[ind - 1].w[0])))) { - // subract 1 to make even - if (res.w[0]-- == 0) { - res.w[1]--; - } - } - if (fstar.w[1] > 0x8000000000000000ull || - (fstar.w[1] == 0x8000000000000000ull && fstar.w[0] > 0x0ull)) { - // f* > 1/2 and the result may be exact - tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 - if ((tmp64 > __bid_ten2mk128[ind - 1].w[1] - || (tmp64 == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* < 10^(-x) <=> midpoint - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - fstar.w[2] == 0 && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { - // subract 1 to make even - if (res.w[0]-- == 0) { - res.w[1]--; - } - } - if (fstar.w[2] > __bid_one_half128[ind - 1] - || (fstar.w[2] == __bid_one_half128[ind - 1] - && (fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[2] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* < 10^(-x) <=> midpoint - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((res.w[0] & 0x0000000000000001ull) && // is result odd? 
- fstar.w[3] == 0 && fstar.w[2] == 0 - && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { - // subract 1 to make even - if (res.w[0]-- == 0) { - res.w[1]--; - } - } - if (fstar.w[3] > __bid_one_half128[ind - 1] - || (fstar.w[3] == __bid_one_half128[ind - 1] - && (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[3] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if ((q + exp) < 0) <=> q < -exp - // the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_TIES_AWAY: - if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // chop off ind digits from the lower part of C1 - // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits - tmp64 = C1.w[0]; - if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - } - if (C1.w[0] < tmp64) - C1.w[1]++; - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk128trunc[ind], e.g. - // if x=1, T*=__bid_ten2mk128trunc[0]=0x19999999999999999999999999999999 - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 
1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) - - // determine also the inexactness of the rounding of C* - // if (0 < f* - 1/2 < 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 > T*) then - // the result is inexact - // Note: we are going to use __bid_ten2mk128[] instead of __bid_ten2mk128trunc[] - // shift right C* by Ex-128 = __bid_shiftright128[ind] - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - // redundant shift = __bid_shiftright128[ind - 1]; // shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - if (fstar.w[1] > 0x8000000000000000ull || - (fstar.w[1] == 0x8000000000000000ull && fstar.w[0] > 0x0ull)) { - // f* > 1/2 and the result may be exact - tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 - if ((tmp64 > __bid_ten2mk128[ind - 1].w[1] - || (tmp64 == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - if (fstar.w[2] > __bid_one_half128[ind - 1] - || (fstar.w[2] == __bid_one_half128[ind - 1] - && (fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[2] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - if (fstar.w[3] > __bid_one_half128[ind - 1] - || (fstar.w[3] == __bid_one_half128[ind - 1] - && (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[3] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } - // if the result was a midpoint, it was already rounded away from zero - res.w[1] |= x_sign | 0x3040000000000000ull; - BID_RETURN (res); - } else { // if ((q + exp) < 0) <=> q < -exp - // the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - 
case ROUNDING_DOWN: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // (number of digits to be chopped off) - // chop off ind digits from the lower part of C1 - // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP - // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE - // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - // tmp64 = C1.w[0]; - // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - // } - // if (C1.w[0] < tmp64) C1.w[1]++; - // if carry-out from C1.w[0], increment C1.w[1] - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - *pfpsf |= INEXACT_EXCEPTION; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 0 <= shift <= 102 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - if 
(++res.w[0] == 0) { - res.w[1]++; - } - } - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 - if (x_sign) { // negative rounds down to -1.0 - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } else { // positive rpunds down to +0.0 - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_UP: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // (number of digits to be chopped off) - // chop off ind digits from the lower part of C1 - // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP - // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE - // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - // tmp64 = C1.w[0]; - // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - // } - // if (C1.w[0] < tmp64) C1.w[1]++; - // if carry-out from C1.w[0], increment C1.w[1] - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = C1 * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - *pfpsf |= INEXACT_EXCEPTION; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = 
P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 - if (x_sign) { // negative rounds up to -0.0 - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // positive rpunds up to +1.0 - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_TO_ZERO: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // (number of digits to be chopped off) - // chop off ind digits from the lower part of C1 - // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP - // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE - // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - //tmp64 = C1.w[0]; - // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - // } - // if (C1.w[0] < tmp64) C1.w[1]++; - // if carry-out from C1.w[0], increment C1.w[1] - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - *pfpsf |= INEXACT_EXCEPTION; - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 
38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; + break; +case ROUNDING_DOWN: + // if (exp <= -p) return -1.0 or +0.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34 + if (x_sign) { + // if negative, return negative 1, because we know coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { + // if positive, return positive 0, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); } - break; - } - BID_RETURN (res); -} - -/***************************************************************************** - * BID128_round_integral_nearest_even - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_nearest_even, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) - UINT64 tmp64; - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits - UINT256 fstar; - UINT256 P256; - - // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 
0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + break; +case ROUNDING_UP: + // if (exp <= -p) return -0.0 or +1.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 0, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; + // if positive, return positive 1, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; } - res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } - // x is not special and is not zero - - // if (exp <= -(p+1)) return 0 - if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + break; +case ROUNDING_TO_ZERO: + // if (exp <= -p) return -0.0 or +0.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 res.w[1] = x_sign | 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } + break; +} + // q = nr. of decimal digits in x // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; the exp will be 0 +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} + // exp < 0 +switch (rnd_mode) { +case ROUNDING_TO_NEAREST: + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits tmp64 = C1.w[0]; if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; } if (C1.w[0] < tmp64) C1.w[1]++; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); // determine the value of res and fstar + + // determine inexactness of the rounding of C* + // if (0 < f* - 1/2 < 10^(-x)) then + // the result is exact + // else // if (f* - 1/2 > T*) then + // the result is inexact + // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - // redundant shift = __bid_shiftright128[ind - 1]; // shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; // redundant fstar.w[3] = 0; // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; // fraction f* < 10^(-x) <=> midpoint // f* is in the right position to be 
compared with - // 10^(-x) from __bid_ten2mk128[] + // 10^(-x) from ten2mk128[] // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - ((P256.w[1] < (__bid_ten2mk128[ind - 1].w[1])) - || ((P256.w[1] == __bid_ten2mk128[ind - 1].w[1]) - && (P256.w[0] < __bid_ten2mk128[ind - 1].w[0])))) { + ((fstar.w[1] < (ten2mk128[ind - 1].w[1])) + || ((fstar.w[1] == ten2mk128[ind - 1].w[1]) + && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) { // subract 1 to make even if (res.w[0]-- == 0) { res.w[1]--; } } + if (fstar.w[1] > 0x8000000000000000ull || + (fstar.w[1] == 0x8000000000000000ull + && fstar.w[0] > 0x0ull)) { + // f* > 1/2 and the result may be exact + tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 + if (tmp64 > ten2mk128[ind - 1].w[1] || + (tmp64 == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; fstar.w[1] = P256.w[1]; fstar.w[0] = P256.w[0]; // fraction f* < 10^(-x) <=> midpoint // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] + // 10^(-x) from ten2mk128[] if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - fstar.w[2] == 0 && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { + fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { // subract 1 to make even if (res.w[0]-- == 0) { res.w[1]--; } } + if (fstar.w[2] > onehalf128[ind - 1] || + (fstar.w[2] == onehalf128[ind - 1] + && (fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[2] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; fstar.w[2] = P256.w[2]; fstar.w[1] = P256.w[1]; fstar.w[0] = P256.w[0]; // fraction f* < 10^(-x) <=> midpoint // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] + // 10^(-x) from ten2mk128[] if ((res.w[0] & 0x0000000000000001ull) && // is result odd? 
- fstar.w[3] == 0 && fstar.w[2] == 0 - && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { + fstar.w[3] == 0 && fstar.w[2] == 0 && + (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { // subract 1 to make even if (res.w[0]-- == 0) { res.w[1]--; } } + if (fstar.w[3] > onehalf128[ind - 1] || + (fstar.w[3] == onehalf128[ind - 1] && + (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[3] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } } res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; BID_RETURN (res); @@ -1032,156 +375,139 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_nearest_even, x) // the result is +0 or -0 res.w[1] = x_sign | 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } -} - -/***************************************************************************** - * BID128_round_integral_negative - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_negative, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo - // (all are UINT64) - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ - // 113 bits - UINT256 fstar; - UINT256 P256; + break; +case ROUNDING_TIES_AWAY: + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. + // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 + // if (0 < f* < 10^(-x)) then the result is a midpoint + // if floor(C*) is even then C* = floor(C*) - logical right + // shift; C* has p decimal digits, correct by Prop. 1) + // else if floor(C*) is odd C* = floor(C*)-1 (logical right + // shift; C* has p decimal digits, correct by Pr. 
1) + // else + // C* = floor(C*) (logical right shift; C has p decimal digits, + // correct by Property 1) + // n = C* * 10^(e+x) - // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; + // determine also the inexactness of the rounding of C* + // if (0 < f* - 1/2 < 10^(-x)) then + // the result is exact + // else // if (f* - 1/2 > T*) then + // the result is inexact + // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] + // shift right C* by Ex-128 = shiftright128[ind] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[1] > 0x8000000000000000ull || + (fstar.w[1] == 0x8000000000000000ull + && fstar.w[0] > 0x0ull)) { + // f* > 1/2 and the result may be exact + tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 + if ((tmp64 > ten2mk128[ind - 1].w[1] || + (tmp64 == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0]))) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[2] > onehalf128[ind - 1] || + (fstar.w[2] == onehalf128[ind - 1] + && (fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[2] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[3] > onehalf128[ind - 1] || + (fstar.w[3] == onehalf128[ind - 1] && + (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[3] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == 
ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; - } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; } - res.w[0] = 0x0000000000000000ull; + // if the result was a midpoint, it was already rounded away from zero + res.w[1] |= x_sign | 0x3040000000000000ull; BID_RETURN (res); - } - // x is not special and is not zero - - // if (exp <= -p) return -1.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - if (x_sign) { - // if negative, return negative 1, because we know the coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } else { - // if positive, return positive 0, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } + } else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; the exp will be 0 + break; +case ROUNDING_DOWN: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // (number of digits to be chopped off) // chop off ind digits from the lower part of C1 @@ -1189,82 +515,85 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_negative, x) // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - //tmp64 = C1.w[0]; + // tmp64 = C1.w[0]; // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; // } // if (C1.w[0] < tmp64) C1.w[1]++; // if carry-out from C1.w[0], increment C1.w[1] // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative if (++res.w[0] == 0) { res.w[1]++; } } } } else if (ind - 1 <= 21) 
{ // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 0 <= shift <= 102 + shift = shiftright128[ind - 1]; // 0 <= shift <= 102 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative if (++res.w[0] == 0) { res.w[1]++; } } } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative if (++res.w[0] == 0) { res.w[1]++; } @@ -1281,155 +610,12 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_negative, x) res.w[1] = 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; } + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } -} - -/***************************************************************************** - * BID128_round_integral_positive - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_positive, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo - // (all are UINT64) - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ - // 113 bits - UINT256 fstar; - UINT256 P256; - - // check for NaN or 
Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; - } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); - } - // x is not special and is not zero - - // if (exp <= -p) return -0.0 or +1.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - if (x_sign) { - // if negative, return negative 0, because we know the coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { - // if positive, return positive 1, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } - BID_RETURN (res); - } - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + break; +case ROUNDING_UP: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // (number of digits to be chopped off) @@ -1440,80 +626,83 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_positive, x) // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE // tmp64 = C1.w[0]; // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; // } // if (C1.w[0] < tmp64) C1.w[1]++; // if carry-out from C1.w[0], increment C1.w[1] // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive if (++res.w[0] == 0) { res.w[1]++; } } } } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // if negative, the truncated 
value is already the correct result - if (!x_sign) { // if positive - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive if (++res.w[0] == 0) { res.w[1]++; } } } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive if (++res.w[0] == 0) { res.w[1]++; } @@ -1530,146 +719,13 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_positive, x) res.w[1] = 0x3040000000000000ull; res.w[0] = 0x0000000000000001ull; } + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } -} - -/***************************************************************************** - * BID128_round_integral_zero - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_zero, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo - // (all are UINT64) - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ - // 113 bits - UINT256 P256; - - // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - 
res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; - } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); - } - // x is not special and is not zero - - // if (exp <= -p) return -0.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); - } - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; the exp will be 0 + break; +case ROUNDING_TO_ZERO: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // (number of digits to be chopped off) // chop off ind digits from the lower part of C1 @@ -1679,234 +735,1222 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_zero, x) // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE //tmp64 = C1.w[0]; // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; // } // if (C1.w[0] < tmp64) C1.w[1]++; // if carry-out from C1.w[0], increment C1.w[1] // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + } } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = 
shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + } } res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; BID_RETURN (res); } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 res.w[1] = x_sign | 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } + break; +} + +BID_RETURN (res); } /***************************************************************************** - * BID128_round_integral_nearest_away + * BID128_round_integral_nearest_even ****************************************************************************/ -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_nearest_away, x) +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) + UINT64 tmp64; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits + UINT256 fstar; + UINT256 P256; - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + 
C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -(p+1)) return 0 +if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi + || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // determine the value of res and fstar + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) + if ((res.w[0] & 0x0000000000000001ull) && // 
is result odd? + ((P256.w[1] < (ten2mk128[ind - 1].w[1])) + || ((P256.w[1] == ten2mk128[ind - 1].w[1]) + && (P256.w[0] < ten2mk128[ind - 1].w[0])))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[3] == 0 && fstar.w[2] == 0 + && (fstar.w[1] < ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_negative + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo // (all are UINT64) - UINT64 tmp64; - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; // UINT128 res is C* at first - represents up to 34 decimal digits ~ // 113 bits - // UINT256 fstar; - UINT256 P256; + UINT256 fstar; + UINT256 P256; // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + 
x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -p) return -1.0 or +0.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 1, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { + // if positive, return positive 0, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + //tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + if (++res.w[0] == 0) { + res.w[1]++; + } } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 0 <= shift <= 102 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + 
} + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } } - BID_RETURN (res); } } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds down to -1.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { // positive rpunds down to +0.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_positive + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + UINT256 fstar; + UINT256 P256; - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = 
x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 C1.w[1] = 0; C1.w[0] = 0; + } else { // canonical + ; } +} + // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} // x is not special and is not zero - // if (exp <= -(p+1)) return 0.0 - if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 - res.w[1] = x_sign | 0x3040000000000000ull; + // if (exp <= -p) return -0.0 or +1.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 0, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); + } else { + // if positive, return positive 1, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; } + BID_RETURN (res); +} // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion + // determine first the nr. 
of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + // tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = C1 * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // if negative, the 
truncated value is already the correct result + if (!x_sign) { // if positive + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds up to -0.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // positive rpunds up to +1.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_zero + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; the exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - 
// chop off ind digits from the lower part of C1 - // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits - tmp64 = C1.w[0]; - if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - } - if (C1.w[0] < tmp64) - C1.w[1]++; - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk128trunc[ind], e.g. - // if x=1, T*=__bid_ten2mk128trunc[0]=0x19999999999999999999999999999999 - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; - // shift right C* by Ex-128 = __bid_shiftright128[ind] - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - res.w[1] = (P256.w[3] >> shift); - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1]; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38 + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is 
not special and is not zero + + // if (exp <= -p) return -0.0 or +0.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - // if the result was a midpoint, it was already rounded away from zero - res.w[1] |= x_sign | 0x3040000000000000ull; - BID_RETURN (res); - } else { // if ((q + exp) < 0) <=> q < -exp - // the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + //tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + } + 
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_nearest_away + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + UINT64 tmp64; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + // UINT256 fstar; + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -(p+1)) return 0.0 +if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. 
of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. + // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 + // if (0 < f* < 10^(-x)) then the result is a midpoint + // if floor(C*) is even then C* = floor(C*) - logical right + // shift; C* has p decimal digits, correct by Prop. 1) + // else if floor(C*) is odd C* = floor(C*)-1 (logical right + // shift; C* has p decimal digits, correct by Pr. 
1) + // else + // C* = floor(C*) (logical right shift; C has p decimal digits, + // correct by Property 1) + // n = C* * 10^(e+x) + + // shift right C* by Ex-128 = shiftright128[ind] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + res.w[1] = (P256.w[3] >> shift); + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1]; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38 + } + // if the result was a midpoint, it was already rounded away from zero + res.w[1] |= x_sign | 0x3040000000000000ull; + BID_RETURN (res); +} else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} } |
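
Editor's note on the rounding core shared by the two functions above: both added hunks first count the decimal digits q of the coefficient, and when 1 <= -exp <= q they chop ind = -exp digits, with nearest-away rounding done by adding 1/2 * 10^ind (midpoint64[] / midpoint128[]) and then truncating via multiplication by a 118-bit approximation of 10^(-ind) (ten2mk128[]) followed by a right shift (shiftright128[]), i.e. C* = (C1 + 1/2 * 10^ind) * 10^(-ind). The following is only a minimal standalone sketch of those two steps for a coefficient that fits in 64 bits, assuming a GCC-style unsigned __int128; it uses __builtin_clzll plus a power-of-ten threshold instead of the file's double-conversion/nr_digits[] lookup, and an exact 128-bit division instead of the reciprocal-multiply tables. The names pow10_64, decimal_digits and chop_digits_nearest_away are illustrative and do not exist in libbid.

/* Illustrative sketch only -- not part of libbid.  Assumes GCC's
   unsigned __int128 and a coefficient with at most 20 decimal digits.  */
#include <stdint.h>
#include <stdio.h>

static const uint64_t pow10_64[20] = {
  1ull, 10ull, 100ull, 1000ull, 10000ull, 100000ull,
  1000000ull, 10000000ull, 100000000ull, 1000000000ull,
  10000000000ull, 100000000000ull, 1000000000000ull,
  10000000000000ull, 100000000000000ull, 1000000000000000ull,
  10000000000000000ull, 100000000000000000ull,
  1000000000000000000ull, 10000000000000000000ull
};

/* Number of decimal digits in a nonzero coefficient c: estimate q from
   the bit count, then correct against a power-of-ten threshold -- the
   same idea as the nr_digits[] lookup applied to x_nr_bits above, just
   without the table.  */
static int
decimal_digits (uint64_t c)
{
  int nbits = 64 - __builtin_clzll (c);   /* 1 <= nbits <= 64 */
  int q = ((nbits - 1) * 77) / 256 + 1;   /* = floor((nbits-1)*log10(2)) + 1 for nbits <= 64 */
  if (q < 20 && c >= pow10_64[q])
    q++;                                  /* threshold correction */
  return q;
}

/* Chop ind decimal digits (1 <= ind <= 19) from c, rounding to nearest
   with ties away from zero: add 1/2 * 10^ind, then truncate.  The real
   code truncates by multiplying with ten2mk128[ind - 1] and shifting by
   shiftright128[ind - 1]; an exact 128-bit division stands in for that
   here.  */
static uint64_t
chop_digits_nearest_away (uint64_t c, int ind)
{
  unsigned __int128 sum = (unsigned __int128) c + pow10_64[ind] / 2;
  return (uint64_t) (sum / pow10_64[ind]);
}

int
main (void)
{
  uint64_t c = 123456ull;   /* think of 123.456, i.e. exp = -3, q = 6 */
  int ind = 3;              /* ind = -exp digits to chop */
  printf ("q = %d\n", decimal_digits (c));                                  /* 6 */
  printf ("round(123.456) = %llu\n",
          (unsigned long long) chop_digits_nearest_away (c, ind));          /* 123 */
  printf ("round(123.500) = %llu\n",
          (unsigned long long) chop_digits_nearest_away (123500ull, ind));  /* 124 */
  return 0;
}

With c = 123456 and exp = -3 this follows the (q + exp) > 0 branch: ind = 3, the midpoint 500 is added, and truncation gives 123, while the exact midpoint 123.500 rounds away from zero to 124, matching the "if the result was a midpoint, it was already rounded away from zero" comment in bid128_round_integral_nearest_away. The libbid code cannot simply divide as the sketch does, since its coefficient and midpoint occupy up to 128 bits, which is why it multiplies by the precomputed 10^(-x) approximation and shifts instead.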