From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001
From: upstream source tree
Date: Sun, 15 Mar 2015 20:14:05 -0400
Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified
 gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream
 tarball.

downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.

if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.

---
 libgcc/config/libbid/bid128_fma.c | 4460 +++++++++++++++++++++++++++++++++++++
 1 file changed, 4460 insertions(+)
 create mode 100644 libgcc/config/libbid/bid128_fma.c

diff --git a/libgcc/config/libbid/bid128_fma.c b/libgcc/config/libbid/bid128_fma.c
new file mode 100644
index 000000000..016f71cc3
--- /dev/null
+++ b/libgcc/config/libbid/bid128_fma.c
@@ -0,0 +1,4460 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/*****************************************************************************
+ *
+ *  BID128 fma   x * y + z
+ *
+ ****************************************************************************/
+
+#include "bid_internal.h"
+
+static void
+rounding_correction (unsigned int rnd_mode,
+                     unsigned int is_inexact_lt_midpoint,
+                     unsigned int is_inexact_gt_midpoint,
+                     unsigned int is_midpoint_lt_even,
+                     unsigned int is_midpoint_gt_even,
+                     int unbexp,
+                     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
+  // unbiased true exponent unbexp may be larger than emax
+
+  UINT128 res = *ptrres; // expected to have the correct sign and coefficient
+  // (the exponent field is ignored, as unbexp is used instead)
+  UINT64 sign, exp;
+  UINT64 C_hi, C_lo;
+
+  // general correction from RN to RA, RM, RP, RZ
+  // Note: if the result is negative, then is_inexact_lt_midpoint,
+  // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
+  // have to be considered as if determined for the absolute value of the
+  // result (so they seem to be reversed)
+
+  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
+      is_midpoint_lt_even || is_midpoint_gt_even) {
+    *ptrfpsf |= INEXACT_EXCEPTION;
+  }
+  // apply correction to result calculated with unbounded exponent
+  sign = res.w[1] & MASK_SIGN;
+  exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
+  C_hi = res.w[1] & MASK_COEFF;
+  C_lo = res.w[0];
+  if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
+      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
+      is_midpoint_gt_even))) ||
+      (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
+      ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
+      is_midpoint_gt_even)))) {
+    // C = C + 1
+    C_lo = C_lo + 1;
+    if (C_lo == 0)
+      C_hi = C_hi + 1;
+    if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
+      // C = 10^34 => rounding overflow
+      C_hi = 0x0000314dc6448d93ull;
+      C_lo = 0x38c15b0a00000000ull; // 10^33
+      // exp = exp + EXP_P1;
+      unbexp = unbexp + 1;
+      exp = (UINT64) (unbexp + 6176) << 49;
+    }
+  } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
+      ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
+      (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
+    // C = C - 1
+    C_lo = C_lo - 1;
+    if (C_lo == 0xffffffffffffffffull)
+      C_hi--;
+    // check if we crossed into the lower decade
+    if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
+      // C = 10^33 - 1
+      if (exp > 0) {
+        C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
+        C_lo = 0x378d8e63ffffffffull;
+        // exp = exp - EXP_P1;
+        unbexp = unbexp - 1;
+        exp = (UINT64) (unbexp + 6176) << 49;
+      } else { // if exp = 0
+        if (is_midpoint_lt_even || is_inexact_gt_midpoint) // tiny & inexact
+          *ptrfpsf |= UNDERFLOW_EXCEPTION;
+      }
+    }
+  } else {
+    ; // the result is already correct
+  }
+  if (unbexp > expmax) { // 6111
+    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
+    exp = 0;
+    if (!sign) { // result is positive
+      if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
+        C_hi = 0x7800000000000000ull;
+        C_lo = 0x0000000000000000ull;
+      } else { // res = +MAXFP = (10^34-1) * 10^emax
+        C_hi = 0x5fffed09bead87c0ull;
+        C_lo = 0x378d8e63ffffffffull;
+      }
+    } else { // result is negative
+      if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
+        C_hi = 0xf800000000000000ull;
+        C_lo = 0x0000000000000000ull;
+      } else { // res = -MAXFP = -(10^34-1) * 10^emax
+        C_hi = 0xdfffed09bead87c0ull;
+        C_lo = 0x378d8e63ffffffffull;
+      }
+    }
+  }
+  // assemble the result
+  res.w[1] = sign | exp | C_hi;
+  res.w[0] = C_lo;
+  *ptrres = res;
+}
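The correction scheme above re-derives any directed rounding from the round-to-nearest result plus the four status flags; for a negative result the flags describe the absolute value, so the up/down corrections swap. A minimal editorial sketch of that decision table on a bare 64-bit coefficient (the helper name redirect, its parameters, and the omission of the 10^34/10^33 decade-crossing and overflow handling seen above are simplifications, not part of the library):

#include <stdint.h>

enum rnd { RN, RD, RU, RZ };    /* to nearest-even, down, up, toward zero */

/* coef is |result| rounded to nearest-even; the flags locate the exact
   value relative to coef, which is enough to re-round in any direction */
static uint64_t
redirect (uint64_t coef, int sign, enum rnd mode,
          int inexact_lt_mid, int inexact_gt_mid,
          int mid_lt_even, int mid_gt_even)
{
  int rn_low = inexact_lt_mid || mid_gt_even;   /* coef < exact |value| */
  int rn_high = inexact_gt_mid || mid_lt_even;  /* coef > exact |value| */
  if (!sign) {                  /* positive result */
    if (mode == RU && rn_low)
      coef++;                   /* round up: away from zero */
    else if ((mode == RD || mode == RZ) && rn_high)
      coef--;                   /* round down / to zero: toward zero */
  } else {                      /* negative result: directions swap */
    if (mode == RD && rn_low)
      coef++;                   /* round down: larger magnitude */
    else if ((mode == RU || mode == RZ) && rn_high)
      coef--;                   /* round up / to zero: smaller magnitude */
  }
  return coef;
}

ROUNDING_TIES_AWAY acts like ROUNDING_UP on a positive midpoint and like ROUNDING_DOWN on a negative one, which is why it appears alongside those modes in the midpoint branches of rounding_correction.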
+
+static void
+add256 (UINT256 x, UINT256 y, UINT256 * pz) {
+  // *z = x + y; assume the sum fits in 256 bits
+  UINT256 z;
+  z.w[0] = x.w[0] + y.w[0];
+  if (z.w[0] < x.w[0]) {
+    x.w[1]++;
+    if (x.w[1] == 0x0000000000000000ull) {
+      x.w[2]++;
+      if (x.w[2] == 0x0000000000000000ull) {
+        x.w[3]++;
+      }
+    }
+  }
+  z.w[1] = x.w[1] + y.w[1];
+  if (z.w[1] < x.w[1]) {
+    x.w[2]++;
+    if (x.w[2] == 0x0000000000000000ull) {
+      x.w[3]++;
+    }
+  }
+  z.w[2] = x.w[2] + y.w[2];
+  if (z.w[2] < x.w[2]) {
+    x.w[3]++;
+  }
+  z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
+  *pz = z;
+}
+
+static void
+sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
+  // *z = x - y; assume x >= y
+  UINT256 z;
+  z.w[0] = x.w[0] - y.w[0];
+  if (z.w[0] > x.w[0]) {
+    x.w[1]--;
+    if (x.w[1] == 0xffffffffffffffffull) {
+      x.w[2]--;
+      if (x.w[2] == 0xffffffffffffffffull) {
+        x.w[3]--;
+      }
+    }
+  }
+  z.w[1] = x.w[1] - y.w[1];
+  if (z.w[1] > x.w[1]) {
+    x.w[2]--;
+    if (x.w[2] == 0xffffffffffffffffull) {
+      x.w[3]--;
+    }
+  }
+  z.w[2] = x.w[2] - y.w[2];
+  if (z.w[2] > x.w[2]) {
+    x.w[3]--;
+  }
+  z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
+  *pz = z;
+}
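Both helpers rely on the standard multiword-arithmetic idiom: after a wrapping 64-bit addition z = x + y, a carry out occurred exactly when z < x, and for subtraction a borrow occurred exactly when z > x. A self-contained editorial sketch of the same rule with an explicit carry variable (the type name u256 and the loop form are assumptions for brevity; the library instead bumps the higher words of x in place, as above):

#include <stdint.h>

typedef struct { uint64_t w[4]; } u256;   /* least significant word first */

static u256
add256_sketch (u256 x, u256 y)
{
  u256 z;
  unsigned carry = 0;
  for (int i = 0; i < 4; i++) {
    uint64_t s = x.w[i] + y.w[i];    /* may wrap modulo 2^64 */
    uint64_t t = s + carry;          /* at most one of the two adds can wrap */
    carry = (s < x.w[i]) | (t < s);  /* a wrap shows up as a shrinking sum */
    z.w[i] = t;
  }
  return z;   /* like add256, assumes no carry out of the top word */
}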
+
+static int
+nr_digits256 (UINT256 R256) {
+  int ind;
+  // determine the number of decimal digits in R256
+  if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
+    // between 1 and 19 digits
+    for (ind = 1; ind <= 19; ind++) {
+      if (R256.w[0] < ten2k64[ind]) {
+        break;
+      }
+    }
+    // ind digits
+  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
+      (R256.w[1] < ten2k128[0].w[1] ||
+      (R256.w[1] == ten2k128[0].w[1]
+      && R256.w[0] < ten2k128[0].w[0]))) {
+    // 20 digits
+    ind = 20;
+  } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
+    // between 21 and 38 digits
+    for (ind = 1; ind <= 18; ind++) {
+      if (R256.w[1] < ten2k128[ind].w[1] ||
+          (R256.w[1] == ten2k128[ind].w[1] &&
+          R256.w[0] < ten2k128[ind].w[0])) {
+        break;
+      }
+    }
+    // ind + 20 digits
+    ind = ind + 20;
+  } else if (R256.w[3] == 0x0 &&
+      (R256.w[2] < ten2k256[0].w[2] ||
+      (R256.w[2] == ten2k256[0].w[2] &&
+      R256.w[1] < ten2k256[0].w[1]) ||
+      (R256.w[2] == ten2k256[0].w[2] &&
+      R256.w[1] == ten2k256[0].w[1] &&
+      R256.w[0] < ten2k256[0].w[0]))) {
+    // 39 digits
+    ind = 39;
+  } else {
+    // between 40 and 68 digits
+    for (ind = 1; ind <= 29; ind++) {
+      if (R256.w[3] < ten2k256[ind].w[3] ||
+          (R256.w[3] == ten2k256[ind].w[3] &&
+          R256.w[2] < ten2k256[ind].w[2]) ||
+          (R256.w[3] == ten2k256[ind].w[3] &&
+          R256.w[2] == ten2k256[ind].w[2] &&
+          R256.w[1] < ten2k256[ind].w[1]) ||
+          (R256.w[3] == ten2k256[ind].w[3] &&
+          R256.w[2] == ten2k256[ind].w[2] &&
+          R256.w[1] == ten2k256[ind].w[1] &&
+          R256.w[0] < ten2k256[ind].w[0])) {
+        break;
+      }
+    }
+    // ind + 39 digits
+    ind = ind + 39;
+  }
+  return (ind);
+}
+
+// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
+// use the rounding information from ptr_is_* to avoid a double rounding error
+static void
+add_and_round (int q3,
+               int q4,
+               int e4,
+               int delta,
+               int p34,
+               UINT64 z_sign,
+               UINT64 p_sign,
+               UINT128 C3,
+               UINT256 C4,
+               int rnd_mode,
+               int *ptr_is_midpoint_lt_even,
+               int *ptr_is_midpoint_gt_even,
+               int *ptr_is_inexact_lt_midpoint,
+               int *ptr_is_inexact_gt_midpoint,
+               _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
+
+  int scale;
+  int x0;
+  int ind;
+  UINT64 R64;
+  UINT128 P128, R128;
+  UINT192 P192, R192;
+  UINT256 R256;
+  int is_midpoint_lt_even = 0;
+  int is_midpoint_gt_even = 0;
+  int is_inexact_lt_midpoint = 0;
+  int is_inexact_gt_midpoint = 0;
+  int is_midpoint_lt_even0 = 0;
+  int is_midpoint_gt_even0 = 0;
+  int is_inexact_lt_midpoint0 = 0;
+  int is_inexact_gt_midpoint0 = 0;
+  int incr_exp = 0;
+  int is_tiny = 0;
+  int lt_half_ulp = 0;
+  int eq_half_ulp = 0;
+  // int gt_half_ulp = 0;
+  UINT128 res = *ptrres;
+
+  // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
+  scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
+  // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
+
+  // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
+  // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
+  if (scale == 0) {
+    R256.w[3] = 0x0ull;
+    R256.w[2] = 0x0ull;
+    R256.w[1] = C3.w[1];
+    R256.w[0] = C3.w[0];
+  } else if (scale <= 19) { // 10^scale fits in 64 bits
+    P128.w[1] = 0;
+    P128.w[0] = ten2k64[scale];
+    __mul_128x128_to_256 (R256, P128, C3);
+  } else if (scale <= 38) { // 10^scale fits in 128 bits
+    __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
+  } else if (scale <= 57) { // 39 <= scale <= 57
+    // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
+    // (10^67 has 223 bits; 10^69 has 230 bits);
+    // must split the computation:
+    // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
+    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
+    // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
+    __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
+    // now multiply R128 by 10^38
+    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
+  } else { // 58 <= scale <= 66
+    // 10^scale takes between 193 and 220 bits,
+    // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
+    // must split the computation:
+    // 10^scale * C3 = 10^38 * 10^(scale-38) * C3 where 10^38 takes 127
+    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
+    // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
+    // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
+    // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
+    __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
+    // now calculate 10^38 * 10^(scale-38) * C3
+    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
+  }
+  // C3 * 10^scale is now in R256
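The case split above exists because 10^scale itself outgrows the available integer widths: it fits in 64 bits up to 10^19, in 128 bits up to 10^38, and past that the product is formed in two steps as (10^(scale-38) * C3) * 10^38. The same trick at a smaller width, as an editorial sketch (the helper names and the 32-bit coefficient bound are assumptions; __uint128_t is the GCC extension type):

#include <stdint.h>

static uint64_t
pow10_64 (int s)                      /* 10^s for 0 <= s <= 19 */
{
  uint64_t p = 1;
  while (s-- > 0)
    p *= 10;
  return p;
}

static __uint128_t
scale_by_pow10 (uint32_t c, int s)    /* c * 10^s for 0 <= s <= 19 */
{
  if (s <= 9)                         /* 10^s fits easily in one word */
    return (__uint128_t) c * pow10_64 (s);
  /* split 10^s = 10^(s-9) * 10^9, just as the code above splits
     10^scale = 10^(scale-38) * 10^38 once 10^scale needs more than
     128 bits; the partial product c * 10^(s-9) is guaranteed to fit */
  return ((__uint128_t) c * pow10_64 (s - 9)) * pow10_64 (9);
}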
+
+  // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
+  // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
+  // possible
+  // add/subtract C4 and C3 * 10^scale; the exponent is e4
+  if (p_sign == z_sign) { // R256 = C4 + R256
+    // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
+    // but may require rounding
+    add256 (C4, R256, &R256);
+  } else { // if (p_sign != z_sign) { // R256 = C4 - R256
+    // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
+    // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
+    // but may require rounding
+
+    // compare first R256 = C3 * 10^scale and C4
+    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
+        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
+        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
+        R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
+      // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
+      // but may require rounding
+      sub256 (R256, C4, &R256);
+      // flip p_sign too, because the result has the sign of z
+      p_sign = z_sign;
+    } else { // if C4 > C3 * 10^scale
+      // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
+      // but may require rounding
+      sub256 (C4, R256, &R256);
+    }
+    // if the result is pure zero, the sign depends on the rounding mode
+    // (x*y and z had opposite signs)
+    if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
+        R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
+      if (rnd_mode != ROUNDING_DOWN)
+        p_sign = 0x0000000000000000ull;
+      else
+        p_sign = 0x8000000000000000ull;
+      // the exponent is max (e4, expmin)
+      if (e4 < -6176)
+        e4 = expmin;
+      // assemble result
+      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
+      res.w[0] = 0x0;
+      *ptrres = res;
+      return;
+    }
+  }
+
+  // determine the number of decimal digits in R256
+  ind = nr_digits256 (R256);
+
+  // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
+  // round to the destination precision, with unbounded exponent
+
+  if (ind <= p34) {
+    // result rounded to the destination precision with unbounded exponent
+    // is exact
+    if (ind + e4 < p34 + expmin) {
+      is_tiny = 1; // applies to all rounding modes
+    }
+    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
+    res.w[0] = R256.w[0];
+    // Note: res is correct only if expmin <= e4 <= expmax
+  } else { // if (ind > p34)
+    // if more than P digits, round to nearest to P digits
+    // round R256 to p34 digits
+    x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
+    if (ind <= 38) {
+      P128.w[1] = R256.w[1];
+      P128.w[0] = R256.w[0];
+      round128_19_38 (ind, x0, P128, &R128, &incr_exp,
+                      &is_midpoint_lt_even, &is_midpoint_gt_even,
+                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+    } else if (ind <= 57) {
+      P192.w[2] = R256.w[2];
+      P192.w[1] = R256.w[1];
+      P192.w[0] = R256.w[0];
+      round192_39_57 (ind, x0, P192, &R192, &incr_exp,
+                      &is_midpoint_lt_even, &is_midpoint_gt_even,
+                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+      R128.w[1] = R192.w[1];
+      R128.w[0] = R192.w[0];
+    } else { // if (ind <= 68)
+      round256_58_76 (ind, x0, R256, &R256, &incr_exp,
+                      &is_midpoint_lt_even, &is_midpoint_gt_even,
+                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+      R128.w[1] = R256.w[1];
+      R128.w[0] = R256.w[0];
+    }
+    // the rounded result has p34 = 34 digits
+    e4 = e4 + x0 + incr_exp;
+    if (rnd_mode == ROUNDING_TO_NEAREST) {
+      if (e4 < expmin) {
+        is_tiny = 1; // for other rounding modes apply correction
+      }
+    } else {
+      // for RM, RP, RZ, RA apply correction in order to determine tininess
+      // but do not save the result; apply the correction to
+      // (-1)^p_sign * significand * 10^0
+      P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
+      P128.w[0] = R128.w[0];
+      rounding_correction (rnd_mode,
+                           is_inexact_lt_midpoint,
+                           is_inexact_gt_midpoint, is_midpoint_lt_even,
+                           is_midpoint_gt_even, 0, &P128, ptrfpsf);
+      scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
+      // the number of digits in the significand is p34 = 34
+      if (e4 + scale < expmin) {
+        is_tiny = 1;
+      }
+    }
+    ind = p34; // the number of decimal digits in the significand of res
+    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
+    res.w[0] = R128.w[0];
+    // Note: res is correct only if expmin <= e4 <= expmax
+    // set the inexact flag after rounding with bounded exponent, if any
+  }
+  // at this point we have the result rounded with unbounded exponent in
+  // res and we know its tininess:
+  // res = (-1)^p_sign * significand * 10^e4,
+  // where q (significand) = ind <= p34
+  // Note: res is correct only if expmin <= e4 <= expmax
+
+  // check for overflow if RN
+  if (rnd_mode == ROUNDING_TO_NEAREST &&
(ind + e4) > (p34 + expmax)) { + res.w[1] = p_sign | 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + *ptrres = res; + *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + return; // BID_RETURN (res) + } // else not overflow or not RN, so continue + + // if (e4 >= expmin) we have the result rounded with bounded exponent + if (e4 < expmin) { + x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res + // where the result rounded [at most] once is + // (-1)^p_sign * significand_res * 10^e4 + + // avoid double rounding error + is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; + is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; + is_midpoint_lt_even0 = is_midpoint_lt_even; + is_midpoint_gt_even0 = is_midpoint_gt_even; + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + + if (x0 > ind) { + // nothing is left of res when moving the decimal point left x0 digits + is_inexact_lt_midpoint = 1; + res.w[1] = p_sign | 0x0000000000000000ull; + res.w[0] = 0x0000000000000000ull; + e4 = expmin; + } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 + // this is <, =, or > 1/2 ulp + // compare the ind-digit value in the significand of res with + // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is + // less than, equal to, or greater than 1/2 ulp (significand of res) + R128.w[1] = res.w[1] & MASK_COEFF; + R128.w[0] = res.w[0]; + if (ind <= 19) { + if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp + lt_half_ulp = 1; + is_inexact_lt_midpoint = 1; + } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp + eq_half_ulp = 1; + is_midpoint_gt_even = 1; + } else { // > 1/2 ulp + // gt_half_ulp = 1; + is_inexact_gt_midpoint = 1; + } + } else { // if (ind <= 38) { + if (R128.w[1] < midpoint128[ind - 20].w[1] || + (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp + lt_half_ulp = 1; + is_inexact_lt_midpoint = 1; + } else if (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp + eq_half_ulp = 1; + is_midpoint_gt_even = 1; + } else { // > 1/2 ulp + // gt_half_ulp = 1; + is_inexact_gt_midpoint = 1; + } + } + if (lt_half_ulp || eq_half_ulp) { + // res = +0.0 * 10^expmin + res.w[1] = 0x0000000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // if (gt_half_ulp) + // res = +1 * 10^expmin + res.w[1] = 0x0000000000000000ull; + res.w[0] = 0x0000000000000001ull; + } + res.w[1] = p_sign | res.w[1]; + e4 = expmin; + } else { // if (1 <= x0 <= ind - 1 <= 33) + // round the ind-digit result to ind - x0 digits + + if (ind <= 18) { // 2 <= ind <= 18 + round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + res.w[1] = 0x0; + res.w[0] = R64; + } else if (ind <= 38) { + P128.w[1] = res.w[1] & MASK_COEFF; + P128.w[0] = res.w[0]; + round128_19_38 (ind, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + } + e4 = e4 + x0; // expmin + // we want the exponent to be expmin, so if incr_exp = 1 then + // multiply the rounded result by 10 - it will still fit in 113 bits + if (incr_exp) { + // 64 x 128 -> 128 + P128.w[1] = res.w[1] & MASK_COEFF; + P128.w[0] = res.w[0]; + __mul_64x128_to_128 (res, ten2k64[1], P128); + } + res.w[1] = + p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); + // avoid a double rounding error + if ((is_inexact_gt_midpoint0 || 
is_midpoint_lt_even0) && + is_midpoint_lt_even) { // double rounding error upward + // res = res - 1 + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + // Note: a double rounding error upward is not possible; for this + // the result after the first rounding would have to be 99...95 + // (35 digits in all), possibly followed by a number of zeros; this + // is not possible in Cases (2)-(6) or (15)-(17) which may get here + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && + is_midpoint_gt_even) { // double rounding error downward + // res = res + 1 + res.w[0]++; + if (res.w[0] == 0) + res.w[1]++; + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // if this second rounding was exact the result may still be + // inexact because of the first rounding + if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { + is_inexact_gt_midpoint = 1; + } + if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { + is_inexact_lt_midpoint = 1; + } + } else if (is_midpoint_gt_even && + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + // pulled up to a midpoint + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else if (is_midpoint_lt_even && + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + // pulled down to a midpoint + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + ; + } + } + } + // res contains the correct result + // apply correction if not rounding to nearest + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, ptrfpsf); + } + if (is_midpoint_lt_even || is_midpoint_gt_even || + is_inexact_lt_midpoint || is_inexact_gt_midpoint) { + // set the inexact flag + *ptrfpsf |= INEXACT_EXCEPTION; + if (is_tiny) + *ptrfpsf |= UNDERFLOW_EXCEPTION; + } + + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + *ptrres = res; + return; +} + + +#if DECIMAL_CALL_BY_REFERENCE +static void +bid128_ext_fma (int *ptr_is_midpoint_lt_even, + int *ptr_is_midpoint_gt_even, + int *ptr_is_inexact_lt_midpoint, + int *ptr_is_inexact_gt_midpoint, UINT128 * pres, + UINT128 * px, UINT128 * py, + UINT128 * + pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT128 x = *px, y = *py, z = *pz; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +static UINT128 +bid128_ext_fma (int *ptr_is_midpoint_lt_even, + int *ptr_is_midpoint_gt_even, + int *ptr_is_inexact_lt_midpoint, + int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y, + UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM + _EXC_MASKS_PARAM _EXC_INFO_PARAM) { +#endif + + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; + UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign; + UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; + int true_p_exp; + UINT128 C1, C2, C3; + UINT256 C4; + int q1 = 0, q2 = 0, q3 = 0, q4; + int e1, e2, e3, e4; + int scale, ind, delta, x0; + int p34 = P34; // used to modify the limit on the number of digits + BID_UI64DOUBLE tmp; + int x_nr_bits, y_nr_bits, 
z_nr_bits; + unsigned int save_fpsf; + int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; + int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; + int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; + int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; + int incr_exp = 0; + int lsb; + int lt_half_ulp = 0; + int eq_half_ulp = 0; + int gt_half_ulp = 0; + int is_tiny = 0; + UINT64 R64, tmp64; + UINT128 P128, R128; + UINT192 P192, R192; + UINT256 R256; + + // the following are based on the table of special cases for fma; the NaN + // behavior is similar to that of the IA-64 Architecture fma + + // identify cases where at least one operand is NaN + + BID_SWAP128 (x); + BID_SWAP128 (y); + BID_SWAP128 (z); + if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN + // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) + // check first for non-canonical NaN payload + if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (y.w[0] > 0x38c15b09ffffffffull))) { + y.w[1] = y.w[1] & 0xffffc00000000000ull; + y.w[0] = 0x0ull; + } + if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (y) + res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = y.w[0]; + } else { // y is QNaN + // return y + res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = y.w[0]; + // if z = SNaN or x = SNaN signal invalid exception + if ((z.w[1] & MASK_SNAN) == MASK_SNAN || + (x.w[1] & MASK_SNAN) == MASK_SNAN) { + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + } + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN + // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) + // check first for non-canonical NaN payload + if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (z.w[0] > 0x38c15b09ffffffffull))) { + z.w[1] = z.w[1] & 0xffffc00000000000ull; + z.w[0] = 0x0ull; + } + if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (z) + res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = z.w[0]; + } else { // z is QNaN + // return z + res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = z.w[0]; + // if x = SNaN signal invalid exception + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + } + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & 
MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check + // for non-canonical values + + x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + C1.w[1] = x.w[1] & MASK_COEFF; + C1.w[0] = x.w[0]; + if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf + // if x is not infinity check for non-canonical values - treated as zero + if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low + } else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull && + C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } + } + } + y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + C2.w[1] = y.w[1] & MASK_COEFF; + C2.w[0] = y.w[0]; + if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf + // if y is not infinity check for non-canonical values - treated as zero + if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C2.w[1] = 0; // significand high + C2.w[0] = 0; // significand low + } else { // G0_G1 != 11 + y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C2.w[1] > 0x0001ed09bead87c0ull || + (C2.w[1] == 0x0001ed09bead87c0ull && + C2.w[0] > 0x378d8e63ffffffffull)) { + // y is non-canonical if coefficient is larger than 10^34 -1 + C2.w[1] = 0; + C2.w[0] = 0; + } else { // canonical + ; + } + } + } + z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + C3.w[1] = z.w[1] & MASK_COEFF; + C3.w[0] = z.w[0]; + if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf + // if z is not infinity check for non-canonical values - treated as zero + if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C3.w[1] = 0; // significand high + C3.w[0] = 0; // significand low + } else { // G0_G1 != 11 + z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C3.w[1] > 0x0001ed09bead87c0ull || + (C3.w[1] == 0x0001ed09bead87c0ull && + C3.w[0] > 0x378d8e63ffffffffull)) { + // z is non-canonical if coefficient is larger than 10^34 -1 + C3.w[1] = 0; + C3.w[0] = 0; + } else { // canonical + ; + } + } + } + + p_sign = x_sign ^ y_sign; // sign of the product + + // identify cases where at least one operand is infinity + + if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf + if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf + if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf + if (p_sign == z_sign) { + res.w[1] = z_sign | 
MASK_INF; + res.w[0] = 0x0; + } else { + // return QNaN Indefinite + res.w[1] = 0x7c00000000000000ull; + res.w[0] = 0x0000000000000000ull; + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + } + } else { // z = 0 or z = f + res.w[1] = p_sign | MASK_INF; + res.w[0] = 0x0; + } + } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f + if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf + if (p_sign == z_sign) { + res.w[1] = z_sign | MASK_INF; + res.w[0] = 0x0; + } else { + // return QNaN Indefinite + res.w[1] = 0x7c00000000000000ull; + res.w[0] = 0x0000000000000000ull; + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + } + } else { // z = 0 or z = f + res.w[1] = p_sign | MASK_INF; + res.w[0] = 0x0; + } + } else { // y = 0 + // return QNaN Indefinite + res.w[1] = 0x7c00000000000000ull; + res.w[0] = 0x0000000000000000ull; + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf + if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf + // x = f, necessarily + if ((p_sign != z_sign) + || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { + // return QNaN Indefinite + res.w[1] = 0x7c00000000000000ull; + res.w[0] = 0x0000000000000000ull; + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + } else { + res.w[1] = z_sign | MASK_INF; + res.w[0] = 0x0; + } + } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 + // z = 0, f, inf + // return QNaN Indefinite + res.w[1] = 0x7c00000000000000ull; + res.w[0] = 0x0000000000000000ull; + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + } else { + // x = f and z = 0, f, necessarily + res.w[1] = p_sign | MASK_INF; + res.w[0] = 0x0; + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf + // x = 0, f and y = 0, f, necessarily + res.w[1] = z_sign | MASK_INF; + res.w[0] = 0x0; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + + true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; + if (true_p_exp < -6176) + p_exp = 0; // cannot be less than EXP_MIN + else + p_exp = (UINT64) (true_p_exp + 6176) << 49; + + if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 + // the result is 0 + if (p_exp < z_exp) + res.w[1] = p_exp; // preferred exponent + else + res.w[1] = z_exp; // preferred exponent + if (p_sign == z_sign) { + res.w[1] |= z_sign; + res.w[0] = 0x0; + } else { // x * y and z have opposite signs + if (rnd_mode == ROUNDING_DOWN) { + // res = -0.0 + res.w[1] |= MASK_SIGN; + res.w[0] = 0x0; + } else { + // res = +0.0 + // res.w[1] |= 0x0; + res.w[0] = 0x0; + } + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 
(res); + BID_RETURN (res) + } + // from this point on, we may need to know the number of decimal digits + // in the significands of x, y, z when x, y, z != 0 + + if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) + // q1 = nr. of decimal digits in x + // determine first the nr. of bits in x + if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + q1 = nr_digits[x_nr_bits - 1].digits; + if (q1 == 0) { + q1 = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q1++; + } + } + + if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) + if (C2.w[1] == 0) { + if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 + tmp.d = (double) (C2.w[0] >> 32); // exact conversion + y_nr_bits = + 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // y < 2^32 + tmp.d = (double) C2.w[0]; // exact conversion + y_nr_bits = + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if y < 2^53 + tmp.d = (double) C2.w[0]; // exact conversion + y_nr_bits = + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) + tmp.d = (double) C2.w[1]; // exact conversion + y_nr_bits = + 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + + q2 = nr_digits[y_nr_bits].digits; + if (q2 == 0) { + q2 = nr_digits[y_nr_bits].digits1; + if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || + (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && + C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) + q2++; + } + } + + if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) + if (C3.w[1] == 0) { + if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 + tmp.d = (double) (C3.w[0] >> 32); // exact conversion + z_nr_bits = + 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // z < 2^32 + tmp.d = (double) C3.w[0]; // exact conversion + z_nr_bits = + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if z < 2^53 + tmp.d = (double) C3.w[0]; // exact conversion + z_nr_bits = + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // C3.w[1] != 0 => nr. 
bits = 64 + nr_bits (C3.w[1]) + tmp.d = (double) C3.w[1]; // exact conversion + z_nr_bits = + 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + + q3 = nr_digits[z_nr_bits].digits; + if (q3 == 0) { + q3 = nr_digits[z_nr_bits].digits1; + if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || + (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && + C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) + q3++; + } + } + + if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || + (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { + // x = 0 or y = 0 + // z = f, necessarily; for 0 + z return z, with the preferred exponent + // the result is z, but need to get the preferred exponent + if (z_exp <= p_exp) { // the preferred exponent is z_exp + res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; + res.w[0] = C3.w[0]; + } else { // if (p_exp < z_exp) the preferred exponent is p_exp + // return (C3 * 10^scale) * 10^(z_exp - scale) + // where scale = min (p34-q3, (z_exp-p_exp) >> 49) + scale = p34 - q3; + ind = (z_exp - p_exp) >> 49; + if (ind < scale) + scale = ind; + if (scale == 0) { + res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant + res.w[0] = z.w[0]; + } else if (q3 <= 19) { // z fits in 64 bits + if (scale <= 19) { // 10^scale fits in 64 bits + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); + } else { // 10^scale fits in 128 bits + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); + } + } else { // z fits in 128 bits, but 10^scale must fit in 64 bits + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); + } + // subtract scale from the exponent + z_exp = z_exp - ((UINT64) scale << 49); + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } else { + ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f + } + + e1 = (x_exp >> 49) - 6176; // unbiased exponent of x + e2 = (y_exp >> 49) - 6176; // unbiased exponent of y + e3 = (z_exp >> 49) - 6176; // unbiased exponent of z + e4 = e1 + e2; // unbiased exponent of the exact x * y + + // calculate C1 * C2 and its number of decimal digits, q4 + + // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits + // where 2 <= q1 + q2 <= 68 + // calculate C4 = C1 * C2 and determine q + C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; + if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits + C4.w[0] = C1.w[0] * C2.w[0]; + // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 + if (C4.w[0] < ten2k64[q1 + q2 - 1]) + q4 = q1 + q2 - 1; // q4 in [1, 18] + else + q4 = q1 + q2; // q4 in [2, 19] + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; + } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits + // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits + __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); + // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 + if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; + q4 = 19; // 19 = q1 + q2 - 1 + } else { + // if (C4.w[1] == 0) + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; + // else + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; 
+ q4 = 20; // 20 = q1 + q2 + } + } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 + // C4 = C1 * C2 fits in 64 or 128 bits + // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) + // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits + if (q1 <= 19) { + __mul_128x64_to_128 (C4, C1.w[0], C2); + } else { // q2 <= 19 + __mul_128x64_to_128 (C4, C2.w[0], C1); + } + // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 + if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || + (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && + C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { + // if (C4.w[1] == 0) // q4 = 20, necessarily + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; + // else + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; + q4 = q1 + q2 - 1; // q4 in [20, 37] + } else { + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; + q4 = q1 + q2; // q4 in [21, 38] + } + } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits + // both C1 and C2 fit in 128 bits (actually in 113 bits) + // may replace this by 128x128_to192 + __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 + // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 + if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || + (C4.w[1] == ten2k128[18].w[1] + && C4.w[0] < ten2k128[18].w[0]))) { + // 18 = 38 - 20 = q1+q2-1 - 20 + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; + q4 = 38; // 38 = q1 + q2 - 1 + } else { + // if (C4.w[2] == 0) + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; + // else + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; + q4 = 39; // 39 = q1 + q2 + } + } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 + // C4 = C1 * C2 fits in 128 or 192 bits + // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) + // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one + // may fit in 64 bits + if (C1.w[1] == 0) { // C1 fits in 64 bits + // __mul_64x128_full (REShi64, RESlo128, A64, B128) + __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); + } else if (C2.w[1] == 0) { // C2 fits in 64 bits + // __mul_64x128_full (REShi64, RESlo128, A64, B128) + __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); + } else { // both C1 and C2 require 128 bits + // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); + __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 + } + // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 + if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || + (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && + (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || + (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && + C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { + // if (C4.w[2] == 0) // q4 = 39, necessarily + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; + // else + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; + q4 = q1 + q2 - 1; // q4 in [39, 56] + } else { + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; + q4 = q1 + q2; // q4 in [40, 57] + } + } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits + // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one + // may fit in 64 bits + if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits + __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 + } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits + __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 
64x128_to_192 + } else { // C1 * C2 will fit in 192 bits or in 256 bits + __mul_128x128_to_256 (C4, C1, C2); + } + // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 + if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || + (C4.w[2] == ten2k256[18].w[2] + && (C4.w[1] < ten2k256[18].w[1] + || (C4.w[1] == ten2k256[18].w[1] + && C4.w[0] < ten2k256[18].w[0]))))) { + // 18 = 57 - 39 = q1+q2-1 - 39 + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; + q4 = 57; // 57 = q1 + q2 - 1 + } else { + // if (C4.w[3] == 0) + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; + // else + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; + q4 = 58; // 58 = q1 + q2 + } + } else { // if 59 <= q1 + q2 <= 68 + // C4 = C1 * C2 fits in 192 or 256 bits + // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) + // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in + // 64 bits + // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); + __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 + // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 + if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || + (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && + (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || + (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && + (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || + (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && + C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { + // if (C4.w[3] == 0) // q4 = 58, necessarily + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; + // else + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; + q4 = q1 + q2 - 1; // q4 in [58, 67] + } else { + // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; + q4 = q1 + q2; // q4 in [59, 68] + } + } + + if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 + save_fpsf = *pfpsf; // sticky bits - caller value must be preserved + *pfpsf = 0; + + if (q4 > p34) { + + // truncate C4 to p34 digits into res + // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 + x0 = q4 - p34; + if (q4 <= 38) { + P128.w[1] = C4.w[1]; + P128.w[0] = C4.w[0]; + round128_19_38 (q4, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + } else if (q4 <= 57) { // 35 <= q4 <= 57 + P192.w[2] = C4.w[2]; + P192.w[1] = C4.w[1]; + P192.w[0] = C4.w[0]; + round192_39_57 (q4, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + res.w[0] = R192.w[0]; + res.w[1] = R192.w[1]; + } else { // if (q4 <= 68) + round256_58_76 (q4, x0, C4, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + res.w[0] = R256.w[0]; + res.w[1] = R256.w[1]; + } + e4 = e4 + x0; + if (incr_exp) { + e4 = e4 + 1; + } + q4 = p34; + // res is now the coefficient of the result rounded to the destination + // precision, with unbounded exponent; the exponent is e4; q4=digits(res) + } else { // if (q4 <= p34) + // C4 * 10^e4 is the result rounded to the destination precision, with + // unbounded exponent (which is exact) + + if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { + // e4 is too large, but can be brought within range by scaling up C4 + scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 + // res = (C4 * 10^scale) * 10^expmax + if (q4 <= 19) { // C4 fits in 64 bits + if (scale <= 19) { // 
10^scale fits in 64 bits
+          // 64 x 64 C4.w[0] * ten2k64[scale]
+          __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
+        } else { // 10^scale fits in 128 bits
+          // 64 x 128 C4.w[0] * ten2k128[scale - 20]
+          __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
+        }
+      } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
+        // 64 x 128 ten2k64[scale] * C4
+        __mul_128x64_to_128 (res, ten2k64[scale], C4);
+      }
+      e4 = e4 - scale; // expmax
+      q4 = q4 + scale;
+    } else {
+      res.w[1] = C4.w[1];
+      res.w[0] = C4.w[0];
+    }
+    // res is the coefficient of the result rounded to the destination
+    // precision, with unbounded exponent (it has q4 digits); the exponent
+    // is e4 (exact result)
+  }
+
+  // check for overflow
+  if (q4 + e4 > p34 + expmax) {
+    if (rnd_mode == ROUNDING_TO_NEAREST) {
+      res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
+      res.w[0] = 0x0000000000000000ull;
+      *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
+    } else {
+      res.w[1] = p_sign | res.w[1];
+      rounding_correction (rnd_mode,
+                           is_inexact_lt_midpoint,
+                           is_inexact_gt_midpoint,
+                           is_midpoint_lt_even, is_midpoint_gt_even,
+                           e4, &res, pfpsf);
+    }
+    *pfpsf |= save_fpsf;
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
+    BID_RETURN (res)
+  }
+  // check for underflow
+  if (q4 + e4 < expmin + P34) {
+    is_tiny = 1; // the result is tiny
+    if (e4 < expmin) {
+      // if e4 < expmin, we must truncate more of res
+      x0 = expmin - e4; // x0 >= 1
+      is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
+      is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
+      is_midpoint_lt_even0 = is_midpoint_lt_even;
+      is_midpoint_gt_even0 = is_midpoint_gt_even;
+      is_inexact_lt_midpoint = 0;
+      is_inexact_gt_midpoint = 0;
+      is_midpoint_lt_even = 0;
+      is_midpoint_gt_even = 0;
+      // the number of decimal digits in res is q4
+      if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
+        if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
+          round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
+                        &is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint);
+          if (incr_exp) {
+            // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
+            R64 = ten2k64[q4 - x0];
+          }
+          // res.w[1] = 0; (from above)
+          res.w[0] = R64;
+        } else { // if (q4 <= 34)
+          // 19 <= q4 <= 38
+          P128.w[1] = res.w[1];
+          P128.w[0] = res.w[0];
+          round128_19_38 (q4, x0, P128, &res, &incr_exp,
+                          &is_midpoint_lt_even, &is_midpoint_gt_even,
+                          &is_inexact_lt_midpoint,
+                          &is_inexact_gt_midpoint);
+          if (incr_exp) {
+            // increase coefficient by a factor of 10; this will be <= 10^33
+            // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
+            if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
+              // res.w[1] = 0;
+              res.w[0] = ten2k64[q4 - x0];
+            } else { // 20 <= q4 - x0 <= 37
+              res.w[0] = ten2k128[q4 - x0 - 20].w[0];
+              res.w[1] = ten2k128[q4 - x0 - 20].w[1];
+            }
+          }
+        }
+        e4 = e4 + x0; // expmin
+      } else if (x0 == q4) {
+        // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
+        // determine relationship with 1/2 ulp
+        if (q4 <= 19) {
+          if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
+            lt_half_ulp = 1;
+            is_inexact_lt_midpoint = 1;
+          } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
+            eq_half_ulp = 1;
+            is_midpoint_gt_even = 1;
+          } else { // > 1/2 ulp
+            // gt_half_ulp = 1;
+            is_inexact_gt_midpoint = 1;
+          }
+        } else { // if (q4 <= 34)
+          if (res.w[1] < midpoint128[q4 - 20].w[1] ||
+              (res.w[1] == midpoint128[q4 - 20].w[1] &&
+              res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
+            lt_half_ulp = 1;
+            is_inexact_lt_midpoint = 1;
+          } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
+              res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
+            eq_half_ulp = 1;
+            is_midpoint_gt_even = 1;
+          } else { // > 1/2 ulp
+            // gt_half_ulp = 1;
+            is_inexact_gt_midpoint = 1;
+          }
+        }
+        if (lt_half_ulp || eq_half_ulp) {
+          // res = +0.0 * 10^expmin
+          res.w[1] = 0x0000000000000000ull;
+          res.w[0] = 0x0000000000000000ull;
+        } else { // if (gt_half_ulp)
+          // res = +1 * 10^expmin
+          res.w[1] = 0x0000000000000000ull;
+          res.w[0] = 0x0000000000000001ull;
+        }
+        e4 = expmin;
+      } else { // if (x0 > q4)
+        // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
+        res.w[1] = 0;
+        res.w[0] = 0;
+        e4 = expmin;
+        is_inexact_lt_midpoint = 1;
+      }
+      // avoid a double rounding error
+      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
+          is_midpoint_lt_even) { // double rounding error upward
+        // res = res - 1
+        res.w[0]--;
+        if (res.w[0] == 0xffffffffffffffffull)
+          res.w[1]--;
+        // Note: a double rounding error upward is not possible; for this
+        // the result after the first rounding would have to be 99...95
+        // (35 digits in all), possibly followed by a number of zeros; this
+        // is not possible for f * f + 0
+        is_midpoint_lt_even = 0;
+        is_inexact_lt_midpoint = 1;
+      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
+          is_midpoint_gt_even) { // double rounding error downward
+        // res = res + 1
+        res.w[0]++;
+        if (res.w[0] == 0)
+          res.w[1]++;
+        is_midpoint_gt_even = 0;
+        is_inexact_gt_midpoint = 1;
+      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
+          !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+        // if this second rounding was exact the result may still be
+        // inexact because of the first rounding
+        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
+          is_inexact_gt_midpoint = 1;
+        }
+        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
+          is_inexact_lt_midpoint = 1;
+        }
+      } else if (is_midpoint_gt_even &&
+          (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+        // pulled up to a midpoint
+        is_inexact_lt_midpoint = 1;
+        is_inexact_gt_midpoint = 0;
+        is_midpoint_lt_even = 0;
+        is_midpoint_gt_even = 0;
+      } else if (is_midpoint_lt_even &&
+          (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+        // pulled down to a midpoint
+        is_inexact_lt_midpoint = 0;
+        is_inexact_gt_midpoint = 1;
+        is_midpoint_lt_even = 0;
+        is_midpoint_gt_even = 0;
+      } else {
+        ;
+      }
+    } else { // if e4 >= emin then q4 < P and the result is tiny and exact
+      if (e3 < e4) {
+        // if (e3 < e4) the preferred exponent is e3
+        // return (C4 * 10^scale) * 10^(e4 - scale)
+        // where scale = min (p34-q4, (e4 - e3))
+        scale = p34 - q4;
+        ind = e4 - e3;
+        if (ind < scale)
+          scale = ind;
+        if (scale == 0) {
+          ; // res and e4 are unchanged
+        } else if (q4 <= 19) { // C4 fits in 64 bits
+          if (scale <= 19) { // 10^scale fits in 64 bits
+            // 64 x 64 res.w[0] * ten2k64[scale]
+            __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
+          } else { // 10^scale fits in 128 bits
+            // 64 x 128 res.w[0] * ten2k128[scale - 20]
+            __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
+          }
+        } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
+          // 64 x 128 ten2k64[scale] * res
+          __mul_128x64_to_128 (res, ten2k64[scale], res);
+        }
+        // subtract scale from the exponent
+        e4 = e4 - scale;
+      }
+    }
+
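A concrete instance of the double-rounding hazard corrected above (illustrative numbers only, not taken from the source): let the exact value be 12345.2 and round it first to 5 digits, giving 12345 with is_inexact_lt_midpoint set. Rounding 12345 again to 4 digits lands on the apparent midpoint 1234.5, and nearest-even alone would return 1234; but the true value 1234.52 must round to 1235. The saved copies (is_inexact_lt_midpoint0 and friends) expose this: the first rounding went down and the second stopped on a midpoint, so the code adds one back and reclassifies the result as is_inexact_gt_midpoint, since 1235 now lies above the exact 1234.52.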
+    // check for inexact result
+    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
+        is_midpoint_lt_even || is_midpoint_gt_even) {
+      // set the inexact flag and the underflow flag
+      *pfpsf |= INEXACT_EXCEPTION;
+      *pfpsf |= UNDERFLOW_EXCEPTION;
+    }
+    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
+    if (rnd_mode != ROUNDING_TO_NEAREST) {
+      rounding_correction (rnd_mode,
+                           is_inexact_lt_midpoint,
+                           is_inexact_gt_midpoint,
+                           is_midpoint_lt_even, is_midpoint_gt_even,
+                           e4, &res, pfpsf);
+    }
+    *pfpsf |= save_fpsf;
+    *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+    *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+    *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+    *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+    BID_SWAP128 (res);
+    BID_RETURN (res)
+  }
+  // no overflow, and no underflow for rounding to nearest
+  res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
+
+  if (rnd_mode != ROUNDING_TO_NEAREST) {
+    rounding_correction (rnd_mode,
+                         is_inexact_lt_midpoint,
+                         is_inexact_gt_midpoint,
+                         is_midpoint_lt_even, is_midpoint_gt_even,
+                         e4, &res, pfpsf);
+    // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
+    if (e4 == expmin) {
+      if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
+          ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
+          res.w[0] < 0x38c15b0a00000000ull)) {
+        is_tiny = 1;
+      }
+    }
+  }
+
+  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
+      is_midpoint_lt_even || is_midpoint_gt_even) {
+    // set the inexact flag
+    *pfpsf |= INEXACT_EXCEPTION;
+    if (is_tiny)
+      *pfpsf |= UNDERFLOW_EXCEPTION;
+  }
+
+  if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
+    // need to ensure that the result has the preferred exponent
+    p_exp = res.w[1] & MASK_EXP;
+    if (z_exp < p_exp) { // the preferred exponent is z_exp
+      // significand of res in C3
+      C3.w[1] = res.w[1] & MASK_COEFF;
+      C3.w[0] = res.w[0];
+      // the number of decimal digits of x * y is q4 <= 34
+      // Note: the coefficient fits in 128 bits
+
+      // return (C3 * 10^scale) * 10^(p_exp - scale)
+      // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
+      scale = p34 - q4;
+      ind = (p_exp - z_exp) >> 49;
+      if (ind < scale)
+        scale = ind;
+      // subtract scale from the exponent
+      p_exp = p_exp - ((UINT64) scale << 49);
+      if (scale == 0) {
+        ; // leave res unchanged
+      } else if (q4 <= 19) { // x * y fits in 64 bits
+        if (scale <= 19) { // 10^scale fits in 64 bits
+          // 64 x 64 C3.w[0] * ten2k64[scale]
+          __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
+        } else { // 10^scale fits in 128 bits
+          // 64 x 128 C3.w[0] * ten2k128[scale - 20]
+          __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
+        }
+        res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
+      } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
+        // 64 x 128 ten2k64[scale] * C3
+        __mul_128x64_to_128 (res, ten2k64[scale], C3);
+        res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
+      }
+    } // else leave the result as it is, because p_exp <= z_exp
+  }
+  *pfpsf |= save_fpsf;
+  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
+  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
+  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
+  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
+  BID_SWAP128 (res);
+  BID_RETURN (res)
+  } // else we have f * f + f
+
+  // continue with x = f, y = f, z = f
+
+  delta = q3 + e3 - q4 - e4;
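A worked instance of the case analysis that follows (operands are illustrative, not from the source): for z = 1 * 10^40 (q3 = 1, e3 = 40) and x * y = 123 * 10^-2 (q4 = 3, e4 = -2), delta = 1 + 40 - 3 + 2 = 40. Since p34 = 34 <= delta - 1, this is Case (1'): the entire product lies below the 34 significant digits available to the result, so the sum rounds to z itself, and x * y contributes only the inexact status and, for directed roundings, the direction of the final one-ulp correction.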
delta-1 and e3 > expmax is a necessary + // condition for (q3 + e3) > (p34 + expmax) + if (rnd_mode == ROUNDING_TO_NEAREST) { + res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + } else { + if (p_sign == z_sign) { + is_inexact_lt_midpoint = 1; + } else { + is_inexact_gt_midpoint = 1; + } + // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) + scale = p34 - q3; + if (scale == 0) { + res.w[1] = z_sign | C3.w[1]; + res.w[0] = C3.w[0]; + } else { + if (q3 <= 19) { // C3 fits in 64 bits + if (scale <= 19) { // 10^scale fits in 64 bits + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); + } else { // 10^scale fits in 128 bits + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], + ten2k128[scale - 20]); + } + } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); + } + // the coefficient in res has q3 + scale = p34 digits + } + e3 = e3 - scale; + res.w[1] = z_sign | res.w[1]; + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + // res = z + if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) + // return (C3 * 10^scale) * 10^(z_exp - scale) + // where scale = min (p34-q3, z_exp-EMIN) + scale = p34 - q3; + ind = e3 + 6176; + if (ind < scale) + scale = ind; + if (scale == 0) { + res.w[1] = C3.w[1]; + res.w[0] = C3.w[0]; + } else if (q3 <= 19) { // z fits in 64 bits + if (scale <= 19) { // 10^scale fits in 64 bits + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); + } else { // 10^scale fits in 128 bits + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); + } + } else { // z fits in 128 bits, but 10^scale must fit in 64 bits + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); + } + // the coefficient in res has q3 + scale digits + // subtract scale from the exponent + z_exp = z_exp - ((UINT64) scale << 49); + e3 = e3 - scale; + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + if (scale + q3 < p34) + *pfpsf |= UNDERFLOW_EXCEPTION; + } else { + scale = 0; + res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; + res.w[0] = C3.w[0]; + } + + // use the following to avoid double rounding errors when operating on + // mixed formats in rounding to nearest, and for correcting the result + // if not rounding to nearest + if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { + // there is a gap of exactly one digit between the scaled C3 and C4 + // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case + if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || + (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || + (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || + C3.w[0] != ten2k128[q3 - 21].w[0]))) { + // C3 * 10^ scale != 10^(q3-1) + // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || + // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 + is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. 
value + } else { // if C3 * 10^scale = 10^(q3+scale-1) + // ok from above e3 = (z_exp >> 49) - 6176; + // the result is always inexact + if (q4 == 1) { + R64 = C4.w[0]; + } else { + // if q4 > 1 then truncate C4 from q4 digits to 1 digit; + // x = q4-1, 1 <= x <= 67 and check if this operation is exact + if (q4 <= 18) { // 2 <= q4 <= 18 + round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + } else if (q4 <= 38) { + P128.w[1] = C4.w[1]; + P128.w[0] = C4.w[0]; + round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R128.w[0]; // one decimal digit + } else if (q4 <= 57) { + P192.w[2] = C4.w[2]; + P192.w[1] = C4.w[1]; + P192.w[0] = C4.w[0]; + round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R192.w[0]; // one decimal digit + } else { // if (q4 <= 68) + round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R256.w[0]; // one decimal digit + } + if (incr_exp) { + R64 = 10; + } + } + if (q4 == 1 && C4.w[0] == 5) { + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 1; + is_midpoint_gt_even = 0; + } else if ((e3 == expmin) || + R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { + // result does not change + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + // result decremented is 10^(q3+scale) - 1 + if ((q3 + scale) <= 19) { + res.w[1] = 0; + res.w[0] = ten2k64[q3 + scale]; + } else { // if ((q3 + scale + 1) <= 35) + res.w[1] = ten2k128[q3 + scale - 20].w[1]; + res.w[0] = ten2k128[q3 + scale - 20].w[0]; + } + res.w[0] = res.w[0] - 1; // borrow never occurs + z_exp = z_exp - EXP_P1; + e3 = e3 - 1; + res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; + } + if (e3 == expmin) { + if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { + ; // result not tiny (in round-to-nearest mode) + } else { + *pfpsf |= UNDERFLOW_EXCEPTION; + } + } + } // end 10^(q3+scale-1) + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } else { + if (p_sign == z_sign) { + // if (z_sign), set as if for absolute value + is_inexact_lt_midpoint = 1; + } else { // if (p_sign != z_sign) + // if (z_sign), set as if for absolute value + is_inexact_gt_midpoint = 1; + } + *pfpsf |= INEXACT_EXCEPTION; + } + // the result is always inexact => set the inexact flag + // Determine tininess: + // if (exp > expmin) + // the result is not tiny + // else // if exp = emin + // if (q3 + scale < p34) + // the result is tiny + // else // if (q3 + scale = p34) + // if (C3 * 10^scale > 10^33) + // the result is not tiny + // else // if C3 * 10^scale = 10^33 + // if (xy * z > 0) + // the result is not tiny + // else // if (xy * z < 0) + // if (z > 0) + // if rnd_mode != RP + // the result is tiny + // else // if RP + // the result is not tiny + // else // if (z < 0) + // if rnd_mode != RM + // the result is tiny + // else // if RM + // the result is not tiny + // endif + // endif + // endif + // endif + // endif + // endif + if ((e3 == expmin && (q3 + scale) < p34) || + (e3 == expmin && (q3 + 
scale) == p34 && + (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high + res.w[0] == 0x38c15b0a00000000ull && // 10^33_low + z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || + (z_sign && rnd_mode != ROUNDING_DOWN)))) { + *pfpsf |= UNDERFLOW_EXCEPTION; + } + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + + } else if (p34 == delta) { // Case (1''B) + + // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 + // and C3 can be scaled up to p34 digits if needed + + // scale C3 to p34 digits if needed + scale = p34 - q3; // 0 <= scale <= p34 - 1 + if (scale == 0) { + res.w[1] = C3.w[1]; + res.w[0] = C3.w[0]; + } else if (q3 <= 19) { // z fits in 64 bits + if (scale <= 19) { // 10^scale fits in 64 bits + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); + } else { // 10^scale fits in 128 bits + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); + } + } else { // z fits in 128 bits, but 10^scale must fit in 64 bits + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); + } + // subtract scale from the exponent + z_exp = z_exp - ((UINT64) scale << 49); + e3 = e3 - scale; + // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits + + // determine whether x * y is less than, equal to, or greater than + // 1/2 ulp (z) + if (q4 <= 19) { + if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp + lt_half_ulp = 1; + } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp + eq_half_ulp = 1; + } else { // > 1/2 ulp + gt_half_ulp = 1; + } + } else if (q4 <= 38) { + if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || + (C4.w[1] == midpoint128[q4 - 20].w[1] && + C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp + lt_half_ulp = 1; + } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && + C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp + eq_half_ulp = 1; + } else { // > 1/2 ulp + gt_half_ulp = 1; + } + } else if (q4 <= 58) { + if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || + (C4.w[2] == midpoint192[q4 - 39].w[2] && + C4.w[1] < midpoint192[q4 - 39].w[1]) || + (C4.w[2] == midpoint192[q4 - 39].w[2] && + C4.w[1] == midpoint192[q4 - 39].w[1] && + C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp + lt_half_ulp = 1; + } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && + C4.w[1] == midpoint192[q4 - 39].w[1] && + C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp + eq_half_ulp = 1; + } else { // > 1/2 ulp + gt_half_ulp = 1; + } + } else { + if (C4.w[3] < midpoint256[q4 - 59].w[3] || + (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] < midpoint256[q4 - 59].w[2]) || + (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] == midpoint256[q4 - 59].w[2] && + C4.w[1] < midpoint256[q4 - 59].w[1]) || + (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] == midpoint256[q4 - 59].w[2] && + C4.w[1] == midpoint256[q4 - 59].w[1] && + C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp + lt_half_ulp = 1; + } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && + C4.w[2] == midpoint256[q4 - 59].w[2] && + C4.w[1] == 
midpoint256[q4 - 59].w[1] && + C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp + eq_half_ulp = 1; + } else { // > 1/2 ulp + gt_half_ulp = 1; + } + } + + if (p_sign == z_sign) { + if (lt_half_ulp) { + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + // use the following to avoid double rounding errors when operating on + // mixed formats in rounding to nearest + is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value + } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { + // add 1 ulp to the significand + res.w[0]++; + if (res.w[0] == 0x0ull) + res.w[1]++; + // check for rounding overflow, when coeff == 10^34 + if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && + res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 + e3 = e3 + 1; + // coeff = 10^33 + z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; + res.w[1] = 0x0000314dc6448d93ull; + res.w[0] = 0x38c15b0a00000000ull; + } + // end add 1 ulp + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + if (eq_half_ulp) { + is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value + } else { + is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value + } + } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) + // leave unchanged + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value + } + // the result is always inexact, and never tiny + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + // check for overflow + if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { + res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); + z_exp = res.w[1] & MASK_EXP; + } + } else { // if (p_sign != z_sign) + // consider two cases, because C3 * 10^scale = 10^33 is a special case + if (res.w[1] != 0x0000314dc6448d93ull || + res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 + if (lt_half_ulp) { + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + // use the following to avoid double rounding errors when operating + // on mixed formats in rounding to nearest + is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value + } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { + // subtract 1 ulp from the significand + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + if (eq_half_ulp) { + is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value + } else { + is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value + } + } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) + // leave unchanged + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value + } + // the result is always inexact, and never tiny + // check for overflow for RN + if (e3 > expmax) { + if (rnd_mode == ROUNDING_TO_NEAREST) { + res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | 
OVERFLOW_EXCEPTION); + } else { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, pfpsf); + } + z_exp = res.w[1] & MASK_EXP; + } else { // if C3 * 10^scale = 10^33 + e3 = (z_exp >> 49) - 6176; + if (e3 > expmin) { + // the result is exact if exp > expmin and C4 = d*10^(q4-1), + // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact + if (q4 == 1) { + // if q4 = 1 the result is exact + // result coefficient = 10^34 - C4 + res.w[1] = 0x0001ed09bead87c0ull; + res.w[0] = 0x378d8e6400000000ull - C4.w[0]; + z_exp = z_exp - EXP_P1; + e3 = e3 - 1; + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + } else { + // if q4 > 1 then truncate C4 from q4 digits to 1 digit; + // x = q4-1, 1 <= x <= 67 and check if this operation is exact + if (q4 <= 18) { // 2 <= q4 <= 18 + round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + } else if (q4 <= 38) { + P128.w[1] = C4.w[1]; + P128.w[0] = C4.w[0]; + round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R128.w[0]; // one decimal digit + } else if (q4 <= 57) { + P192.w[2] = C4.w[2]; + P192.w[1] = C4.w[1]; + P192.w[0] = C4.w[0]; + round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R192.w[0]; // one decimal digit + } else { // if (q4 <= 68) + round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, + &is_midpoint_lt_even, + &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R64 = R256.w[0]; // one decimal digit + } + if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // the result is exact: 10^34 - R64 + // incr_exp = 0 with certainty + z_exp = z_exp - EXP_P1; + e3 = e3 - 1; + res.w[1] = + z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; + res.w[0] = 0x378d8e6400000000ull - R64; + } else { + // We want R64 to be the top digit of C4, but we actually + // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, + // because the top digit is (C4 * 10^(-q4+1))RZ + // however, if incr_exp = 1 then R64 = 10 with certainty + if (incr_exp) { + R64 = 10; + } + // the result is inexact as C4 has more than 1 significant digit + // and C3 * 10^scale = 10^33 + // example of a case that is treated here: + // 100...0 * 10^e3 - 0.41 * 10^e3 = + // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) + // note that (e3 > expmin) + // in order to round, subtract R64 from 10^34 and then compare + // C4 - R64 * 10^(q4-1) with 1/2 ulp + // calculate 10^34 - R64 + res.w[1] = 0x0001ed09bead87c0ull; + res.w[0] = 0x378d8e6400000000ull - R64; + z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand + // calculate C4 - R64 * 10^(q4-1); this is a rare case and + // R64 is small, 1 <= 
R64 <= 9 + e3 = e3 - 1; + if (is_inexact_lt_midpoint) { + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + } else if (is_inexact_gt_midpoint) { + is_inexact_gt_midpoint = 0; + is_inexact_lt_midpoint = 1; + } else if (is_midpoint_lt_even) { + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 1; + } else if (is_midpoint_gt_even) { + is_midpoint_gt_even = 0; + is_midpoint_lt_even = 1; + } else { + ; + } + // the result is always inexact, and never tiny + // check for overflow for RN + if (e3 > expmax) { + if (rnd_mode == ROUNDING_TO_NEAREST) { + res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + } else { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + res.w[1] = + z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); + } + z_exp = res.w[1] & MASK_EXP; + } // end result is inexact + } // end q4 > 1 + } else { // if (e3 = emin) + // if e3 = expmin the result is also tiny (the condition for + // tininess is C4 > 050...0 [q4 digits] which is met because + // the msd of C4 is not zero) + // the result is tiny and inexact in all rounding modes; + // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, + // gt_half_ulp to calculate) + // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged + + // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp + if (gt_half_ulp) { // res = 10^33 - 1 + res.w[1] = 0x0000314dc6448d93ull; + res.w[0] = 0x38c15b09ffffffffull; + } else { + res.w[1] = 0x0000314dc6448d93ull; + res.w[0] = 0x38c15b0a00000000ull; + } + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later + + if (eq_half_ulp) { + is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value + } else if (lt_half_ulp) { + is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value + } else { // if (gt_half_ulp) + is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value + } + + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, + is_midpoint_gt_even, e3, &res, + pfpsf); + z_exp = res.w[1] & MASK_EXP; + } + } // end e3 = emin + // set the inexact flag (if the result was not exact) + if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || + is_midpoint_lt_even || is_midpoint_gt_even) + *pfpsf |= INEXACT_EXCEPTION; + } // end 10^33 + } // end if (p_sign != z_sign) + res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + + } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) + (q3 <= delta && delta + q4 <= p34) || // Case (3) + (delta < q3 && p34 < delta + q4) 
|| // Case (4) + (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) + (delta + q4 < q3)) && // Case (6) + !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) + + // the result has the sign of z + + if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) + (delta < q3 && p34 < delta + q4)) { // Case (4) + // round first the sum x * y + z with unbounded exponent + // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, + // 1 <= scale <= 33 + // calculate res = C3 * 10^scale + scale = p34 - q3; + x0 = delta + q4 - p34; + } else if (delta + q4 < q3) { // Case (6) + // make Case (6) look like Case (3) or Case (5) with scale = 0 + // by scaling up C4 by 10^(q3 - delta - q4) + scale = q3 - delta - q4; // 1 <= scale <= 33 + if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits + if (scale <= 19) { // 10^scale fits in 64 bits + // 64 x 64 C4.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); + } else { // 10^scale fits in 128 bits + // 64 x 128 C4.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); + } + } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits + // 64 x 128 ten2k64[scale] * C4 + __mul_128x64_to_128 (P128, ten2k64[scale], C4); + } + C4.w[0] = P128.w[0]; + C4.w[1] = P128.w[1]; + // e4 does not need adjustment, as it is not used from this point on + scale = 0; + x0 = 0; + // now Case (6) looks like Case (3) or Case (5) with scale = 0 + } else { // if Case (3) or Case (5) + // Note: Case (3) is similar to Case (2), but scale differs and the + // result is exact, unless it is tiny (so x0 = 0 when calculating the + // result with unbounded exponent) + + // calculate first the sum x * y + z with unbounded exponent (exact) + // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, + // 1 <= scale <= 33 + // calculate res = C3 * 10^scale + scale = delta + q4 - q3; + x0 = 0; + // Note: the comments which follow refer [mainly] to Case (2) + } + + case2_repeat: + if (scale == 0) { // this could happen e.g. if we return to case2_repeat + // or in Case (4) + res.w[1] = C3.w[1]; + res.w[0] = C3.w[0]; + } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits + if (scale <= 19) { // 10^scale fits in 64 bits + // 64 x 64 C3.w[0] * ten2k64[scale] + __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); + } else { // 10^scale fits in 128 bits + // 64 x 128 C3.w[0] * ten2k128[scale - 20] + __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); + } + } else { // z fits in 128 bits, but 10^scale must fit in 64 bits + // 64 x 128 ten2k64[scale] * C3 + __mul_128x64_to_128 (res, ten2k64[scale], C3); + } + // e3 is already calculated + e3 = e3 - scale; + // now res = C3 * 10^scale and e3 = e3 - scale + // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat + // because the result was too small + + // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, + // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) + // Also: 1 <= q4 - x0 <= p34 - 1 => 1 <= q4 - x0 <= 33 (so the result of + // the rounding fits in 128 bits!) 
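+ // Illustrative example (hypothetical values, not taken from the case + // analysis above): in Case (2) with q3 = 3, q4 = 40, delta = 10, one + // gets scale = p34 - q3 = 31 and x0 = delta + q4 - p34 = 16, so C4 is + // rounded from 40 digits to q4 - x0 = 24 digits, which indeed fits in + // 128 bits.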
+ // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) + // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 + if (x0 == 0) { // this could happen only if we return to case2_repeat, or + // for Case (3) or Case (6) + R128.w[1] = C4.w[1]; + R128.w[0] = C4.w[0]; + } else if (q4 <= 18) { + // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 + round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); + if (incr_exp) { + // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 + R64 = ten2k64[q4 - x0]; + } + R128.w[1] = 0; + R128.w[0] = R64; + } else if (q4 <= 38) { + // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 + P128.w[1] = C4.w[1]; + P128.w[0] = C4.w[0]; + round128_19_38 (q4, x0, P128, &R128, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + if (incr_exp) { + // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 + if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 + R128.w[0] = ten2k64[q4 - x0]; + // R128.w[1] stays 0 + } else { // 20 <= q4 - x0 <= 37 + R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; + R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; + } + } + } else if (q4 <= 57) { + // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 + P192.w[2] = C4.w[2]; + P192.w[1] = C4.w[1]; + P192.w[0] = C4.w[0]; + round192_39_57 (q4, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + // R192.w[2] is always 0 + if (incr_exp) { + // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 + if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 + R192.w[0] = ten2k64[q4 - x0]; + // R192.w[1] stays 0 + // R192.w[2] stays 0 + } else { // 20 <= q4 - x0 <= 33 + R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; + R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; + // R192.w[2] stays 0 + } + } + R128.w[1] = R192.w[1]; + R128.w[0] = R192.w[0]; + } else { + // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 + round256_58_76 (q4, x0, C4, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + // R256.w[3] and R256.w[2] are always 0 + if (incr_exp) { + // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 + if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 + R256.w[0] = ten2k64[q4 - x0]; + // R256.w[1] stays 0 + // R256.w[2] stays 0 + // R256.w[3] stays 0 + } else { // 20 <= q4 - x0 <= 33 + R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; + R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; + // R256.w[2] stays 0 + // R256.w[3] stays 0 + } + } + R128.w[1] = R256.w[1]; + R128.w[0] = R256.w[0]; + } + // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, + // rounded to nearest, which were copied into R128 + if (z_sign == p_sign) { + lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale + // the sum can result in [up to] p34 or p34 + 1 digits + res.w[0] = res.w[0] + R128.w[0]; + res.w[1] = res.w[1] + R128.w[1]; + if (res.w[0] < R128.w[0]) + res.w[1]++; // carry + // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 + if (res.w[1] > 0x0001ed09bead87c0ull || + (res.w[1] == 0x0001ed09bead87c0ull && + res.w[0] > 0x378d8e63ffffffffull)) { + // avoid double rounding error + is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; + is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; + is_midpoint_lt_even0 = is_midpoint_lt_even; + is_midpoint_gt_even0 = 
is_midpoint_gt_even; + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + P128.w[1] = res.w[1]; + P128.w[0] = res.w[0]; + round128_19_38 (35, 1, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + // incr_exp is 0 with certainty in this case + // avoid a double rounding error + if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && + is_midpoint_lt_even) { // double rounding error upward + // res = res - 1 + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + // Note: a double rounding error upward is not possible; for this + // the result after the first rounding would have to be 99...95 + // (35 digits in all), possibly followed by a number of zeros; this + // is not possible in Cases (2)-(6) or (15)-(17) which may get here + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && + is_midpoint_gt_even) { // double rounding error downward + // res = res + 1 + res.w[0]++; + if (res.w[0] == 0) + res.w[1]++; + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint + && !is_inexact_gt_midpoint) { + // if this second rounding was exact the result may still be + // inexact because of the first rounding + if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { + is_inexact_gt_midpoint = 1; + } + if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { + is_inexact_lt_midpoint = 1; + } + } else if (is_midpoint_gt_even && + (is_inexact_gt_midpoint0 + || is_midpoint_lt_even0)) { + // pulled up to a midpoint + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else if (is_midpoint_lt_even && + (is_inexact_lt_midpoint0 + || is_midpoint_gt_even0)) { + // pulled down to a midpoint + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + ; + } + // adjust exponent + e3 = e3 + 1; + if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || + is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { + is_inexact_lt_midpoint = 1; + } + } + } else { + // this is the result rounded with unbounded exponent, unless a + // correction is needed + res.w[1] = res.w[1] & MASK_COEFF; + if (lsb == 1) { + if (is_midpoint_gt_even) { + // res = res + 1 + is_midpoint_gt_even = 0; + is_midpoint_lt_even = 1; + res.w[0]++; + if (res.w[0] == 0x0) + res.w[1]++; + // check for rounding overflow + if (res.w[1] == 0x0001ed09bead87c0ull && + res.w[0] == 0x378d8e6400000000ull) { + // res = 10^34 => rounding overflow + res.w[1] = 0x0000314dc6448d93ull; + res.w[0] = 0x38c15b0a00000000ull; // 10^33 + e3++; + } + } else if (is_midpoint_lt_even) { + // res = res - 1 + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 1; + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + // if the result is pure zero, the sign depends on the rounding + // mode (x*y and z had opposite signs) + if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { + if (rnd_mode != ROUNDING_DOWN) + z_sign = 0x0000000000000000ull; + else + z_sign = 0x8000000000000000ull; + // the exponent is max (e3, expmin) + res.w[1] = 0x0; + res.w[0] = 0x0; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = 
is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + } else { + ; + } + } + } + } else { // if (z_sign != p_sign) + lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 + // used to swap rounding indicators if p_sign != z_sign + // the sum can result in [up to] p34 or p34 - 1 digits + tmp64 = res.w[0]; + res.w[0] = res.w[0] - R128.w[0]; + res.w[1] = res.w[1] - R128.w[1]; + if (res.w[0] > tmp64) + res.w[1]--; // borrow + // if res < 10^33 and exp > expmin need to decrease x0 and + // increase scale by 1 + if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || + (res.w[1] == 0x0000314dc6448d93ull && + res.w[0] < 0x38c15b0a00000000ull)) || + (is_inexact_lt_midpoint + && res.w[1] == 0x0000314dc6448d93ull + && res.w[0] == 0x38c15b0a00000000ull)) + && x0 >= 1) { + x0 = x0 - 1; + // first restore e3, otherwise it will be too small + e3 = e3 + scale; + scale = scale + 1; + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + incr_exp = 0; + goto case2_repeat; + } + // else this is the result rounded with unbounded exponent; + // because the result has opposite sign to that of C4 which was + // rounded, need to change the rounding indicators + if (is_inexact_lt_midpoint) { + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + } else if (is_inexact_gt_midpoint) { + is_inexact_gt_midpoint = 0; + is_inexact_lt_midpoint = 1; + } else if (lsb == 0) { + if (is_midpoint_lt_even) { + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 1; + } else if (is_midpoint_gt_even) { + is_midpoint_gt_even = 0; + is_midpoint_lt_even = 1; + } else { + ; + } + } else if (lsb == 1) { + if (is_midpoint_lt_even) { + // res = res + 1 + res.w[0]++; + if (res.w[0] == 0x0) + res.w[1]++; + // check for rounding overflow + if (res.w[1] == 0x0001ed09bead87c0ull && + res.w[0] == 0x378d8e6400000000ull) { + // res = 10^34 => rounding overflow + res.w[1] = 0x0000314dc6448d93ull; + res.w[0] = 0x38c15b0a00000000ull; // 10^33 + e3++; + } + } else if (is_midpoint_gt_even) { + // res = res - 1 + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + // if the result is pure zero, the sign depends on the rounding + // mode (x*y and z had opposite signs) + if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { + if (rnd_mode != ROUNDING_DOWN) + z_sign = 0x0000000000000000ull; + else + z_sign = 0x8000000000000000ull; + // the exponent is max (e3, expmin) + res.w[1] = 0x0; + res.w[0] = 0x0; + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } + } else { + ; + } + } else { + ; + } + } + // check for underflow + if (e3 == expmin) { // and if significand < 10^33 => result is tiny + if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || + ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && + res.w[0] < 0x38c15b0a00000000ull)) { + is_tiny = 1; + } + } else if (e3 < expmin) { + // the result is tiny, so we must truncate more of res + is_tiny = 1; + x0 = expmin - e3; + is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; + is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; + is_midpoint_lt_even0 = is_midpoint_lt_even; + is_midpoint_gt_even0 = is_midpoint_gt_even; + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + 
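// Note: the *0 copies above preserve the rounding indicators from the + // first rounding; the double rounding correction further below combines + // them with the indicators produced by this second (underflow) rounding. +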
is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + // determine the number of decimal digits in res + if (res.w[1] == 0x0) { + // between 1 and 19 digits + for (ind = 1; ind <= 19; ind++) { + if (res.w[0] < ten2k64[ind]) { + break; + } + } + // ind digits + } else if (res.w[1] < ten2k128[0].w[1] || + (res.w[1] == ten2k128[0].w[1] + && res.w[0] < ten2k128[0].w[0])) { + // 20 digits + ind = 20; + } else { // between 21 and 38 digits + for (ind = 1; ind <= 18; ind++) { + if (res.w[1] < ten2k128[ind].w[1] || + (res.w[1] == ten2k128[ind].w[1] && + res.w[0] < ten2k128[ind].w[0])) { + break; + } + } + // ind + 20 digits + ind = ind + 20; + } + + // at this point ind >= x0; because delta >= 2 on this path, the case + // ind = x0 can occur only in Case (2) or Case (3), when C3 has one + // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), + // the signs of x * y and z are opposite, and through cancellation + // the most significant decimal digit in res has the weight + // 10^(emin-1); however, it is clear that in this case the most + // significant digit is 9, so the result before rounding is + // 0.9... * 10^emin + // Otherwise, ind > x0 because there are non-zero decimal digits in the + // result with weight of at least 10^emin, and correction for underflow + // can be carried out using the round*_*_2_* () routines + if (x0 == ind) { // the result before rounding is 0.9... * 10^emin + res.w[1] = 0x0; + res.w[0] = 0x1; + is_inexact_gt_midpoint = 1; + } else if (ind <= 18) { // check that 2 <= ind + // 2 <= ind <= 18, 1 <= x0 <= 17 + round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + if (incr_exp) { + // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 + R64 = ten2k64[ind - x0]; + } + res.w[1] = 0; + res.w[0] = R64; + } else if (ind <= 38) { + // 19 <= ind <= 38 + P128.w[1] = res.w[1]; + P128.w[0] = res.w[0]; + round128_19_38 (ind, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + if (incr_exp) { + // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 + if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 + res.w[0] = ten2k64[ind - x0]; + // res.w[1] stays 0 + } else { // 20 <= ind - x0 <= 37 + res.w[0] = ten2k128[ind - x0 - 20].w[0]; + res.w[1] = ten2k128[ind - x0 - 20].w[1]; + } + } + } + // avoid a double rounding error + if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && + is_midpoint_lt_even) { // double rounding error upward + // res = res - 1 + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + // Note: a double rounding error upward is not possible; for this + // the result after the first rounding would have to be 99...95 + // (35 digits in all), possibly followed by a number of zeros; this + // is not possible in Cases (2)-(6) which may get here + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && + is_midpoint_gt_even) { // double rounding error downward + // res = res + 1 + res.w[0]++; + if (res.w[0] == 0) + res.w[1]++; + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // if this second rounding was exact the result may still be + // inexact because of the first rounding + if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { + is_inexact_gt_midpoint = 1; 
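+ // (inexactness is sticky: even though this second rounding was exact, + // digits were already lost in the first rounding, so the combined + // result stays inexact, in the direction inherited from the first + // rounding)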
+ } + if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { + is_inexact_lt_midpoint = 1; + } + } else if (is_midpoint_gt_even && + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + // pulled up to a midpoint + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else if (is_midpoint_lt_even && + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + // pulled down to a midpoint + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + ; + } + // adjust exponent + e3 = e3 + x0; + if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || + is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { + is_inexact_lt_midpoint = 1; + } + } + } else { + ; // not underflow + } + // check for inexact result + if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || + is_midpoint_lt_even || is_midpoint_gt_even) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + if (is_tiny) + *pfpsf |= UNDERFLOW_EXCEPTION; + } + // now check for significand = 10^34 (may have resulted from going + // back to case2_repeat) + if (res.w[1] == 0x0001ed09bead87c0ull && + res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 + res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 + res.w[0] = 0x38c15b0a00000000ull; + e3 = e3 + 1; + } + res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; + // check for overflow + if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) { + res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + } + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e3, &res, pfpsf); + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + + } else { + + // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and + // the signs of x*y and z are opposite; in these cases massive + // cancellation can occur, so it is better to scale either C3 or C4 and + // to perform the subtraction before rounding; rounding is performed + // next, depending on the number of decimal digits in the result and on + // the exponent value + // Note: overflow is not possible in this case + // this is similar to Cases (15), (16), and (17) + + if (delta + q4 < q3) { // from Case (6) + // Case (6) with 0 <= delta <= 1 is similar to Cases (15), (16), and + // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) + // and call add_and_round; delta stays positive + // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 + P128.w[1] = C3.w[1]; + P128.w[0] = C3.w[0]; + C3.w[1] = C4.w[1]; + C3.w[0] = C4.w[0]; + C4.w[1] = P128.w[1]; + C4.w[0] = P128.w[0]; + ind = q3; + q3 = q4; + q4 = ind; + ind = e3; + e3 = e4; + e4 = ind; + tmp_sign = z_sign; + z_sign = p_sign; + p_sign = tmp_sign; + } else { // from Cases (2), (3), (4), (5) + // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be + // scaled up by q4 + delta - q3; this is the same as in Cases (15), + // (16), and (17) if we just change the sign of delta + delta = -delta; + } + add_and_round (q3, q4, 
e4, delta, p34, z_sign, p_sign, C3, C4, + rnd_mode, &is_midpoint_lt_even, + &is_midpoint_gt_even, &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, pfpsf, &res); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + + } + + } else { // if delta < 0 + + delta = -delta; + + if (p34 < q4 && q4 <= delta) { // Case (7) + + // truncate C4 to p34 digits into res + // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 + x0 = q4 - p34; + if (q4 <= 38) { + P128.w[1] = C4.w[1]; + P128.w[0] = C4.w[0]; + round128_19_38 (q4, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + } else if (q4 <= 57) { // 35 <= q4 <= 57 + P192.w[2] = C4.w[2]; + P192.w[1] = C4.w[1]; + P192.w[0] = C4.w[0]; + round192_39_57 (q4, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + res.w[0] = R192.w[0]; + res.w[1] = R192.w[1]; + } else { // if (q4 <= 68) + round256_58_76 (q4, x0, C4, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + res.w[0] = R256.w[0]; + res.w[1] = R256.w[1]; + } + e4 = e4 + x0; + if (incr_exp) { + e4 = e4 + 1; + } + if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // if C4 rounded to p34 digits is exact then the result is inexact, + // in a way that depends on the signs of x * y and z + if (p_sign == z_sign) { + is_inexact_lt_midpoint = 1; + } else { // if (p_sign != z_sign) + if (res.w[1] != 0x0000314dc6448d93ull || + res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 + is_inexact_gt_midpoint = 1; + } else { // res = 10^33 and exact is a special case + // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 + // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 + // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 + // Note: ulp is really ulp/10 (after borrow which propagates to msd) + if (delta > p34 + 1) { // C3 < 1/2 + // res = 10^33, unchanged + is_inexact_gt_midpoint = 1; + } else { // if (delta == p34 + 1) + if (q3 <= 19) { + if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp + // res = 10^33, unchanged + is_inexact_gt_midpoint = 1; + } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp + // res = 10^33, unchanged + is_midpoint_lt_even = 1; + } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp + res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 + res.w[0] = 0x378d8e63ffffffffull; + e4 = e4 - 1; + is_inexact_lt_midpoint = 1; + } + } else { // if (20 <= q3 <=34) + if (C3.w[1] < midpoint128[q3 - 20].w[1] || + (C3.w[1] == midpoint128[q3 - 20].w[1] && + C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp + // res = 10^33, unchanged + is_inexact_gt_midpoint = 1; + } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && + C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp + // res = 10^33, unchanged + is_midpoint_lt_even = 1; + } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp + res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 + res.w[0] = 0x378d8e63ffffffffull; + e4 = e4 - 1; + is_inexact_lt_midpoint = 1; + } + } + } + } + } + } else if (is_midpoint_lt_even) { + if (z_sign != p_sign) { + // needs correction: res = res - 1 + res.w[0] = res.w[0] - 1; + if (res.w[0] == 
0xffffffffffffffffull) + res.w[1]--; + // if it is (10^33-1)*10^e4 then the correct result is + // (10^34-1)*10^(e4-1) + if (res.w[1] == 0x0000314dc6448d93ull && + res.w[0] == 0x38c15b09ffffffffull) { + res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 + res.w[0] = 0x378d8e63ffffffffull; + e4 = e4 - 1; + } + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + } else { // if (z_sign == p_sign) + is_midpoint_lt_even = 0; + is_inexact_gt_midpoint = 1; + } + } else if (is_midpoint_gt_even) { + if (z_sign == p_sign) { + // needs correction: res = res + 1 (cannot cross into the next binade) + res.w[0] = res.w[0] + 1; + if (res.w[0] == 0x0000000000000000ull) + res.w[1]++; + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else { // if (z_sign != p_sign) + is_midpoint_gt_even = 0; + is_inexact_lt_midpoint = 1; + } + } else { + ; // the rounded result is already correct + } + // check for overflow + if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) { + res.w[1] = p_sign | 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION); + } else { // no overflow or not RN + p_exp = ((UINT64) (e4 + 6176) << 49); + res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; + } + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, pfpsf); + } + if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || + is_midpoint_lt_even || is_midpoint_gt_even) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + + } else if ((q4 <= p34 && p34 <= delta) || // Case (8) + (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) + (q4 <= delta && delta + q3 <= p34) || // Case (10) + (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) + (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) + (delta + q3 < q4 && q4 <= p34)) { // Case (18) + + // Case (8) is similar to Case (1), with C3 and C4 swapped + // Case (9) is similar to Case (2), with C3 and C4 swapped + // Case (10) is similar to Case (3), with C3 and C4 swapped + // Case (13) is similar to Case (4), with C3 and C4 swapped + // Case (14) is similar to Case (5), with C3 and C4 swapped + // Case (18) is similar to Case (6), with C3 and C4 swapped + + // swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign), (z_exp, p_exp) + // and go back to delta_ge_zero + // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 + P128.w[1] = C3.w[1]; + P128.w[0] = C3.w[0]; + C3.w[1] = C4.w[1]; + C3.w[0] = C4.w[0]; + C4.w[1] = P128.w[1]; + C4.w[0] = P128.w[0]; + ind = q3; + q3 = q4; + q4 = ind; + ind = e3; + e3 = e4; + e4 = ind; + tmp_sign = z_sign; + z_sign = p_sign; + p_sign = tmp_sign; + tmp.ui64 = z_exp; + z_exp = p_exp; + p_exp = tmp.ui64; + goto delta_ge_zero; + + } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) + (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) + + // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, + // 1 <= x0 <= q3 - 1 <= p34 - 1 + x0 = e4 - e3; // or x0 = delta + q3 - q4 + if (q3 <= 18) { // 2 <= q3 <= 18 + round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, 
&is_inexact_gt_midpoint); + // C3.w[1] = 0; + C3.w[0] = R64; + } else if (q3 <= 38) { + round128_19_38 (q3, x0, C3, &R128, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + C3.w[1] = R128.w[1]; + C3.w[0] = R128.w[0]; + } + // the rounded result has q3 - x0 digits + // we want the exponent to be e4, so if incr_exp = 1 then + // multiply the rounded result by 10 - it will still fit in 113 bits + if (incr_exp) { + // 64 x 128 -> 128 + P128.w[1] = C3.w[1]; + P128.w[0] = C3.w[0]; + __mul_64x128_to_128 (C3, ten2k64[1], P128); + } + e3 = e3 + x0; // this is e4 + // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; + // the result will have the sign of x * y; the exponent is e4 + R256.w[3] = 0; + R256.w[2] = 0; + R256.w[1] = C3.w[1]; + R256.w[0] = C3.w[0]; + if (p_sign == z_sign) { // R256 = C4 + R256 + add256 (C4, R256, &R256); + } else { // if (p_sign != z_sign) { // R256 = C4 - R256 + sub256 (C4, R256, &R256); // the result cannot be pure zero + // because the result has opposite sign to that of R256 which was + // rounded, need to change the rounding indicators + lsb = C4.w[0] & 0x01; + if (is_inexact_lt_midpoint) { + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + } else if (is_inexact_gt_midpoint) { + is_inexact_gt_midpoint = 0; + is_inexact_lt_midpoint = 1; + } else if (lsb == 0) { + if (is_midpoint_lt_even) { + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 1; + } else if (is_midpoint_gt_even) { + is_midpoint_gt_even = 0; + is_midpoint_lt_even = 1; + } else { + ; + } + } else if (lsb == 1) { + if (is_midpoint_lt_even) { + // res = res + 1 + R256.w[0]++; + if (R256.w[0] == 0x0) { + R256.w[1]++; + if (R256.w[1] == 0x0) { + R256.w[2]++; + if (R256.w[2] == 0x0) { + R256.w[3]++; + } + } + } + // no check for rounding overflow - R256 was a difference + } else if (is_midpoint_gt_even) { + // res = res - 1 + R256.w[0]--; + if (R256.w[0] == 0xffffffffffffffffull) { + R256.w[1]--; + if (R256.w[1] == 0xffffffffffffffffull) { + R256.w[2]--; + if (R256.w[2] == 0xffffffffffffffffull) { + R256.w[3]--; + } + } + } + } else { + ; + } + } else { + ; + } + } + // determine the number of decimal digits in R256 + ind = nr_digits256 (R256); // ind >= p34 + // if R256 is sum, then ind > p34; if R256 is a difference, then + // ind >= p34; this means that we can calculate the result rounded to + // the destination precision, with unbounded exponent, starting from R256 + // and using the indicators from the rounding of C3 to avoid a double + // rounding error + + if (ind < p34) { + ; + } else if (ind == p34) { + // the result rounded to the destination precision with + // unbounded exponent + // is (-1)^p_sign * R256 * 10^e4 + res.w[1] = R256.w[1]; + res.w[0] = R256.w[0]; + } else { // if (ind > p34) + // if more than P digits, round to nearest to P digits + // round R256 to p34 digits + x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 + // save C3 rounding indicators to help avoid double rounding error + is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; + is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; + is_midpoint_lt_even0 = is_midpoint_lt_even; + is_midpoint_gt_even0 = is_midpoint_gt_even; + // initialize rounding indicators + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + // round to p34 digits; the result fits in 113 bits + if (ind <= 38) { + P128.w[1] = R256.w[1]; + P128.w[0] = R256.w[0]; + round128_19_38 (ind, x0, P128, &R128, &incr_exp, + 
&is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + } else if (ind <= 57) { + P192.w[2] = R256.w[2]; + P192.w[1] = R256.w[1]; + P192.w[0] = R256.w[0]; + round192_39_57 (ind, x0, P192, &R192, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R128.w[1] = R192.w[1]; + R128.w[0] = R192.w[0]; + } else { // if (ind <= 68) + round256_58_76 (ind, x0, R256, &R256, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + R128.w[1] = R256.w[1]; + R128.w[0] = R256.w[0]; + } + // the rounded result has p34 = 34 digits + e4 = e4 + x0 + incr_exp; + + res.w[1] = R128.w[1]; + res.w[0] = R128.w[0]; + + // avoid a double rounding error + if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && + is_midpoint_lt_even) { // double rounding error upward + // res = res - 1 + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + // Note: a double rounding error upward is not possible; for this + // the result after the first rounding would have to be 99...95 + // (35 digits in all), possibly followed by a number of zeros; this + // is not possible in Cases (2)-(6) or (15)-(17) which may get here + // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent + if (res.w[1] == 0x0000314dc6448d93ull && + res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 + res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 + res.w[0] = 0x378d8e63ffffffffull; + e4--; + } + } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && + is_midpoint_gt_even) { // double rounding error downward + // res = res + 1 + res.w[0]++; + if (res.w[0] == 0) + res.w[1]++; + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { + // if this second rounding was exact the result may still be + // inexact because of the first rounding + if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { + is_inexact_gt_midpoint = 1; + } + if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { + is_inexact_lt_midpoint = 1; + } + } else if (is_midpoint_gt_even && + (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { + // pulled up to a midpoint + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else if (is_midpoint_lt_even && + (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { + // pulled down to a midpoint + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + ; + } + } + + // determine tininess + if (rnd_mode == ROUNDING_TO_NEAREST) { + if (e4 < expmin) { + is_tiny = 1; // for other rounding modes apply correction + } + } else { + // for RM, RP, RZ, RA apply correction in order to determine tininess + // but do not save the result; apply the correction to + // (-1)^p_sign * res * 10^0 + P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; + P128.w[0] = res.w[0]; + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + 0, &P128, pfpsf); + scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 + // the number of digits in the significand is p34 = 34 + if (e4 + scale < expmin) { + is_tiny = 1; + } + } + + // the result rounded to the destination precision with unbounded exponent + // is 
(-1)^p_sign * res * 10^e4 + res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN + // res.w[0] unchanged; + // Note: res is correct only if expmin <= e4 <= expmax + ind = p34; // the number of decimal digits in the significand of res + + // at this point we have the result rounded with unbounded exponent in + // res and we know its tininess: + // res = (-1)^p_sign * significand * 10^e4, + // where q (significand) = ind = p34 + // Note: res is correct only if expmin <= e4 <= expmax + + // check for overflow if RN + if (rnd_mode == ROUNDING_TO_NEAREST + && (ind + e4) > (p34 + expmax)) { + res.w[1] = p_sign | 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + } // else not overflow or not RN, so continue + + // from this point on this is similar to the last part of the computation + // for Cases (15), (16), (17) + + // if (e4 >= expmin) we have the result rounded with bounded exponent + if (e4 < expmin) { + x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res + // where the result rounded [at most] once is + // (-1)^p_sign * significand_res * 10^e4 + + // avoid double rounding error + is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; + is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; + is_midpoint_lt_even0 = is_midpoint_lt_even; + is_midpoint_gt_even0 = is_midpoint_gt_even; + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + + if (x0 > ind) { + // nothing is left of res when moving the decimal point left x0 digits + is_inexact_lt_midpoint = 1; + res.w[1] = p_sign | 0x0000000000000000ull; + res.w[0] = 0x0000000000000000ull; + e4 = expmin; + } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 + // this is <, =, or > 1/2 ulp + // compare the ind-digit value in the significand of res with + // 1/2 ulp = 5*10^(ind-1), i.e. 
determine whether it is + // less than, equal to, or greater than 1/2 ulp (significand of res) + R128.w[1] = res.w[1] & MASK_COEFF; + R128.w[0] = res.w[0]; + if (ind <= 19) { + if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp + lt_half_ulp = 1; + is_inexact_lt_midpoint = 1; + } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp + eq_half_ulp = 1; + is_midpoint_gt_even = 1; + } else { // > 1/2 ulp + gt_half_ulp = 1; + is_inexact_gt_midpoint = 1; + } + } else { // if (ind <= 38) + if (R128.w[1] < midpoint128[ind - 20].w[1] || + (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp + lt_half_ulp = 1; + is_inexact_lt_midpoint = 1; + } else if (R128.w[1] == midpoint128[ind - 20].w[1] && + R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp + eq_half_ulp = 1; + is_midpoint_gt_even = 1; + } else { // > 1/2 ulp + gt_half_ulp = 1; + is_inexact_gt_midpoint = 1; + } + } + if (lt_half_ulp || eq_half_ulp) { + // res = +0.0 * 10^expmin + res.w[1] = 0x0000000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // if (gt_half_ulp) + // res = +1 * 10^expmin + res.w[1] = 0x0000000000000000ull; + res.w[0] = 0x0000000000000001ull; + } + res.w[1] = p_sign | res.w[1]; + e4 = expmin; + } else { // if (1 <= x0 <= ind - 1 <= 33) + // round the ind-digit result to ind - x0 digits + + if (ind <= 18) { // 2 <= ind <= 18 + round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + res.w[1] = 0x0; + res.w[0] = R64; + } else if (ind <= 38) { + P128.w[1] = res.w[1] & MASK_COEFF; + P128.w[0] = res.w[0]; + round128_19_38 (ind, x0, P128, &res, &incr_exp, + &is_midpoint_lt_even, &is_midpoint_gt_even, + &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint); + } + e4 = e4 + x0; // expmin + // we want the exponent to be expmin, so if incr_exp = 1 then + // multiply the rounded result by 10 - it will still fit in 113 bits + if (incr_exp) { + // 64 x 128 -> 128 + P128.w[1] = res.w[1] & MASK_COEFF; + P128.w[0] = res.w[0]; + __mul_64x128_to_128 (res, ten2k64[1], P128); + } + res.w[1] = + p_sign | ((UINT64) (e4 + 6176) << 49) | (res. 
+ w[1] & MASK_COEFF); + // avoid a double rounding error + if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && + is_midpoint_lt_even) { // double rounding error upward + // res = res - 1 + res.w[0]--; + if (res.w[0] == 0xffffffffffffffffull) + res.w[1]--; + // Note: a double rounding error upward is not possible; for this + // the result after the first rounding would have to be 99...95 + // (35 digits in all), possibly followed by a number of zeros; this + // not possible in this underflow case + is_midpoint_lt_even = 0; + is_inexact_lt_midpoint = 1; + } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && + is_midpoint_gt_even) { // double rounding error downward + // res = res + 1 + res.w[0]++; + if (res.w[0] == 0) + res.w[1]++; + is_midpoint_gt_even = 0; + is_inexact_gt_midpoint = 1; + } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && + !is_inexact_lt_midpoint + && !is_inexact_gt_midpoint) { + // if this second rounding was exact the result may still be + // inexact because of the first rounding + if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { + is_inexact_gt_midpoint = 1; + } + if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { + is_inexact_lt_midpoint = 1; + } + } else if (is_midpoint_gt_even && + (is_inexact_gt_midpoint0 + || is_midpoint_lt_even0)) { + // pulled up to a midpoint + is_inexact_lt_midpoint = 1; + is_inexact_gt_midpoint = 0; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else if (is_midpoint_lt_even && + (is_inexact_lt_midpoint0 + || is_midpoint_gt_even0)) { + // pulled down to a midpoint + is_inexact_lt_midpoint = 0; + is_inexact_gt_midpoint = 1; + is_midpoint_lt_even = 0; + is_midpoint_gt_even = 0; + } else { + ; + } + } + } + // res contains the correct result + // apply correction if not rounding to nearest + if (rnd_mode != ROUNDING_TO_NEAREST) { + rounding_correction (rnd_mode, + is_inexact_lt_midpoint, + is_inexact_gt_midpoint, + is_midpoint_lt_even, is_midpoint_gt_even, + e4, &res, pfpsf); + } + if (is_midpoint_lt_even || is_midpoint_gt_even || + is_inexact_lt_midpoint || is_inexact_gt_midpoint) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + if (is_tiny) + *pfpsf |= UNDERFLOW_EXCEPTION; + } + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + + } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) + (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) + (delta + q3 <= p34 && p34 < q4)) { // Case (17) + + // calculate first the result rounded to the destination precision, with + // unbounded exponent + + add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, + rnd_mode, &is_midpoint_lt_even, + &is_midpoint_gt_even, &is_inexact_lt_midpoint, + &is_inexact_gt_midpoint, pfpsf, &res); + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + + } else { + ; + } + + } // end if delta < 0 + + *ptr_is_midpoint_lt_even = is_midpoint_lt_even; + *ptr_is_midpoint_gt_even = is_midpoint_gt_even; + *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; + *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; + BID_SWAP128 (res); + BID_RETURN (res) + +} + + +#if DECIMAL_CALL_BY_REFERENCE +void 
+bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
+            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+            _EXC_INFO_PARAM) {
+  UINT128 x = *px, y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128_fma (UINT128 x, UINT128 y, UINT128 z
+            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+            _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even, is_midpoint_gt_even,
+    is_inexact_lt_midpoint, is_inexact_gt_midpoint;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, &x, &y, &z
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x, y,
+                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px, y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1, y1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, &x1, &y1, &z1
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x1, y1,
+                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px, y = *py;
+  UINT128 z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1, y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, &x1, &y1, &z
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x1, y1,
+                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, &x1, py, &z1
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x1, y,
+                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 x = *px;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 x1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, &x1, py, pz
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x1, y,
+                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 y1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, px, &y1, &z1
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x, y1,
+                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 y = *py;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, px, &y1, pz
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x, y1,
+                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+  UINT64 z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT128
+bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
+               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+               _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
+                  &res, px, py, &z1
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
+                        &is_inexact_lt_midpoint,
+                        &is_inexact_gt_midpoint, x, y,
+                        z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res);
+}
+
+// Note: bid128qqq_fma is represented by bid128_fma
+
+// Note: bid64ddd_fma is represented by bid64_fma
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 x = *px, y = *py;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 x1, y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, &x1, &y1, pz
+                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x1, y1, z
+                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 x = *px, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 x1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, &x1, py, &z1
+                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x1, y, z1
+                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 x = *px;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 x1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, &x1, py, pz
+                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                _EXC_INFO_ARG);
+#else
+  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x1, y, z
+                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 y1, z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, px, &y1, &z1
+                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x, y1, z1
+                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 y = *py;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 y1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, px, &y1, pz
+                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                _EXC_INFO_ARG);
+#else
+  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x, y1, z
+                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT64 z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  UINT128 z1;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  bid64qqq_fma (&res1, px, py, &z1
+                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                _EXC_INFO_ARG);
+#else
+  z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
+  res1 = bid64qqq_fma (x, y, z1
+                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                       _EXC_INFO_ARG);
+#endif
+  BID_RETURN (res1);
+}
+
+
+#if DECIMAL_CALL_BY_REFERENCE
+void
+bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+  UINT128 x = *px, y = *py, z = *pz;
+#if !DECIMAL_GLOBAL_ROUNDING
+  unsigned int rnd_mode = *prnd_mode;
+#endif
+#else
+UINT64
+bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
+              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
+              _EXC_INFO_PARAM) {
+#endif
+  int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
+    is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
+  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
+    is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
+  int incr_exp;
+  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
+  UINT64 res1 = 0xbaddbaddbaddbaddull;
+  unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
+  UINT64 sign;
+  UINT64 exp;
+  int unbexp;
+  UINT128 C;
+  BID_UI64DOUBLE tmp;
+  int nr_bits;
+  int q, x0;
+  int scale;
+  int lt_half_ulp = 0, eq_half_ulp = 0;
+
+  // Note: for rounding modes other than RN or RA, the result can be obtained
+  // by rounding first to BID128 and then to BID64
+
+  save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
+  *pfpsf = 0;
+
+#if DECIMAL_CALL_BY_REFERENCE
+  bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
+                  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
+                  &res, &x, &y, &z
+                  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                  _EXC_INFO_ARG);
+#else
+  res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
+                        &is_inexact_lt_midpoint0,
+                        &is_inexact_gt_midpoint0, x, y,
+                        z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
+                        _EXC_INFO_ARG);
+#endif
+
+  if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
+      (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
+      ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || // res = QNaN (cannot be SNaN)
+      ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
+#if DECIMAL_CALL_BY_REFERENCE
+    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#else
+    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#endif
+    // determine the unbiased exponent of the result
+    unbexp = ((res1 >> 53) & 0x3ff) - 398;
+
+    // if subnormal, res1 must have exp = -398
+    // if tiny and inexact set underflow and inexact status flags
+    if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
+        (unbexp == -398)
+        && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
+        && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
+            || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
+      // set the inexact flag and the underflow flag
+      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
+    } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
+               is_midpoint_lt_even0 || is_midpoint_gt_even0) {
+      // set the inexact flag
+      *pfpsf |= INEXACT_EXCEPTION;
+    }
+
+    *pfpsf |= save_fpsf;
+    BID_RETURN (res1);
+  } // else continue, and use rounding to nearest to round to 16 digits
+
+  // at this point the result is rounded to nearest (even or away) to 34 digits
+  // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
+  sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
+  exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
+  unbexp = (exp >> 49) - 6176;
+  C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
+  C.w[0] = res.w[LOW_128W];
+
+  if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
+      (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
+    // clear under/overflow
+#if DECIMAL_CALL_BY_REFERENCE
+    bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#else
+    res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
+#endif
+    *pfpsf |= save_fpsf;
+    BID_RETURN (res1);
+  } // else continue
+
+  // -398 - 34 <= unbexp <= 369 + 15
+  if (rnd_mode == ROUNDING_TIES_AWAY) {
+    // apply correction, if needed, to make the result rounded to nearest-even
+    if (is_midpoint_gt_even) {
+      // res = res - 1
+      res1--; // res1 is now even
+    } // else the result is already correctly rounded to nearest-even
+  }
+  // at this point the result is finite, non-zero canonical normal or subnormal,
+  // and in most cases overflow or underflow will not occur
+
+  // determine the number of digits q in the result
+  // q = nr. of decimal digits in x
+  // determine first the nr. of bits in x
+  if (C.w[1] == 0) {
+    if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
+      // split the 64-bit value in two 32-bit halves to avoid rounding errors
+      if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
+        tmp.d = (double) (C.w[0] >> 32); // exact conversion
+        nr_bits =
+          33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+      } else { // x < 2^32
+        tmp.d = (double) (C.w[0]); // exact conversion
+        nr_bits =
+          1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+      }
+    } else { // if x < 2^53
+      tmp.d = (double) C.w[0]; // exact conversion
+      nr_bits =
+        1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+    }
+  } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
+    tmp.d = (double) C.w[1]; // exact conversion
+    nr_bits =
+      65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
+  }
+  q = nr_digits[nr_bits - 1].digits;
+  if (q == 0) {
+    q = nr_digits[nr_bits - 1].digits1;
+    if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
+        (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
+         C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
+      q++;
+  }
+  // if q > 16, round to nearest even to 16 digits (but for underflow it may
+  // have to be truncated even more)
+  if (q > 16) {
+    x0 = q - 16;
+    if (q <= 18) {
+      round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
+                    &is_midpoint_lt_even, &is_midpoint_gt_even,
+                    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+    } else { // 19 <= q <= 34
+      round128_19_38 (q, x0, C, &res128, &incr_exp,
+                      &is_midpoint_lt_even, &is_midpoint_gt_even,
+                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+      res1 = res128.w[0]; // the result fits in 64 bits
+    }
+    unbexp = unbexp + x0;
+    if (incr_exp)
+      unbexp++;
+    q = 16; // need to set in case denormalization is necessary
+  } else {
+    // the result does not require a second rounding (and it must have
+    // been exact in the first rounding, since q <= 16)
+    res1 = C.w[0];
+  }
+
+  // avoid a double rounding error
+  if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
+      is_midpoint_lt_even) { // double rounding error upward
+    // res = res - 1
+    res1--; // res1 becomes odd
+    is_midpoint_lt_even = 0;
+    is_inexact_lt_midpoint = 1;
+    if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
+      res1 = 0x002386f26fc0ffffull; // 10^16 - 1
+      unbexp--;
+    }
+  } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
+             is_midpoint_gt_even) { // double rounding error downward
+    // res = res + 1
+    res1++; // res1 becomes odd (so it cannot be 10^16)
+    is_midpoint_gt_even = 0;
+    is_inexact_gt_midpoint = 1;
+  } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
+             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+    // if this second rounding was exact the result may still be
+    // inexact because of the first rounding
+    if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
+      is_inexact_gt_midpoint = 1;
+    }
+    if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
+      is_inexact_lt_midpoint = 1;
+    }
+  } else if (is_midpoint_gt_even &&
+             (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+    // pulled up to a midpoint
+    is_inexact_lt_midpoint = 1;
+    is_inexact_gt_midpoint = 0;
+    is_midpoint_lt_even = 0;
+    is_midpoint_gt_even = 0;
+  } else if (is_midpoint_lt_even &&
+             (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+    // pulled down to a midpoint
+    is_inexact_lt_midpoint = 0;
+    is_inexact_gt_midpoint = 1;
+    is_midpoint_lt_even = 0;
+    is_midpoint_gt_even = 0;
+  } else {
+    ;
+  }
+  // this is the result rounded correctly to nearest even, with unbounded exp.
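+
+  // Editorial worked example (added for exposition; the constants are the
+  // ones this file already uses, P16 = 16 and expmax16 = 369 for BID64):
+  // any finite BID64 result must satisfy
+  //   q + unbexp <= P16 + expmax16 = 16 + 369 = 385.
+  // E.g. q = 16 and unbexp = 370 give 386 > 385, so the check below replaces
+  // the result with +/-inf and raises the overflow and inexact flags; if
+  // instead unbexp > 369 but q + unbexp <= 385, res1 is rescaled exactly by
+  // 10^(unbexp - expmax16) and unbexp is clamped to expmax16.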
+
+  // check for overflow
+  if (q + unbexp > P16 + expmax16) {
+    res1 = sign | 0x7800000000000000ull;
+    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
+    *pfpsf |= save_fpsf;
+    BID_RETURN (res1)
+  } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
+    // not overflow; the result must be exact, and we can multiply res1 by
+    // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
+    scale = unbexp - expmax16;
+    res1 = res1 * ten2k64[scale]; // res1 * 10^scale
+    unbexp = expmax16; // unbexp - scale
+  } else {
+    ; // continue
+  }
+
+  // check for underflow
+  if (q + unbexp < P16 + expmin16) {
+    if (unbexp < expmin16) {
+      // we must truncate more of res
+      x0 = expmin16 - unbexp; // x0 >= 1
+      is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
+      is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
+      is_midpoint_lt_even0 = is_midpoint_lt_even;
+      is_midpoint_gt_even0 = is_midpoint_gt_even;
+      is_inexact_lt_midpoint = 0;
+      is_inexact_gt_midpoint = 0;
+      is_midpoint_lt_even = 0;
+      is_midpoint_gt_even = 0;
+      // the number of decimal digits in res1 is q
+      if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
+        // 2 <= q <= 16, 1 <= x0 <= 15
+        round64_2_18 (q, x0, res1, &res1, &incr_exp,
+                      &is_midpoint_lt_even, &is_midpoint_gt_even,
+                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
+        if (incr_exp) {
+          // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
+          res1 = ten2k64[q - x0];
+        }
+        unbexp = unbexp + x0; // expmin16
+      } else if (x0 == q) {
+        // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
+        // determine relationship with 1/2 ulp
+        // q <= 16
+        if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
+          lt_half_ulp = 1;
+          is_inexact_lt_midpoint = 1;
+        } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
+          eq_half_ulp = 1;
+          is_midpoint_gt_even = 1;
+        } else { // > 1/2 ulp
+          // gt_half_ulp = 1;
+          is_inexact_gt_midpoint = 1;
+        }
+        if (lt_half_ulp || eq_half_ulp) {
+          // res = +0.0 * 10^expmin16
+          res1 = 0x0000000000000000ull;
+        } else { // if (gt_half_ulp)
+          // res = +1 * 10^expmin16
+          res1 = 0x0000000000000001ull;
+        }
+        unbexp = expmin16;
+      } else { // if (x0 > q)
+        // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
+        res1 = 0x0000000000000000ull;
+        unbexp = expmin16;
+        is_inexact_lt_midpoint = 1;
+      }
+      // avoid a double rounding error
+      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
+          is_midpoint_lt_even) { // double rounding error upward
+        // res = res - 1
+        res1--; // res1 becomes odd
+        is_midpoint_lt_even = 0;
+        is_inexact_lt_midpoint = 1;
+      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
+                 is_midpoint_gt_even) { // double rounding error downward
+        // res = res + 1
+        res1++; // res1 becomes odd
+        is_midpoint_gt_even = 0;
+        is_inexact_gt_midpoint = 1;
+      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
+                 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
+        // if this rounding was exact the result may still be
+        // inexact because of the previous roundings
+        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
+          is_inexact_gt_midpoint = 1;
+        }
+        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
+          is_inexact_lt_midpoint = 1;
+        }
+      } else if (is_midpoint_gt_even &&
+                 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
+        // pulled up to a midpoint
+        is_inexact_lt_midpoint = 1;
+        is_inexact_gt_midpoint = 0;
+        is_midpoint_lt_even = 0;
+        is_midpoint_gt_even = 0;
+      } else if (is_midpoint_lt_even &&
+                 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
+        // pulled down to a midpoint
+        is_inexact_lt_midpoint = 0;
+        is_inexact_gt_midpoint = 1;
+        is_midpoint_lt_even = 0;
+        is_midpoint_gt_even = 0;
+      } else {
+        ;
+      }
+    }
+    // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
+    // and the result is tiny and exact
+
+    // check for inexact result
+    if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
+        is_midpoint_lt_even || is_midpoint_gt_even ||
+        is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
+        is_midpoint_lt_even0 || is_midpoint_gt_even0) {
+      // set the inexact flag and the underflow flag
+      *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
+    }
+  } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
+             is_midpoint_lt_even || is_midpoint_gt_even) {
+    *pfpsf |= INEXACT_EXCEPTION;
+  }
+  // this is the result rounded correctly to nearest, with bounded exponent
+
+  if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
+    // res = res + 1
+    res1++; // res1 is now odd
+  } // else the result is already correct
+
+  // assemble the result
+  if (res1 < 0x0020000000000000ull) { // res < 2^53
+    res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
+  } else { // res1 >= 2^53
+    res1 = sign | MASK_STEERING_BITS |
+      ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
+  }
+  *pfpsf |= save_fpsf;
+  BID_RETURN (res1);
+}
--
cgit v1.2.3