/* Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation. This program is distributed in the hope that it would be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Further, this software is distributed without any warranty that it is free of the rightful claim of any third person regarding infringement or the like. Any license provided herein, whether implied or otherwise, applies only to this software file. Patent licenses, if any, provided herein do not apply to combinations of this program with other software, or any other product whatsoever. You should have received a copy of the GNU General Public License along with this program; if not, write the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, Mountain View, CA 94043, or: http://www.sgi.com For further information regarding this notice, see: http://oss.sgi.com/projects/GenInfo/NoticeExplan */ /* ======================================================================= * ======================================================================= * * Module: c_q_div.c * $Revision: 1.1 $ * $Date: 2001/10/30 16:59:45 $ * $Author: ciancu $ * $Source: bitbucket.org:berkeleylab/upc-translator.git/open64/osprey1.0/common/util/c_q_div.c $ * * ======================================================================= * ======================================================================= */ #include "defs.h" #include "quad.h" /* note that this routine must be kept in sync with the corresponding libc * routine, q_div. */ typedef union { struct { UINT32 hi; UINT32 lo; } word; double d; } du; /* const1 is 1.0 + 2^(53 - 53/2), i.e. 1.0 + 2^27 */ static const du const1 = {0x41a00000, 0x02000000}; static const du twopm968 = {0x03700000, 0x00000000}; static const du twopm54 = {0x3c900000, 0x00000000}; static const du twop52 = {0x43300000, 0x00000000}; static const du inf = {0x7ff00000, 0x00000000}; extern QUAD c_q_div(QUAD, QUAD, INT *); #pragma weak c_q_div = __c_q_div #define c_q_div __c_q_div double fabs(double); #pragma intrinsic (fabs) #define EXPBIAS 0x3ff /* computes the quotient of two long doubles */ QUAD c_q_div(QUAD x, QUAD y, INT *p_err ) { double xhi, xlo, yhi, ylo; INT32 n; INT64 ixhi, iyhi; INT64 xptxhi, xptyhi; INT64 ix, iy, iz; double c, cc, w, ww; double quarterulp; double xfactor, yfactor; double p, hc, tc; double hyhi, tyhi; double z, zz; QUAD u; QUAD result; /* adapted from T. J. Dekker's div2 subroutine */ #ifdef QUAD_DEBUG printf("c_q_div: xhi = %08x%08x\n", *(INT32 *)&xhi, *((INT32 *)&xhi + 1)); printf("c_q_div: xlo = %08x%08x\n", *(INT32 *)&xlo, *((INT32 *)&xlo + 1)); printf("c_q_div: yhi = %08x%08x\n", *(INT32 *)&yhi, *((INT32 *)&yhi + 1)); printf("c_q_div: ylo = %08x%08x\n", *(INT32 *)&ylo, *((INT32 *)&ylo + 1)); #endif *p_err = 0; xhi = x.hi; xlo = x.lo; yhi = y.hi; ylo = y.lo; /* extract exponents of y and x for some quick screening */ DBL2LL(yhi, iyhi); xptyhi = (iyhi >> DMANTWIDTH); xptyhi &= 0x7ff; DBL2LL(xhi, ixhi); xptxhi = (ixhi >> DMANTWIDTH); xptxhi &= 0x7ff; /* Avoid underflows and overflows in forming the products x*const1.d, y*const1.d, x*y, and hx*hy by scaling if necessary. x and y are also be scaled if tx*ty is a denormal. */ if ( (0x30d < xptxhi) && (xptxhi < 0x4f1) && (0x30d < xptyhi) && (xptyhi < 0x4f1) ) { /* normal case */ c = xhi/yhi; /* u.ld = __qprod(c, yhi); */ p = c*const1.d; hc = (c - p) + p; tc = c - hc; p = yhi*const1.d; hyhi = (yhi - p) + p; tyhi = yhi - hyhi; u.hi = c*yhi; u.lo = (((hc*hyhi - u.hi) + hc*tyhi) + hyhi*tc) + tc*tyhi; cc = ((xhi - u.hi - u.lo + xlo) - c*ylo)/yhi; z = c + cc; zz = (c - z) + cc; #ifdef QUAD_DEBUG printf("c_q_div: z = %08x%08x\n", *(INT32 *)&z, *((INT32 *)&z + 1)); printf("c_q_div: zz = %08x%08x\n", *(INT32 *)&zz, *((INT32 *)&zz + 1)); #endif /* if necessary, round zz so that the sum of z and zz has at most 107 significant bits */ if ( fabs(z) >= twopm968.d ) { /* determine true exponent of z + zz as a 107 bit number */ DBL2LL(z, iz); iz >>= DMANTWIDTH; iz <<= DMANTWIDTH; LL2DBL(iz, w); /* w = dtwofloor(z) */ if ( (z == w) && ((z > 0.0 && zz < 0.0) || (z < 0.0 && zz > 0.0)) ) w *= 0.5; /* round zz if it's less than 1/4 ulp of w */ quarterulp = twopm54.d*fabs(w); if ( fabs(zz) < quarterulp ) { if ( zz >= 0.0 ) { zz = (quarterulp + zz) - quarterulp; } else { zz = quarterulp + (zz - quarterulp); } w = z + zz; ww = (z - w) + zz; z = w; zz = ww; } } result.hi = z; result.lo = zz; #ifdef QUAD_DEBUG printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); #endif return ( result ); } if ( (xptxhi < 0x7ff) && (xptyhi < 0x7ff) ) { if ( (xhi == 0.0) || (yhi == 0.0) ) { result.hi = xhi/yhi; result.lo = 0.0; #ifdef QUAD_DEBUG printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); #endif return ( result ); } xfactor = 1.0; yfactor = 1.0; /* Scale x and y appropriately and use previous algorithm. Then unscale result. */ if ( xptxhi <= 0x30d ) { xhi *= twop52.d; /* first, make sure x is normal */ xlo *= twop52.d; /* now scale x so that its exponent is 0x30e */ DBL2LL(xhi, ix); ix >>= DMANTWIDTH; n = (ix & 0x7ff); ix = (0x30e - n) + EXPBIAS; ix <<= DMANTWIDTH; LL2DBL(ix, c); xhi *= c; xlo *= c; ix = (n - 52 - 0x30e) + EXPBIAS; ix <<= DMANTWIDTH; LL2DBL(ix, xfactor); } if ( xptyhi <= 0x30d ) { yhi *= twop52.d; /* first, make sure y is normal */ ylo *= twop52.d; /* now scale y so that its exponent is 0x30e */ DBL2LL(yhi, iy); iy >>= DMANTWIDTH; n = (iy & 0x7ff); iy = (0x30e - n) + EXPBIAS; iy <<= DMANTWIDTH; LL2DBL(iy, c); yhi *= c; ylo *= c; iy = (0x30e - n + 52) + EXPBIAS; iy <<= DMANTWIDTH; LL2DBL(iy, yfactor); } if ( xptxhi >= 0x4f1 ) { /* scale x so that its exponent is 0x4f1 */ DBL2LL(xhi, ix); ix >>= DMANTWIDTH; n = (ix & 0x7ff); ix = (0x4f1 - n) + EXPBIAS; ix <<= DMANTWIDTH; LL2DBL(ix, c); xhi *= c; xlo *= c; ix = (n - 0x4f1) + EXPBIAS; ix <<= DMANTWIDTH; LL2DBL(ix, xfactor); } if ( xptyhi >= 0x4f1 ) { /* scale y so that its exponent is 0x4f1 */ DBL2LL(yhi, iy); iy >>= DMANTWIDTH; n = (iy & 0x7ff); iy = (0x4f1 - n) + EXPBIAS; iy <<= DMANTWIDTH; LL2DBL(iy, c); yhi *= c; ylo *= c; iy = (0x4f1 - n) + EXPBIAS; iy <<= DMANTWIDTH; LL2DBL(iy, yfactor); } c = xhi/yhi; /* u.ld = __qprod(c, yhi); */ p = c*const1.d; hc = (c - p) + p; tc = c - hc; p = yhi*const1.d; hyhi = (yhi - p) + p; tyhi = yhi - hyhi; u.hi = c*yhi; u.lo = (((hc*hyhi - u.hi) + hc*tyhi) + hyhi*tc) + tc*tyhi; cc = ((xhi - u.hi - u.lo + xlo) - c*ylo)/yhi; z = c + cc; zz = (c - z) + cc; #ifdef QUAD_DEBUG printf("c_q_div: z = %08x%08x\n", *(INT32 *)&z, *((INT32 *)&z + 1)); printf("c_q_div: zz = %08x%08x\n", *(INT32 *)&zz, *((INT32 *)&zz + 1)); #endif /* if necessary, round zz so that the sum of z and zz has at most 107 significant bits */ if ( fabs(z) >= twopm968.d ) { /* determine true exponent of z + zz as a 107 bit number */ DBL2LL(z, iz); iz >>= DMANTWIDTH; iz <<= DMANTWIDTH; LL2DBL(iz, w); /* w = dtwofloor(z) */ if ( (z == w) && ((z > 0.0 && zz < 0.0) || (z < 0.0 && zz > 0.0)) ) w *= 0.5; /* round zz if it's less than 1/4 ulp of w */ quarterulp = twopm54.d*fabs(w); if ( fabs(zz) < quarterulp ) { if ( zz >= 0.0 ) { zz = (quarterulp + zz) - quarterulp; } else { zz = quarterulp + (zz - quarterulp); } w = z + zz; ww = (z - w) + zz; z = w; zz = ww; } } if ( ((xfactor <= 1.0) && (1.0 <= yfactor)) || ((yfactor <= 1.0) && (1.0 <= xfactor)) ) { z = z*(xfactor*yfactor); } else { z *= xfactor; z *= yfactor; } if ( (z == 0.0) || (fabs(z) == inf.d) ) { result.hi = z; result.lo = 0.0; #ifdef QUAD_DEBUG printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); #endif return ( result ); } if ( ((xfactor <= 1.0) && (1.0 <= yfactor)) || ((yfactor <= 1.0) && (1.0 <= xfactor)) ) { zz = zz*(xfactor*yfactor); } else { zz *= xfactor; zz *= yfactor; } result.hi = z + zz; result.lo = (z - result.hi) + zz; #ifdef QUAD_DEBUG printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); #endif return ( result ); } result.hi = xhi/yhi; result.lo = 0.0; #ifdef QUAD_DEBUG printf("c_q_div: result.hi = %08x%08x\n", *(INT32 *)&result.hi, *((INT32 *)&result.hi + 1)); printf("c_q_div: result.lo = %08x%08x\n", *(INT32 *)&result.lo, *((INT32 *)&result.lo + 1)); #endif return ( result ); }