arch/sh/kernel/cpu/sh4/softfloat.c

   1 /*
   2  * Floating point emulation support for subnormalised numbers on SH4
   3  * architecture This file is derived from the SoftFloat IEC/IEEE
   4  * Floating-point Arithmetic Package, Release 2 the original license of
   5  * which is reproduced below.
   6  *
   7  * ========================================================================
   8  *
   9  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
  10  * Arithmetic Package, Release 2.
  11  *
  12  * Written by John R. Hauser.  This work was made possible in part by the
  13  * International Computer Science Institute, located at Suite 600, 1947 Center
  14  * Street, Berkeley, California 94704.  Funding was partially provided by the
  15  * National Science Foundation under grant MIP-9311980.  The original version
  16  * of this code was written as part of a project to build a fixed-point vector
  17  * processor in collaboration with the University of California at Berkeley,
  18  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  19  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  20  * arithmetic/softfloat.html'.
  21  *
  22  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  23  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  24  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  25  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  26  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  27  *
  28  * Derivative works are acceptable, even for commercial purposes, so long as
  29  * (1) they include prominent notice that the work is derivative, and (2) they
  30  * include prominent notice akin to these three paragraphs for those parts of
  31  * this code that are retained.
  32  *
  33  * ========================================================================
  34  *
  35  * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
  36  * and Kamel Khelifi <kamel.khelifi@st.com>
  37  */
  38 #include <linux/kernel.h>
  39 #include <cpu/fpu.h>
  40
  41 #define LIT64( a ) a##LL
  42
  43 typedef char flag;
  44 typedef unsigned char uint8;
  45 typedef signed char int8;
  46 typedef int uint16;
  47 typedef int int16;
  48 typedef unsigned int uint32;
  49 typedef signed int int32;
  50
  51 typedef unsigned long long int bits64;
  52 typedef signed long long int sbits64;
  53
  54 typedef unsigned char bits8;
  55 typedef signed char sbits8;
  56 typedef unsigned short int bits16;
  57 typedef signed short int sbits16;
  58 typedef unsigned int bits32;
  59 typedef signed int sbits32;
  60
  61 typedef unsigned long long int uint64;
  62 typedef signed long long int int64;
  63
  64 typedef unsigned long int float32;
  65 typedef unsigned long long float64;
  66
  67 extern void float_raise(unsigned int flags);    /* in fpu.c */
  68 extern int float_rounding_mode(void);   /* in fpu.c */
  69
  70 inline bits64 extractFloat64Frac(float64 a);
  71 inline flag extractFloat64Sign(float64 a);
  72 inline int16 extractFloat64Exp(float64 a);
  73 inline int16 extractFloat32Exp(float32 a);
  74 inline flag extractFloat32Sign(float32 a);
  75 inline bits32 extractFloat32Frac(float32 a);
  76 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
  77 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
  78 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
  79 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
  80 float64 float64_sub(float64 a, float64 b);
  81 float32 float32_sub(float32 a, float32 b);
  82 float32 float32_add(float32 a, float32 b);
  83 float64 float64_add(float64 a, float64 b);
  84 float64 float64_div(float64 a, float64 b);
  85 float32 float32_div(float32 a, float32 b);
  86 float32 float32_mul(float32 a, float32 b);
  87 float64 float64_mul(float64 a, float64 b);
  88 float32 float64_to_float32(float64 a);
  89 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  90                    bits64 * z1Ptr);
  91 inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  92                    bits64 * z1Ptr);
  93 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
  94
  95 static int8 countLeadingZeros32(bits32 a);
  96 static int8 countLeadingZeros64(bits64 a);
  97 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
  98                                             bits64 zSig);
  99 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
 100 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
 101 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
 102 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
 103                                             bits32 zSig);
 104 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
 105 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
 106 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
 107 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
 108                                       bits64 * zSigPtr);
 109 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
 110 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
 111                                       bits32 * zSigPtr);
 112
 113 inline bits64 extractFloat64Frac(float64 a)
 114 {
 115         return a & LIT64(0x000FFFFFFFFFFFFF);
 116 }
 117
 118 inline flag extractFloat64Sign(float64 a)
 119 {
 120         return a >> 63;
 121 }
 122
 123 inline int16 extractFloat64Exp(float64 a)
 124 {
 125         return (a >> 52) & 0x7FF;
 126 }
 127
 128 inline int16 extractFloat32Exp(float32 a)
 129 {
 130         return (a >> 23) & 0xFF;
 131 }
 132
 133 inline flag extractFloat32Sign(float32 a)
 134 {
 135         return a >> 31;
 136 }
 137
 138 inline bits32 extractFloat32Frac(float32 a)
 139 {
 140         return a & 0x007FFFFF;
 141 }
 142
 143 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
 144 {
 145         return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
 146 }
 147
 148 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
 149 {
 150         bits64 z;
 151
 152         if (count == 0) {
 153                 z = a;
 154         } else if (count < 64) {
 155                 z = (a >> count) | ((a << ((-count) & 63)) != 0);
 156         } else {
 157                 z = (a != 0);
 158         }
 159         *zPtr = z;
 160 }
 161
 162 static int8 countLeadingZeros32(bits32 a)
 163 {
 164         static const int8 countLeadingZerosHigh[] = {
 165                 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 166                 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 167                 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 168                 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 169                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 170                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 171                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 172                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 173                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 174                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 175                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 176                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 177                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 178                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 179                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 180                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 181         };
 182         int8 shiftCount;
 183
 184         shiftCount = 0;
 185         if (a < 0x10000) {
 186                 shiftCount += 16;
 187                 a <<= 16;
 188         }
 189         if (a < 0x1000000) {
 190                 shiftCount += 8;
 191                 a <<= 8;
 192         }
 193         shiftCount += countLeadingZerosHigh[a >> 24];
 194         return shiftCount;
 195
 196 }
 197
 198 static int8 countLeadingZeros64(bits64 a)
 199 {
 200         int8 shiftCount;
 201
 202         shiftCount = 0;
 203         if (a < ((bits64) 1) << 32) {
 204                 shiftCount += 32;
 205         } else {
 206                 a >>= 32;
 207         }
 208         shiftCount += countLeadingZeros32(a);
 209         return shiftCount;
 210
 211 }
 212
 213 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
 214 {
 215         int8 shiftCount;
 216
 217         shiftCount = countLeadingZeros64(zSig) - 1;
 218         return roundAndPackFloat64(zSign, zExp - shiftCount,
 219                                    zSig << shiftCount);
 220
 221 }
 222
 223 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
 224 {
 225         int16 aExp, bExp, zExp;
 226         bits64 aSig, bSig, zSig;
 227         int16 expDiff;
 228
 229         aSig = extractFloat64Frac(a);
 230         aExp = extractFloat64Exp(a);
 231         bSig = extractFloat64Frac(b);
 232         bExp = extractFloat64Exp(b);
 233         expDiff = aExp - bExp;
 234         aSig <<= 10;
 235         bSig <<= 10;
 236         if (0 < expDiff)
 237                 goto aExpBigger;
 238         if (expDiff < 0)
 239                 goto bExpBigger;
 240         if (aExp == 0) {
 241                 aExp = 1;
 242                 bExp = 1;
 243         }
 244         if (bSig < aSig)
 245                 goto aBigger;
 246         if (aSig < bSig)
 247                 goto bBigger;
 248         return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
 249       bExpBigger:
 250         if (bExp == 0x7FF) {
 251                 return packFloat64(zSign ^ 1, 0x7FF, 0);
 252         }
 253         if (aExp == 0) {
 254                 ++expDiff;
 255         } else {
 256                 aSig |= LIT64(0x4000000000000000);
 257         }
 258         shift64RightJamming(aSig, -expDiff, &aSig);
 259         bSig |= LIT64(0x4000000000000000);
 260       bBigger:
 261         zSig = bSig - aSig;
 262         zExp = bExp;
 263         zSign ^= 1;
 264         goto normalizeRoundAndPack;
 265       aExpBigger:
 266         if (aExp == 0x7FF) {
 267                 return a;
 268         }
 269         if (bExp == 0) {
 270                 --expDiff;
 271         } else {
 272                 bSig |= LIT64(0x4000000000000000);
 273         }
 274         shift64RightJamming(bSig, expDiff, &bSig);
 275         aSig |= LIT64(0x4000000000000000);
 276       aBigger:
 277         zSig = aSig - bSig;
 278         zExp = aExp;
 279       normalizeRoundAndPack:
 280         --zExp;
 281         return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
 282
 283 }
 284 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
 285 {
 286         int16 aExp, bExp, zExp;
 287         bits64 aSig, bSig, zSig;
 288         int16 expDiff;
 289
 290         aSig = extractFloat64Frac(a);
 291         aExp = extractFloat64Exp(a);
 292         bSig = extractFloat64Frac(b);
 293         bExp = extractFloat64Exp(b);
 294         expDiff = aExp - bExp;
 295         aSig <<= 9;
 296         bSig <<= 9;
 297         if (0 < expDiff) {
 298                 if (aExp == 0x7FF) {
 299                         return a;
 300                 }
 301                 if (bExp == 0) {
 302                         --expDiff;
 303                 } else {
 304                         bSig |= LIT64(0x2000000000000000);
 305                 }
 306                 shift64RightJamming(bSig, expDiff, &bSig);
 307                 zExp = aExp;
 308         } else if (expDiff < 0) {
 309                 if (bExp == 0x7FF) {
 310                         return packFloat64(zSign, 0x7FF, 0);
 311                 }
 312                 if (aExp == 0) {
 313                         ++expDiff;
 314                 } else {
 315                         aSig |= LIT64(0x2000000000000000);
 316                 }
 317                 shift64RightJamming(aSig, -expDiff, &aSig);
 318                 zExp = bExp;
 319         } else {
 320                 if (aExp == 0x7FF) {
 321                         return a;
 322                 }
 323                 if (aExp == 0)
 324                         return packFloat64(zSign, 0, (aSig + bSig) >> 9);
 325                 zSig = LIT64(0x4000000000000000) + aSig + bSig;
 326                 zExp = aExp;
 327                 goto roundAndPack;
 328         }
 329         aSig |= LIT64(0x2000000000000000);
 330         zSig = (aSig + bSig) << 1;
 331         --zExp;
 332         if ((sbits64) zSig < 0) {
 333                 zSig = aSig + bSig;
 334                 ++zExp;
 335         }
 336       roundAndPack:
 337         return roundAndPackFloat64(zSign, zExp, zSig);
 338
 339 }
 340
 341 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
 342 {
 343         return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
 344 }
 345
 346 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
 347 {
 348         bits32 z;
 349         if (count == 0) {
 350                 z = a;
 351         } else if (count < 32) {
 352                 z = (a >> count) | ((a << ((-count) & 31)) != 0);
 353         } else {
 354                 z = (a != 0);
 355         }
 356         *zPtr = z;
 357 }
 358
 359 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
 360 {
 361         flag roundNearestEven;
 362         int8 roundIncrement, roundBits;
 363         flag isTiny;
 364
 365         /* SH4 has only 2 rounding modes - round to nearest and round to zero */
 366         roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
 367         roundIncrement = 0x40;
 368         if (!roundNearestEven) {
 369                 roundIncrement = 0;
 370         }
 371         roundBits = zSig & 0x7F;
 372         if (0xFD <= (bits16) zExp) {
 373                 if ((0xFD < zExp)
 374                     || ((zExp == 0xFD)
 375                         && ((sbits32) (zSig + roundIncrement) < 0))
 376                     ) {
 377                         float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
 378                         return packFloat32(zSign, 0xFF,
 379                                            0) - (roundIncrement == 0);
 380                 }
 381                 if (zExp < 0) {
 382                         isTiny = (zExp < -1)
 383                             || (zSig + roundIncrement < 0x80000000);
 384                         shift32RightJamming(zSig, -zExp, &zSig);
 385                         zExp = 0;
 386                         roundBits = zSig & 0x7F;
 387                         if (isTiny && roundBits)
 388                                 float_raise(FPSCR_CAUSE_UNDERFLOW);
 389                 }
 390         }
 391         if (roundBits)
 392                 float_raise(FPSCR_CAUSE_INEXACT);
 393         zSig = (zSig + roundIncrement) >> 7;
 394         zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
 395         if (zSig == 0)
 396                 zExp = 0;
 397         return packFloat32(zSign, zExp, zSig);
 398
 399 }
 400
 401 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
 402 {
 403         int8 shiftCount;
 404
 405         shiftCount = countLeadingZeros32(zSig) - 1;
 406         return roundAndPackFloat32(zSign, zExp - shiftCount,
 407                                    zSig << shiftCount);
 408 }
 409
 410 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
 411 {
 412         flag roundNearestEven;
 413         int16 roundIncrement, roundBits;
 414         flag isTiny;
 415
 416         /* SH4 has only 2 rounding modes - round to nearest and round to zero */
 417         roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
 418         roundIncrement = 0x200;
 419         if (!roundNearestEven) {
 420                 roundIncrement = 0;
 421         }
 422         roundBits = zSig & 0x3FF;
 423         if (0x7FD <= (bits16) zExp) {
 424                 if ((0x7FD < zExp)
 425                     || ((zExp == 0x7FD)
 426                         && ((sbits64) (zSig + roundIncrement) < 0))
 427                     ) {
 428                         float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
 429                         return packFloat64(zSign, 0x7FF,
 430                                            0) - (roundIncrement == 0);
 431                 }
 432                 if (zExp < 0) {
 433                         isTiny = (zExp < -1)
 434                             || (zSig + roundIncrement <
 435                                 LIT64(0x8000000000000000));
 436                         shift64RightJamming(zSig, -zExp, &zSig);
 437                         zExp = 0;
 438                         roundBits = zSig & 0x3FF;
 439                         if (isTiny && roundBits)
 440                                 float_raise(FPSCR_CAUSE_UNDERFLOW);
 441                 }
 442         }
 443         if (roundBits)
 444                 float_raise(FPSCR_CAUSE_INEXACT);
 445         zSig = (zSig + roundIncrement) >> 10;
 446         zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
 447         if (zSig == 0)
 448                 zExp = 0;
 449         return packFloat64(zSign, zExp, zSig);
 450
 451 }
 452
 453 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
 454 {
 455         int16 aExp, bExp, zExp;
 456         bits32 aSig, bSig, zSig;
 457         int16 expDiff;
 458
 459         aSig = extractFloat32Frac(a);
 460         aExp = extractFloat32Exp(a);
 461         bSig = extractFloat32Frac(b);
 462         bExp = extractFloat32Exp(b);
 463         expDiff = aExp - bExp;
 464         aSig <<= 7;
 465         bSig <<= 7;
 466         if (0 < expDiff)
 467                 goto aExpBigger;
 468         if (expDiff < 0)
 469                 goto bExpBigger;
 470         if (aExp == 0) {
 471                 aExp = 1;
 472                 bExp = 1;
 473         }
 474         if (bSig < aSig)
 475                 goto aBigger;
 476         if (aSig < bSig)
 477                 goto bBigger;
 478         return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
 479       bExpBigger:
 480         if (bExp == 0xFF) {
 481                 return packFloat32(zSign ^ 1, 0xFF, 0);
 482         }
 483         if (aExp == 0) {
 484                 ++expDiff;
 485         } else {
 486                 aSig |= 0x40000000;
 487         }
 488         shift32RightJamming(aSig, -expDiff, &aSig);
 489         bSig |= 0x40000000;
 490       bBigger:
 491         zSig = bSig - aSig;
 492         zExp = bExp;
 493         zSign ^= 1;
 494         goto normalizeRoundAndPack;
 495       aExpBigger:
 496         if (aExp == 0xFF) {
 497                 return a;
 498         }
 499         if (bExp == 0) {
 500                 --expDiff;
 501         } else {
 502                 bSig |= 0x40000000;
 503         }
 504         shift32RightJamming(bSig, expDiff, &bSig);
 505         aSig |= 0x40000000;
 506       aBigger:
 507         zSig = aSig - bSig;
 508         zExp = aExp;
 509       normalizeRoundAndPack:
 510         --zExp;
 511         return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
 512
 513 }
 514
 515 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
 516 {
 517         int16 aExp, bExp, zExp;
 518         bits32 aSig, bSig, zSig;
 519         int16 expDiff;
 520
 521         aSig = extractFloat32Frac(a);
 522         aExp = extractFloat32Exp(a);
 523         bSig = extractFloat32Frac(b);
 524         bExp = extractFloat32Exp(b);
 525         expDiff = aExp - bExp;
 526         aSig <<= 6;
 527         bSig <<= 6;
 528         if (0 < expDiff) {
 529                 if (aExp == 0xFF) {
 530                         return a;
 531                 }
 532                 if (bExp == 0) {
 533                         --expDiff;
 534                 } else {
 535                         bSig |= 0x20000000;
 536                 }
 537                 shift32RightJamming(bSig, expDiff, &bSig);
 538                 zExp = aExp;
 539         } else if (expDiff < 0) {
 540                 if (bExp == 0xFF) {
 541                         return packFloat32(zSign, 0xFF, 0);
 542                 }
 543                 if (aExp == 0) {
 544                         ++expDiff;
 545                 } else {
 546                         aSig |= 0x20000000;
 547                 }
 548                 shift32RightJamming(aSig, -expDiff, &aSig);
 549                 zExp = bExp;
 550         } else {
 551                 if (aExp == 0xFF) {
 552                         return a;
 553                 }
 554                 if (aExp == 0)
 555                         return packFloat32(zSign, 0, (aSig + bSig) >> 6);
 556                 zSig = 0x40000000 + aSig + bSig;
 557                 zExp = aExp;
 558                 goto roundAndPack;
 559         }
 560         aSig |= 0x20000000;
 561         zSig = (aSig + bSig) << 1;
 562         --zExp;
 563         if ((sbits32) zSig < 0) {
 564                 zSig = aSig + bSig;
 565                 ++zExp;
 566         }
 567       roundAndPack:
 568         return roundAndPackFloat32(zSign, zExp, zSig);
 569
 570 }
 571
 572 float64 float64_sub(float64 a, float64 b)
 573 {
 574         flag aSign, bSign;
 575
 576         aSign = extractFloat64Sign(a);
 577         bSign = extractFloat64Sign(b);
 578         if (aSign == bSign) {
 579                 return subFloat64Sigs(a, b, aSign);
 580         } else {
 581                 return addFloat64Sigs(a, b, aSign);
 582         }
 583
 584 }
 585
 586 float32 float32_sub(float32 a, float32 b)
 587 {
 588         flag aSign, bSign;
 589
 590         aSign = extractFloat32Sign(a);
 591         bSign = extractFloat32Sign(b);
 592         if (aSign == bSign) {
 593                 return subFloat32Sigs(a, b, aSign);
 594         } else {
 595                 return addFloat32Sigs(a, b, aSign);
 596         }
 597
 598 }
 599
 600 float32 float32_add(float32 a, float32 b)
 601 {
 602         flag aSign, bSign;
 603
 604         aSign = extractFloat32Sign(a);
 605         bSign = extractFloat32Sign(b);
 606         if (aSign == bSign) {
 607                 return addFloat32Sigs(a, b, aSign);
 608         } else {
 609                 return subFloat32Sigs(a, b, aSign);
 610         }
 611
 612 }
 613
 614 float64 float64_add(float64 a, float64 b)
 615 {
 616         flag aSign, bSign;
 617
 618         aSign = extractFloat64Sign(a);
 619         bSign = extractFloat64Sign(b);
 620         if (aSign == bSign) {
 621                 return addFloat64Sigs(a, b, aSign);
 622         } else {
 623                 return subFloat64Sigs(a, b, aSign);
 624         }
 625 }
 626
 627 static void
 628 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
 629 {
 630         int8 shiftCount;
 631
 632         shiftCount = countLeadingZeros64(aSig) - 11;
 633         *zSigPtr = aSig << shiftCount;
 634         *zExpPtr = 1 - shiftCount;
 635 }
 636
 637 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
 638                    bits64 * z1Ptr)
 639 {
 640         bits64 z1;
 641
 642         z1 = a1 + b1;
 643         *z1Ptr = z1;
 644         *z0Ptr = a0 + b0 + (z1 < a1);
 645 }
 646
 647 inline void
 648 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
 649        bits64 * z1Ptr)
 650 {
 651         *z1Ptr = a1 - b1;
 652         *z0Ptr = a0 - b0 - (a1 < b1);
 653 }
 654
 655 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
 656 {
 657         bits64 b0, b1;
 658         bits64 rem0, rem1, term0, term1;
 659         bits64 z;
 660         if (b <= a0)
 661                 return LIT64(0xFFFFFFFFFFFFFFFF);
 662         b0 = b >> 32;
 663         z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32;
 664         mul64To128(b, z, &term0, &term1);
 665         sub128(a0, a1, term0, term1, &rem0, &rem1);
 666         while (((sbits64) rem0) < 0) {
 667                 z -= LIT64(0x100000000);
 668                 b1 = b << 32;
 669                 add128(rem0, rem1, b0, b1, &rem0, &rem1);
 670         }
 671         rem0 = (rem0 << 32) | (rem1 >> 32);
 672         z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
 673         return z;
 674 }
 675
 676 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
 677 {
 678         bits32 aHigh, aLow, bHigh, bLow;
 679         bits64 z0, zMiddleA, zMiddleB, z1;
 680
 681         aLow = a;
 682         aHigh = a >> 32;
 683         bLow = b;
 684         bHigh = b >> 32;
 685         z1 = ((bits64) aLow) * bLow;
 686         zMiddleA = ((bits64) aLow) * bHigh;
 687         zMiddleB = ((bits64) aHigh) * bLow;
 688         z0 = ((bits64) aHigh) * bHigh;
 689         zMiddleA += zMiddleB;
 690         z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
 691         zMiddleA <<= 32;
 692         z1 += zMiddleA;
 693         z0 += (z1 < zMiddleA);
 694         *z1Ptr = z1;
 695         *z0Ptr = z0;
 696
 697 }
 698
 699 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
 700                                       bits32 * zSigPtr)
 701 {
 702         int8 shiftCount;
 703
 704         shiftCount = countLeadingZeros32(aSig) - 8;
 705         *zSigPtr = aSig << shiftCount;
 706         *zExpPtr = 1 - shiftCount;
 707
 708 }
 709
 710 float64 float64_div(float64 a, float64 b)
 711 {
 712         flag aSign, bSign, zSign;
 713         int16 aExp, bExp, zExp;
 714         bits64 aSig, bSig, zSig;
 715         bits64 rem0, rem1;
 716         bits64 term0, term1;
 717
 718         aSig = extractFloat64Frac(a);
 719         aExp = extractFloat64Exp(a);
 720         aSign = extractFloat64Sign(a);
 721         bSig = extractFloat64Frac(b);
 722         bExp = extractFloat64Exp(b);
 723         bSign = extractFloat64Sign(b);
 724         zSign = aSign ^ bSign;
 725         if (aExp == 0x7FF) {
 726                 if (bExp == 0x7FF) {
 727                 }
 728                 return packFloat64(zSign, 0x7FF, 0);
 729         }
 730         if (bExp == 0x7FF) {
 731                 return packFloat64(zSign, 0, 0);
 732         }
 733         if (bExp == 0) {
 734                 if (bSig == 0) {
 735                         if ((aExp | aSig) == 0) {
 736                                 float_raise(FPSCR_CAUSE_INVALID);
 737                         }
 738                         return packFloat64(zSign, 0x7FF, 0);
 739                 }
 740                 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
 741         }
 742         if (aExp == 0) {
 743                 if (aSig == 0)
 744                         return packFloat64(zSign, 0, 0);
 745                 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
 746         }
 747         zExp = aExp - bExp + 0x3FD;
 748         aSig = (aSig | LIT64(0x0010000000000000)) << 10;
 749         bSig = (bSig | LIT64(0x0010000000000000)) << 11;
 750         if (bSig <= (aSig + aSig)) {
 751                 aSig >>= 1;
 752                 ++zExp;
 753         }
 754         zSig = estimateDiv128To64(aSig, 0, bSig);
 755         if ((zSig & 0x1FF) <= 2) {
 756                 mul64To128(bSig, zSig, &term0, &term1);
 757                 sub128(aSig, 0, term0, term1, &rem0, &rem1);
 758                 while ((sbits64) rem0 < 0) {
 759                         --zSig;
 760                         add128(rem0, rem1, 0, bSig, &rem0, &rem1);
 761                 }
 762                 zSig |= (rem1 != 0);
 763         }
 764         return roundAndPackFloat64(zSign, zExp, zSig);
 765
 766 }
 767
 768 float32 float32_div(float32 a, float32 b)
 769 {
 770         flag aSign, bSign, zSign;
 771         int16 aExp, bExp, zExp;
 772         bits32 aSig, bSig, zSig;
 773
 774         aSig = extractFloat32Frac(a);
 775         aExp = extractFloat32Exp(a);
 776         aSign = extractFloat32Sign(a);
 777         bSig = extractFloat32Frac(b);
 778         bExp = extractFloat32Exp(b);
 779         bSign = extractFloat32Sign(b);
 780         zSign = aSign ^ bSign;
 781         if (aExp == 0xFF) {
 782                 if (bExp == 0xFF) {
 783                 }
 784                 return packFloat32(zSign, 0xFF, 0);
 785         }
 786         if (bExp == 0xFF) {
 787                 return packFloat32(zSign, 0, 0);
 788         }
 789         if (bExp == 0) {
 790                 if (bSig == 0) {
 791                         return packFloat32(zSign, 0xFF, 0);
 792                 }
 793                 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
 794         }
 795         if (aExp == 0) {
 796                 if (aSig == 0)
 797                         return packFloat32(zSign, 0, 0);
 798                 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
 799         }
 800         zExp = aExp - bExp + 0x7D;
 801         aSig = (aSig | 0x00800000) << 7;
 802         bSig = (bSig | 0x00800000) << 8;
 803         if (bSig <= (aSig + aSig)) {
 804                 aSig >>= 1;
 805                 ++zExp;
 806         }
 807         zSig = (((bits64) aSig) << 32) / bSig;
 808         if ((zSig & 0x3F) == 0) {
 809                 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
 810         }
 811         return roundAndPackFloat32(zSign, zExp, zSig);
 812
 813 }
 814
 815 float32 float32_mul(float32 a, float32 b)
 816 {
 817         char aSign, bSign, zSign;
 818         int aExp, bExp, zExp;
 819         unsigned int aSig, bSig;
 820         unsigned long long zSig64;
 821         unsigned int zSig;
 822
 823         aSig = extractFloat32Frac(a);
 824         aExp = extractFloat32Exp(a);
 825         aSign = extractFloat32Sign(a);
 826         bSig = extractFloat32Frac(b);
 827         bExp = extractFloat32Exp(b);
 828         bSign = extractFloat32Sign(b);
 829         zSign = aSign ^ bSign;
 830         if (aExp == 0) {
 831                 if (aSig == 0)
 832                         return packFloat32(zSign, 0, 0);
 833                 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
 834         }
 835         if (bExp == 0) {
 836                 if (bSig == 0)
 837                         return packFloat32(zSign, 0, 0);
 838                 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
 839         }
 840         if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
 841                 return roundAndPackFloat32(zSign, 0xff, 0);
 842
 843         zExp = aExp + bExp - 0x7F;
 844         aSig = (aSig | 0x00800000) << 7;
 845         bSig = (bSig | 0x00800000) << 8;
 846         shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
 847         zSig = zSig64;
 848         if (0 <= (signed int)(zSig << 1)) {
 849                 zSig <<= 1;
 850                 --zExp;
 851         }
 852         return roundAndPackFloat32(zSign, zExp, zSig);
 853
 854 }
 855
 856 float64 float64_mul(float64 a, float64 b)
 857 {
 858         char aSign, bSign, zSign;
 859         int aExp, bExp, zExp;
 860         unsigned long long int aSig, bSig, zSig0, zSig1;
 861
 862         aSig = extractFloat64Frac(a);
 863         aExp = extractFloat64Exp(a);
 864         aSign = extractFloat64Sign(a);
 865         bSig = extractFloat64Frac(b);
 866         bExp = extractFloat64Exp(b);
 867         bSign = extractFloat64Sign(b);
 868         zSign = aSign ^ bSign;
 869
 870         if (aExp == 0) {
 871                 if (aSig == 0)
 872                         return packFloat64(zSign, 0, 0);
 873                 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
 874         }
 875         if (bExp == 0) {
 876                 if (bSig == 0)
 877                         return packFloat64(zSign, 0, 0);
 878                 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
 879         }
 880         if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
 881                 return roundAndPackFloat64(zSign, 0x7ff, 0);
 882
 883         zExp = aExp + bExp - 0x3FF;
 884         aSig = (aSig | 0x0010000000000000LL) << 10;
 885         bSig = (bSig | 0x0010000000000000LL) << 11;
 886         mul64To128(aSig, bSig, &zSig0, &zSig1);
 887         zSig0 |= (zSig1 != 0);
 888         if (0 <= (signed long long int)(zSig0 << 1)) {
 889                 zSig0 <<= 1;
 890                 --zExp;
 891         }
 892         return roundAndPackFloat64(zSign, zExp, zSig0);
 893 }
 894
 895 /*
 896  * -------------------------------------------------------------------------------
 897  *  Returns the result of converting the double-precision floating-point value
 898  *  `a' to the single-precision floating-point format.  The conversion is
 899  *  performed according to the IEC/IEEE Standard for Binary Floating-point
 900  *  Arithmetic.
 901  *  -------------------------------------------------------------------------------
 902  *  */
 903 float32 float64_to_float32(float64 a)
 904 {
 905     flag aSign;
 906     int16 aExp;
 907     bits64 aSig;
 908     bits32 zSig;
 909
 910     aSig = extractFloat64Frac( a );
 911     aExp = extractFloat64Exp( a );
 912     aSign = extractFloat64Sign( a );
 913
 914     shift64RightJamming( aSig, 22, &aSig );
 915     zSig = aSig;
 916     if ( aExp || zSig ) {
 917         zSig |= 0x40000000;
 918         aExp -= 0x381;
 919     }
 920     return roundAndPackFloat32(aSign, aExp, zSig);
 921 }