nsz Git - libfirm/blob - ir/tv/fltcalc.c

   1 /*
   2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief    tarval floating point calculations
  23  * @date     2003
  24  * @author   Mathias Heil
  25  * @version  $Id$
  26  */
  27
  28 #ifdef HAVE_CONFIG_H
  29 # include "config.h"
  30 #endif
  31
  32 #include "fltcalc.h"
  33 #include "strcalc.h"
  34
  35 #include <math.h>    /* need isnan() and isinf() (will be changed)*/
  36 /* undef some reused constants defined by math.h */
  37 #ifdef NAN
  38 #  undef NAN
  39 #endif
  40
  41 #ifdef HAVE_INTTYPES_H
  42 # include <inttypes.h>
  43 #endif
  44 #ifdef HAVE_STRING_H
  45 # include <string.h>
  46 #endif
  47 #ifdef HAVE_STDLIB_H
  48 # include <stdlib.h>
  49 #endif
  50 #include <stdio.h>
  51 #include <assert.h>
  52
  53 #include "xmalloc.h"
  54
  55 typedef uint32_t UINT32;
  56
  57 #ifdef HAVE_LONG_DOUBLE
  58 #ifdef WORDS_BIGENDIAN
  59 typedef union {
  60   struct {
  61     UINT32 high;
  62     UINT32 mid;
  63     UINT32 low;
  64   } val;
  65   volatile long double d;
  66 } value_t;
  67 #else
  68 typedef union {
  69   struct {
  70     UINT32 low;
  71     UINT32 mid;
  72     UINT32 high;
  73   } val;
  74   volatile long double d;
  75 } value_t;
  76 #endif
  77 #else
  78 #ifdef WORDS_BIGENDIAN
  79 typedef union {
  80   struct {
  81     UINT32 high;
  82     UINT32 low;
  83   } val;
  84   volatile double d;
  85 } value_t;
  86 #else
  87 typedef union {
  88   struct {
  89     UINT32 low;
  90     UINT32 high;
  91   } val;
  92   volatile double d;
  93 } value_t;
  94 #endif
  95 #endif
  96
  97 /**
  98  * possible float states
  99  */
 100 typedef enum {
 101   NORMAL,       /**< normal representation, implicit 1 */
 102   ZERO,         /**< +/-0 */
 103   SUBNORMAL,    /**< denormals, implicit 0 */
 104   INF,          /**< +/-oo */
 105   NAN,          /**< Not A Number */
 106 } value_class_t;
 107
 108 /** A descriptor for an IEEE float value. */
 109 typedef struct {
 110   unsigned char exponent_size;    /**< size of exponent in bits */
 111   unsigned char mantissa_size;    /**< size of mantissa in bits */
 112   value_class_t clss;             /**< state of this float */
 113 } descriptor_t;
 114
 115 #define CLEAR_BUFFER(buffer) memset(buffer, 0, calc_buffer_size)
 116
 117 /* because variable sized structs are impossible, the internal
 118  * value is represented as a pseudo-struct char array, addressed
 119  * by macros
 120  * struct {
 121  *   char sign;             //  0 for positive, 1 for negative
 122  *   char exp[value_size];
 123  *   char mant[value_size];
 124  *   descriptor_t desc;
 125  * };
 126  */
 127 #define _sign(a) (((char*)a)[SIGN_POS])
 128 #define _exp(a) (&((char*)a)[EXPONENT_POS])
 129 #define _mant(a) (&((char*)a)[MANTISSA_POS])
 130 #define _desc(a) (*(descriptor_t *)&((char *)a)[DESCRIPTOR_POS])
 131
 132 #define _save_result(x) memcpy((x), sc_get_buffer(), value_size)
 133 #define _shift_right(x, y, b) sc_shr((x), (y), value_size*4, 0, (b))
 134 #define _shift_left(x, y, b) sc_shl((x), (y), value_size*4, 0, (b))
 135
 136 #define FC_DEFINE1(code) \
 137 char *fc_##code(const void *a, void *result)  {                        \
 138   return _calc((const char*)a, NULL, FC_##code, (char*)result);        \
 139 }
 140
 141 #define FC_DEFINE2(code) \
 142 char *fc_##code(const void *a, const void *b, void *result) {             \
 143   return _calc((const char*)a, (const char*)b, FC_##code, (char*)result); \
 144 }
 145
 146 #define FUNC_PTR(code) fc_##code
 147
 148 #if FLTCALC_DEBUG
 149 #  define DEBUGPRINTF(x) printf x
 150 #else
 151 #  define DEBUGPRINTF(x) ((void)0)
 152 #endif
 153
 154 #if FLTCALC_TRACE_CALC
 155 #  define TRACEPRINTF(x) printf x
 156 #else
 157 #  define TRACEPRINTF(x) ((void)0)
 158 #endif
 159
 160 static char *calc_buffer = NULL;
 161
 162 static fc_rounding_mode_t rounding_mode;
 163
 164 static int calc_buffer_size;
 165 static int value_size;
 166 static int SIGN_POS;
 167 static int EXPONENT_POS;
 168 static int MANTISSA_POS;
 169 static int DESCRIPTOR_POS;
 170
 171 static int max_precision;
 172 /********
 173  * private functions
 174  ********/
 175 #if 0
 176 static void _fail_char(const char *str, unsigned int len, int pos)
 177 {
 178   if (*(str+pos))
 179     printf("ERROR: Unexpected character '%c'\n", *(str + pos));
 180   else
 181     printf("ERROR: Unexpected end of string\n");
 182   while (len-- && *str) printf("%c", *str++); printf("\n");
 183   while (pos--) printf(" "); printf("^\n");
 184   /* the front end has to to check constant strings */
 185   exit(-1);
 186 }
 187 #endif
 188
 189 /** pack machine-like */
 190 static char* _pack(const char *int_float, char *packed)
 191 {
 192   char *shift_val;
 193   char *temp;
 194   char *val_buffer;
 195
 196   temp = alloca(value_size);
 197   shift_val = alloca(value_size);
 198
 199   switch (_desc(int_float).clss) {
 200     case NAN:
 201       val_buffer = alloca(calc_buffer_size);
 202       fc_get_qnan(_desc(int_float).exponent_size, _desc(int_float).mantissa_size, val_buffer);
 203       int_float = val_buffer;
 204       break;
 205
 206     case INF:
 207       val_buffer = alloca(calc_buffer_size);
 208       fc_get_plusinf(_desc(int_float).exponent_size, _desc(int_float).mantissa_size, val_buffer);
 209       _sign(val_buffer) = _sign(int_float);
 210       int_float = val_buffer;
 211       break;
 212
 213     default:
 214       break;
 215   }
 216   /* pack sign */
 217   sc_val_from_ulong(_sign(int_float), temp);
 218
 219   sc_val_from_ulong(_desc(int_float).exponent_size + _desc(int_float).mantissa_size, NULL);
 220   _shift_left(temp, sc_get_buffer(), packed);
 221
 222   /* extract exponent */
 223   sc_val_from_ulong(_desc(int_float).mantissa_size, shift_val);
 224
 225   _shift_left(_exp(int_float), shift_val, temp);
 226
 227   sc_or(temp, packed, packed);
 228
 229   /* extract mantissa */
 230   /* remove 2 rounding bits */
 231   sc_val_from_ulong(2, shift_val);
 232   _shift_right(_mant(int_float), shift_val, temp);
 233
 234   /* remove leading 1 (or 0 if denormalized) */
 235   sc_max_from_bits(_desc(int_float).mantissa_size, 0, shift_val); /* all mantissa bits are 1's */
 236   sc_and(temp, shift_val, temp);
 237
 238   /* save result */
 239   sc_or(temp, packed, packed);
 240
 241   return packed;
 242 }
 243
 244 char* _normalize(const char *in_val, char *out_val, int sticky)
 245 {
 246   int hsb;
 247   char lsb, guard, round, round_dir = 0;
 248   char *temp;
 249
 250   temp = alloca(value_size);
 251
 252   /* +2: save two rounding bits at the end */
 253   hsb = 2 + _desc(in_val).mantissa_size - sc_get_highest_set_bit(_mant(in_val)) - 1;
 254
 255   if (in_val != out_val)
 256   {
 257     _sign(out_val) = _sign(in_val);
 258     memcpy(&_desc(out_val), &_desc(in_val), sizeof(descriptor_t));
 259   }
 260
 261   _desc(out_val).clss = NORMAL;
 262
 263   /* mantissa all zeros, so zero exponent (because of explicit one)*/
 264   if (hsb == 2 + _desc(in_val).mantissa_size)
 265   {
 266     sc_val_from_ulong(0, _exp(out_val));
 267     hsb = -1;
 268   }
 269
 270   /* shift the first 1 into the left of the radix point (i.e. hsb == -1) */
 271   if (hsb < -1)
 272   {
 273     /* shift right */
 274     sc_val_from_ulong(-hsb-1, temp);
 275
 276     _shift_right(_mant(in_val), temp, _mant(out_val));
 277
 278     /* remember if some bits were shifted away */
 279     if (!sticky) sticky = sc_had_carry();
 280
 281     sc_add(_exp(in_val), temp, _exp(out_val));
 282   }
 283   else if (hsb > -1)
 284   {
 285     /* shift left */
 286     sc_val_from_ulong(hsb+1, temp);
 287
 288     _shift_left(_mant(in_val), temp, _mant(out_val));
 289
 290     sc_sub(_exp(in_val), temp, _exp(out_val));
 291   }
 292
 293   /* check for exponent underflow */
 294   if (sc_is_negative(_exp(out_val)) || sc_is_zero(_exp(out_val))) {
 295     DEBUGPRINTF(("Exponent underflow!\n"));
 296     /* exponent underflow */
 297     /* shift the mantissa right to have a zero exponent */
 298     sc_val_from_ulong(1, temp);
 299     sc_sub(temp, _exp(out_val), NULL);
 300
 301     _shift_right(_mant(out_val), sc_get_buffer(), _mant(out_val));
 302     if (!sticky) sticky = sc_had_carry();
 303     /* denormalized means exponent of zero */
 304     sc_val_from_ulong(0, _exp(out_val));
 305
 306     _desc(out_val).clss = SUBNORMAL;
 307   }
 308
 309   /* perform rounding by adding a value that clears the guard bit and the round bit
 310    * and either causes a carry to round up or not */
 311   /* get the last 3 bits of the value */
 312   lsb = sc_sub_bits(_mant(out_val), _desc(out_val).mantissa_size + 2, 0) & 0x7;
 313   guard = (lsb&0x2)>>1;
 314   round = lsb&0x1;
 315
 316   switch (rounding_mode)
 317   {
 318     case FC_TONEAREST:
 319       /* round to nearest representable value, if in doubt choose the version
 320        * with lsb == 0 */
 321       round_dir = guard && (sticky || round || lsb>>2);
 322       break;
 323     case FC_TOPOSITIVE:
 324       /* if positive: round to one if the exact value is bigger, else to zero */
 325       round_dir = (!_sign(out_val) && (guard || round || sticky));
 326       break;
 327     case FC_TONEGATIVE:
 328       /* if negative: round to one if the exact value is bigger, else to zero */
 329       round_dir = (_sign(out_val) && (guard || round || sticky));
 330       break;
 331     case FC_TOZERO:
 332       /* always round to 0 (chopping mode) */
 333       round_dir = 0;
 334       break;
 335   }
 336   DEBUGPRINTF(("Rounding (s%d, l%d, g%d, r%d, s%d) %s\n", _sign(out_val), lsb>>2, guard, round, sticky, (round_dir)?"up":"down"));
 337
 338   if (round_dir == 1)
 339   {
 340     guard = (round^guard)<<1;
 341     lsb = !(round || guard)<<2 | guard | round;
 342   }
 343   else
 344   {
 345     lsb = -((guard<<1) | round);
 346   }
 347
 348   /* add the rounded value */
 349   if (lsb != 0) {
 350     sc_val_from_long(lsb, temp);
 351     sc_add(_mant(out_val), temp, _mant(out_val));
 352   }
 353
 354   /* could have rounded down to zero */
 355   if (sc_is_zero(_mant(out_val)) && (_desc(out_val).clss == SUBNORMAL))
 356     _desc(out_val).clss = ZERO;
 357
 358   /* check for rounding overflow */
 359   hsb = 2 + _desc(out_val).mantissa_size - sc_get_highest_set_bit(_mant(out_val)) - 1;
 360   if ((_desc(out_val).clss != SUBNORMAL) && (hsb < -1))
 361   {
 362     sc_val_from_ulong(1, temp);
 363     _shift_right(_mant(out_val), temp, _mant(out_val));
 364
 365     sc_add(_exp(out_val), temp, _exp(out_val));
 366   }
 367   else if ((_desc(out_val).clss == SUBNORMAL) && (hsb == -1))
 368   {
 369     /* overflow caused the mantissa to be normal again,
 370      * so adapt the exponent accordingly */
 371     sc_val_from_ulong(1, temp);
 372     sc_add(_exp(out_val), temp, _exp(out_val));
 373
 374     _desc(out_val).clss = NORMAL;
 375   }
 376   /* no further rounding is needed, because rounding overflow means
 377    * the carry of the original rounding was propagated all the way
 378    * up to the bit left of the radix point. This implies the bits
 379    * to the right are all zeros (rounding is +1) */
 380
 381   /* check for exponent overflow */
 382   sc_val_from_ulong((1 << _desc(out_val).exponent_size) - 1, temp);
 383   if (sc_comp(_exp(out_val), temp) != -1) {
 384     DEBUGPRINTF(("Exponent overflow!\n"));
 385     /* exponent overflow, reaction depends on rounding method:
 386      *
 387      * mode        | sign of value |  result
 388      *--------------------------------------------------------------
 389      * TO_NEAREST  |      +        |   +inf
 390      *             |      -        |   -inf
 391      *--------------------------------------------------------------
 392      * TO_POSITIVE |      +        |   +inf
 393      *             |      -        |   smallest representable value
 394      *--------------------------------------------------------------
 395      * TO_NEAGTIVE |      +        |   largest representable value
 396      *             |      -        |   -inf
 397      *--------------------------------------------------------------
 398      * TO_ZERO     |      +        |   largest representable value
 399      *             |      -        |   smallest representable value
 400      *--------------------------------------------------------------*/
 401     if (_sign(out_val) == 0)
 402     {
 403       /* value is positive */
 404       switch (rounding_mode) {
 405         case FC_TONEAREST:
 406         case FC_TOPOSITIVE:
 407           _desc(out_val).clss = INF;
 408           break;
 409
 410         case FC_TONEGATIVE:
 411         case FC_TOZERO:
 412           fc_get_max(_desc(out_val).exponent_size, _desc(out_val).mantissa_size, out_val);
 413       }
 414     } else {
 415       /* value is negative */
 416       switch (rounding_mode) {
 417         case FC_TONEAREST:
 418         case FC_TONEGATIVE:
 419           _desc(out_val).clss = INF;
 420           break;
 421
 422         case FC_TOPOSITIVE:
 423         case FC_TOZERO:
 424           fc_get_min(_desc(out_val).exponent_size, _desc(out_val).mantissa_size, out_val);
 425       }
 426     }
 427   }
 428
 429   return out_val;
 430 }
 431
 432 /**
 433  * Operations involving NaN's must return NaN
 434  */
 435 #define handle_NAN(a, b, result) \
 436 do {                                                      \
 437   if (_desc(a).clss == NAN) {                             \
 438     if (a != result) memcpy(result, a, calc_buffer_size); \
 439     return result;                                        \
 440   }                                                       \
 441   if (_desc(b).clss == NAN) {                             \
 442     if (b != result) memcpy(result, b, calc_buffer_size); \
 443     return result;                                        \
 444   }                                                       \
 445 }while (0)
 446
 447
 448 /**
 449  * calculate a + b, where a is the value with the bigger exponent
 450  */
 451 static char* _fadd(const char* a, const char* b, char* result)
 452 {
 453   char *temp;
 454   char *exp_diff;
 455
 456   char sign;
 457   char sticky;
 458
 459   handle_NAN(a, b, result);
 460
 461   /* make sure result has a descriptor */
 462   if (result != a && result != b)
 463     memcpy(&_desc(result), &_desc(a), sizeof(descriptor_t));
 464
 465   /* determine if this is an addition or subtraction */
 466   sign = _sign(a) ^ _sign(b);
 467
 468   /* produce NaN on inf - inf */
 469   if (sign && (_desc(a).clss == INF) && (_desc(b).clss == INF))
 470     return fc_get_qnan(_desc(a).exponent_size, _desc(b).mantissa_size, result);
 471
 472   temp     = alloca(value_size);
 473   exp_diff = alloca(value_size);
 474
 475   /* get exponent difference */
 476   sc_sub(_exp(a), _exp(b), exp_diff);
 477
 478   /* initially set sign to be the sign of a, special treatment of subtraction
 479    * when exponents are equal is required though.
 480    * Also special care about the sign is needed when the mantissas are equal
 481    * (+/- 0 ?) */
 482   if (sign && sc_val_to_long(exp_diff) == 0) {
 483     switch (sc_comp(_mant(a), _mant(b))) {
 484       case 1:  /* a > b */
 485         _sign(result) = _sign(a);  /* abs(a) is bigger and a is negative */
 486         break;
 487       case 0:  /* a == b */
 488         _sign(result) = (rounding_mode == FC_TONEGATIVE);
 489         break;
 490       case -1: /* a < b */
 491         _sign(result) = _sign(b); /* abs(b) is bigger and b is negative */
 492         break;
 493       default:
 494         /* can't be reached */
 495         break;
 496     }
 497   }
 498   else
 499     _sign(result) = _sign(a);
 500
 501   /* sign has been taken care of, check for special cases */
 502   if (_desc(a).clss == ZERO) {
 503     if (b != result) memcpy(result+SIGN_POS+1, b+SIGN_POS+1, calc_buffer_size-SIGN_POS-1);
 504     return result;
 505   }
 506   if (_desc(b).clss == ZERO) {
 507     if (a != result) memcpy(result+SIGN_POS+1, a+SIGN_POS+1, calc_buffer_size-SIGN_POS-1);
 508     return result;
 509   }
 510
 511   if (_desc(a).clss == INF) {
 512     if (a != result) memcpy(result+SIGN_POS+1, a+SIGN_POS+1, calc_buffer_size-SIGN_POS-1);
 513     return result;
 514   }
 515   if (_desc(b).clss == INF) {
 516     if (b != result) memcpy(result+SIGN_POS+1, b+SIGN_POS+1, calc_buffer_size-SIGN_POS-1);
 517     return result;
 518   }
 519
 520   /* shift the smaller value to the right to align the radix point */
 521   /* subnormals have their radix point shifted to the right,
 522    * take care of this first */
 523   if ((_desc(b).clss == SUBNORMAL) && (_desc(a).clss != SUBNORMAL))
 524   {
 525     sc_val_from_ulong(1, temp);
 526     sc_sub(exp_diff, temp, exp_diff);
 527   }
 528
 529   _shift_right(_mant(b), exp_diff, temp);
 530   sticky = sc_had_carry();
 531
 532   if (sticky && sign)
 533   {
 534     /* if subtracting a little more than the represented value or adding a little
 535      * more than the represented value to a negative value this, in addition to the
 536      * still set sticky bit, takes account of the 'little more' */
 537     char *temp1 = alloca(calc_buffer_size);
 538     sc_val_from_ulong(1, temp1);
 539     sc_add(temp, temp1, temp);
 540   }
 541
 542   if (sign) {
 543     if (sc_comp(_mant(a), temp) == -1)
 544       sc_sub(temp, _mant(a), _mant(result));
 545     else
 546       sc_sub(_mant(a), temp, _mant(result));
 547   } else {
 548     sc_add(_mant(a), temp, _mant(result));
 549   }
 550
 551   /* _normalize expects a 'normal' radix point, adding two subnormals
 552    * results in a subnormal radix point -> shifting before normalizing */
 553   if ((_desc(a).clss == SUBNORMAL) && (_desc(b).clss == SUBNORMAL))
 554   {
 555     sc_val_from_ulong(1, NULL);
 556     _shift_left(_mant(result), sc_get_buffer(), _mant(result));
 557   }
 558
 559   /* resulting exponent is the bigger one */
 560   memmove(_exp(result), _exp(a), value_size);
 561
 562   return _normalize(result, result, sticky);
 563 }
 564
 565 /**
 566  * calculate a * b
 567  */
 568 static char* _fmul(const char* a, const char* b, char* result)
 569 {
 570   char *temp;
 571
 572   handle_NAN(a, b, result);
 573
 574   temp = alloca(value_size);
 575
 576   if (result != a && result != b)
 577     memcpy(&_desc(result), &_desc(a), sizeof(descriptor_t));
 578
 579   _sign(result) = _sign(a) ^ _sign(b);
 580
 581   /* produce NaN on 0 * inf */
 582   if (_desc(a).clss == ZERO) {
 583     if (_desc(b).clss == INF)
 584       fc_get_qnan(_desc(a).exponent_size, _desc(a).mantissa_size, result);
 585     else
 586       if (a != result) memcpy(result+SIGN_POS+1, a+SIGN_POS+1, calc_buffer_size-1);
 587     return result;
 588   }
 589   if (_desc(b).clss == ZERO) {
 590     if (_desc(a).clss == INF)
 591       fc_get_qnan(_desc(a).exponent_size, _desc(a).mantissa_size, result);
 592     else
 593       if (b != result) memcpy(result+SIGN_POS+1, b+SIGN_POS+1, calc_buffer_size-1);
 594     return result;
 595   }
 596
 597   if (_desc(a).clss == INF) {
 598     if (a != result) memcpy(result+SIGN_POS+1, a+SIGN_POS+1, calc_buffer_size-1);
 599     return result;
 600   }
 601   if (_desc(b).clss == INF) {
 602     if (b != result) memcpy(result+SIGN_POS+1, b+SIGN_POS+1, calc_buffer_size-1);
 603     return result;
 604   }
 605
 606   /* exp = exp(a) + exp(b) - excess */
 607   sc_add(_exp(a), _exp(b), _exp(result));
 608
 609   sc_val_from_ulong((1<<_desc(a).exponent_size)/2-1, temp);
 610   sc_sub(_exp(result), temp, _exp(result));
 611
 612   /* mixed normal, subnormal values introduce an error of 1, correct it */
 613   if ((_desc(a).clss == SUBNORMAL) ^ (_desc(b).clss == SUBNORMAL))
 614   {
 615     sc_val_from_ulong(1, temp);
 616     sc_add(_exp(result), temp, _exp(result));
 617   }
 618
 619   sc_mul(_mant(a), _mant(b), _mant(result));
 620
 621   /* realign result: after a multiplication the digits right of the radix
 622    * point are the sum of the factors' digits after the radix point. As all
 623    * values are normalized they both have the same amount of these digits,
 624    * which has to be restored by proper shifting
 625    * +2 because of the two rounding bits */
 626   sc_val_from_ulong(2 + _desc(result).mantissa_size, temp);
 627
 628   _shift_right(_mant(result), temp, _mant(result));
 629
 630   return _normalize(result, result, sc_had_carry());
 631 }
 632
 633 /**
 634  * calculate a / b
 635  */
 636 static char* _fdiv(const char* a, const char* b, char* result)
 637 {
 638   char *temp, *dividend;
 639
 640   handle_NAN(a, b, result);
 641
 642   temp = alloca(value_size);
 643   dividend = alloca(value_size);
 644
 645   if (result != a && result != b)
 646     memcpy(&_desc(result), &_desc(a), sizeof(descriptor_t));
 647
 648   _sign(result) = _sign(a) ^ _sign(b);
 649
 650   /* produce nan on 0/0 and inf/inf */
 651   if (_desc(a).clss == ZERO) {
 652     if (_desc(b).clss == ZERO)
 653       /* 0/0 -> nan */
 654       fc_get_qnan(_desc(a).exponent_size, _desc(a).mantissa_size, result);
 655     else
 656       /* 0/x -> a */
 657       if (a != result) memcpy(result+SIGN_POS+1, a+SIGN_POS+1, calc_buffer_size-1);
 658     return result;
 659   }
 660
 661   if (_desc(b).clss == INF) {
 662     if (_desc(a).clss == INF)
 663       /* inf/inf -> nan */
 664       fc_get_qnan(_desc(a).exponent_size, _desc(a).mantissa_size, result);
 665     else {
 666       /* x/inf -> 0 */
 667       sc_val_from_ulong(0, NULL);
 668       _save_result(_exp(result));
 669       _save_result(_mant(result));
 670       _desc(result).clss = ZERO;
 671     }
 672     return result;
 673   }
 674
 675   if (_desc(a).clss == INF) {
 676     /* inf/x -> inf */
 677     if (a != result) memcpy(result+SIGN_POS+1, a+SIGN_POS+1, calc_buffer_size-1);
 678     return result;
 679   }
 680   if (_desc(b).clss == ZERO) {
 681     /* division by zero */
 682     if (_sign(result))
 683       fc_get_minusinf(_desc(a).exponent_size, _desc(a).mantissa_size, result);
 684     else
 685       fc_get_plusinf(_desc(a).exponent_size, _desc(a).mantissa_size, result);
 686     return result;
 687   }
 688
 689   /* exp = exp(a) - exp(b) + excess - 1*/
 690   sc_sub(_exp(a), _exp(b), _exp(result));
 691   sc_val_from_ulong((1 << _desc(a).exponent_size)/2-2, temp);
 692   sc_add(_exp(result), temp, _exp(result));
 693
 694   /* mixed normal, subnormal values introduce an error of 1, correct it */
 695   if ((_desc(a).clss == SUBNORMAL) ^ (_desc(b).clss == SUBNORMAL))
 696   {
 697     sc_val_from_ulong(1, temp);
 698     sc_add(_exp(result), temp, _exp(result));
 699   }
 700
 701   /* mant(res) = mant(a) / 1/2mant(b) */
 702   /* to gain more bits of precision in the result the dividend could be
 703    * shifted left, as this operation does not loose bits. This would not
 704    * fit into the integer precision, but due to the rounding bits (which
 705    * are always zero because the values are all normalized) the divisor
 706    * can be shifted right instead to achieve the same result */
 707   sc_val_from_ulong(2 + _desc(result).mantissa_size, temp);
 708
 709   _shift_left(_mant(a), temp, dividend);
 710
 711   {
 712     char *divisor = alloca(calc_buffer_size);
 713     sc_val_from_ulong(1, divisor);
 714     _shift_right(_mant(b), divisor, divisor);
 715     sc_div(dividend, divisor, _mant(result));
 716   }
 717
 718   return _normalize(result, result, sc_had_carry());
 719 }
 720
 721 #if 0
 722 static void _power_of_ten(int exp, descriptor_t *desc, char *result)
 723 {
 724   char *build;
 725   char *temp;
 726
 727   /* positive sign */
 728   _sign(result) = 0;
 729
 730   /* set new descriptor (else result is supposed to already have one) */
 731   if (desc != NULL)
 732     memcpy(&_desc(result), desc, sizeof(descriptor_t));
 733
 734   build = alloca(value_size);
 735   temp = alloca(value_size);
 736
 737   sc_val_from_ulong((1 << _desc(result).exponent_size)/2-1, _exp(result));
 738
 739   if (exp > 0)
 740   {
 741     /* temp is value of ten now */
 742     sc_val_from_ulong(10, NULL);
 743     _save_result(temp);
 744
 745     for (exp--; exp > 0; exp--) {
 746       _save_result(build);
 747       sc_mul(build, temp, NULL);
 748     }
 749     _save_result(build);
 750
 751     /* temp is amount of left shift needed to put the value left of the radix point */
 752     sc_val_from_ulong(_desc(result).mantissa_size + 2, temp);
 753
 754     _shift_left(build, temp, _mant(result));
 755
 756     _normalize(result, result, 0);
 757   }
 758 }
 759 #endif
 760
 761 /**
 762  * Truncate the fractional part away.
 763  *
 764  * This does not clip to any integer rang.
 765  */
 766 static char* _trunc(const char *a, char *result)
 767 {
 768   /*
 769    * When exponent == 0 all bits left of the radix point
 770    * are the integral part of the value. For 15bit exp_size
 771    * this would require a left shift of max. 16383 bits which
 772    * is too much.
 773    * But it is enough to ensure that no bit right of the radix
 774    * point remains set. This restricts the interesting
 775    * exponents to the interval [0, mant_size-1].
 776    * Outside this interval the truncated value is either 0 or
 777    * it does not have fractional parts.
 778    */
 779
 780   int exp_bias, exp_val;
 781   char *temp;
 782
 783   temp = alloca(value_size);
 784
 785   if (a != result)
 786     memcpy(&_desc(result), &_desc(a), sizeof(descriptor_t));
 787
 788   exp_bias = (1<<_desc(a).exponent_size)/2-1;
 789   exp_val  = sc_val_to_long(_exp(a)) - exp_bias;
 790
 791   if (exp_val < 0) {
 792     sc_val_from_ulong(0, NULL);
 793     _save_result(_exp(result));
 794     _save_result(_mant(result));
 795     _desc(result).clss = ZERO;
 796
 797     return result;
 798   }
 799
 800   if (exp_val > _desc(a).mantissa_size) {
 801     if (a != result)
 802       memcpy(result, a, calc_buffer_size);
 803
 804     return result;
 805   }
 806
 807   /* set up a proper mask to delete all bits right of the
 808    * radix point if the mantissa had been shifted until exp == 0 */
 809   sc_max_from_bits(1 + exp_val, 0, temp);
 810   sc_val_from_long(_desc(a).mantissa_size - exp_val + 2, NULL);
 811   _shift_left(temp, sc_get_buffer(), temp);
 812
 813   /* and the mask and return the result */
 814   sc_and(_mant(a), temp, _mant(result));
 815
 816   if (a != result) memcpy(_exp(result), _exp(a), value_size);
 817
 818   return result;
 819 }
 820
 821 /*
 822  * This does value sanity checking(or should do it), sets up any prerequisites,
 823  * calls the proper internal functions, clears up and returns
 824  * the result
 825  */
 826 char* _calc(const char *a, const char *b, int opcode, char *result)
 827 {
 828   char *temp;
 829 #ifdef FLTCALC_TRACE_CALC
 830   char *buffer;
 831
 832   buffer = alloca(100);
 833 #endif
 834
 835   if (result == NULL) result = calc_buffer;
 836
 837   TRACEPRINTF(("%s ", fc_print(a, buffer, 100, FC_PACKED)));
 838   switch (opcode)
 839   {
 840     case FC_add:
 841       /* make the value with the bigger exponent the first one */
 842       TRACEPRINTF(("+ %s ", fc_print(b, buffer, 100, FC_PACKED)));
 843       if (sc_comp(_exp(a), _exp(b)) == -1)
 844         _fadd(b, a, result);
 845       else
 846         _fadd(a, b, result);
 847       break;
 848     case FC_sub:
 849       TRACEPRINTF(("- %s ", fc_print(b, buffer, 100, FC_PACKED)));
 850       temp = alloca(calc_buffer_size);
 851       memcpy(temp, b, calc_buffer_size);
 852       _sign(temp) = !_sign(b);
 853       if (sc_comp(_exp(a), _exp(temp)) == -1)
 854         _fadd(temp, a, result);
 855       else
 856         _fadd(a, temp, result);
 857       break;
 858     case FC_mul:
 859       TRACEPRINTF(("* %s ", fc_print(b, buffer, 100, FC_PACKED)));
 860       _fmul(a, b, result);
 861       break;
 862     case FC_div:
 863       TRACEPRINTF(("/ %s ", fc_print(b, buffer, 100, FC_PACKED)));
 864       _fdiv(a, b, result);
 865       break;
 866     case FC_neg:
 867       TRACEPRINTF(("negated "));
 868       if (a != result) memcpy(result, a, calc_buffer_size);
 869       _sign(result) = !_sign(a);
 870       break;
 871     case FC_int:
 872       TRACEPRINTF(("truncated to integer "));
 873       _trunc(a, result);
 874       break;
 875     case FC_rnd:
 876       TRACEPRINTF(("rounded to integer "));
 877       assert(0);
 878       break;
 879   }
 880
 881   TRACEPRINTF(("= %s\n", fc_print(result, buffer, 100, FC_PACKED)));
 882   return result;
 883 }
 884
 885 /********
 886  * functions defined in fltcalc.h
 887  ********/
 888 const void *fc_get_buffer(void)
 889 {
 890   return calc_buffer;
 891 }
 892
 893 int fc_get_buffer_length(void)
 894 {
 895   return calc_buffer_size;
 896 }
 897
 898 char* fc_val_from_str(const char *str, unsigned int len, char exp_size, char mant_size, char *result)
 899 {
 900   (void) len;
 901 #if 0
 902   enum {
 903     START,
 904     LEFT_OF_DOT,
 905     RIGHT_OF_DOT,
 906     EXP_START,
 907     EXPONENT,
 908     END
 909   };
 910
 911   char exp_sign;
 912   int exp_int, hsb, state;
 913
 914   const char *old_str;
 915
 916   int pos;
 917   char *mant_str, *exp_val, *power_val;
 918
 919   if (result == NULL) result = calc_buffer;
 920
 921   exp_val = alloca(value_size);
 922   power_val = alloca(calc_buffer_size);
 923   mant_str = alloca((len)?(len):(strlen(str)));
 924
 925   _desc(result).exponent_size = exp_size;
 926   _desc(result).mantissa_size = mant_size;
 927   _desc(result).clss = NORMAL;
 928
 929   old_str = str;
 930   pos = 0;
 931   exp_int = 0;
 932   state = START;
 933
 934   while (len == 0 || str-old_str < len)
 935   {
 936     switch (state) {
 937       case START:
 938         switch (*str) {
 939           case '+':
 940             _sign(result) = 0;
 941             state = LEFT_OF_DOT;
 942             str++;
 943             break;
 944
 945           case '-':
 946             _sign(result) = 1;
 947             state = LEFT_OF_DOT;
 948             str++;
 949             break;
 950
 951           case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
 952             _sign(result) = 0;
 953             state = LEFT_OF_DOT;
 954             break;
 955
 956           case '.':
 957             _sign(result) = 0;
 958             state = RIGHT_OF_DOT;
 959             str++;
 960             break;
 961
 962           case 'n':
 963           case 'N':
 964           case 'i':
 965           case 'I':
 966             break;
 967
 968           default:
 969             _fail_char(old_str, len, str - old_str);
 970         }
 971         break;
 972
 973       case LEFT_OF_DOT:
 974         switch (*str) {
 975           case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
 976             mant_str[pos++] = *(str++);
 977             break;
 978
 979           case '.':
 980             state = RIGHT_OF_DOT;
 981             str++;
 982             break;
 983
 984           case 'e':
 985           case 'E':
 986             state = EXP_START;
 987             str++;
 988             break;
 989
 990           case '\0':
 991             mant_str[pos] = '\0';
 992             goto done;
 993
 994           default:
 995             _fail_char(old_str, len, str - old_str);
 996         }
 997         break;
 998
 999       case RIGHT_OF_DOT:
1000         switch (*str) {
1001           case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1002             mant_str[pos++] = *(str++);
1003             exp_int++;
1004             break;
1005
1006           case 'e':
1007           case 'E':
1008             state = EXP_START;
1009             str++;
1010             break;
1011
1012           case '\0':
1013             mant_str[pos] = '\0';
1014             goto done;
1015
1016           default:
1017             _fail_char(old_str, len, str - old_str);
1018         }
1019         break;
1020
1021       case EXP_START:
1022         switch (*str) {
1023           case '-':
1024             exp_sign = 1;
1025             /* fall through */
1026           case '+':
1027             if (*(str-1) != 'e' && *(str-1) != 'E') _fail_char(old_str, len, str - old_str);
1028             str++;
1029             break;
1030
1031           case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1032             mant_str[pos] = '\0';
1033             pos = 1;
1034             str++;
1035             state = EXPONENT;
1036             break;
1037
1038           default:
1039             _fail_char(old_str, len, str - old_str);
1040         }
1041         break;
1042
1043       case EXPONENT:
1044         switch (*str) {
1045           case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1046             pos++;
1047             str++;
1048             break;
1049
1050           case '\0': goto done;
1051
1052           default:
1053             _fail_char(old_str, len, str - old_str);
1054         }
1055     }
1056   } /*  switch(state) */
1057
1058 done:
1059   sc_val_from_str(mant_str, strlen(mant_str), _mant(result));
1060
1061   /* shift to put value left of radix point */
1062   sc_val_from_ulong(mant_size + 2, exp_val);
1063
1064   _shift_left(_mant(result), exp_val, _mant(result));
1065
1066   sc_val_from_ulong((1 << exp_size)/2-1, _exp(result));
1067
1068   _normalize(result, result, 0);
1069
1070   if (state == EXPONENT) {
1071     exp_int -= atoi(str-pos);
1072   }
1073
1074   _power_of_ten(exp_int, &_desc(result), power_val);
1075
1076   _fdiv(result, power_val, result);
1077
1078   return result;
1079 #else
1080
1081   /* XXX excuse of an implementation to make things work */
1082   LLDBL val;
1083 #ifdef HAVE_LONG_DOUBLE
1084   val = strtold(str, NULL);
1085 #else
1086   val = strtod(str, NULL);
1087 #endif
1088
1089   DEBUGPRINTF(("val_from_str(%s)\n", str));
1090   return fc_val_from_float(val, exp_size, mant_size, result);
1091 #endif
1092 }
1093
1094 char* fc_val_from_float(LLDBL l, char exp_size, char mant_size, char* result)
1095 {
1096   char *temp;
1097   int bias_res, bias_val, mant_val;
1098   value_t srcval;
1099   UINT32 sign, exponent, mantissa0, mantissa1;
1100
1101   srcval.d = l;
1102   bias_res = ((1<<exp_size)/2-1);
1103
1104 #ifdef HAVE_LONG_DOUBLE
1105   mant_val  = 64;
1106   bias_val  = 0x3fff;
1107   sign      = (srcval.val.high & 0x00008000) != 0;
1108   exponent  = (srcval.val.high & 0x00007FFF) ;
1109   mantissa0 = srcval.val.mid;
1110   mantissa1 = srcval.val.low;
1111 #else /* no long double */
1112   mant_val  = 52;
1113   bias_val  = 0x3ff;
1114   sign      = (srcval.val.high & 0x80000000) != 0;
1115   exponent  = (srcval.val.high & 0x7FF00000) >> 20;
1116   mantissa0 = srcval.val.high & 0x000FFFFF;
1117   mantissa1 = srcval.val.low;
1118 #endif
1119
1120 #ifdef HAVE_LONG_DOUBLE
1121   TRACEPRINTF(("val_from_float(%.8X%.8X%.8X)\n", ((int*)&l)[2], ((int*)&l)[1], ((int*)&l)[0]));/* srcval.val.high, srcval.val.mid, srcval.val.low)); */
1122   DEBUGPRINTF(("(%d-%.4X-%.8X%.8X)\n", sign, exponent, mantissa0, mantissa1));
1123 #else
1124   TRACEPRINTF(("val_from_float(%.8X%.8X)\n", srcval.val.high, srcval.val.low));
1125   DEBUGPRINTF(("(%d-%.3X-%.5X%.8X)\n", sign, exponent, mantissa0, mantissa1));
1126 #endif
1127
1128   if (result == NULL) result = calc_buffer;
1129   temp = alloca(value_size);
1130
1131   _desc(result).exponent_size = exp_size;
1132   _desc(result).mantissa_size = mant_size;
1133
1134   /* extract sign */
1135   _sign(result) = sign;
1136
1137   /* sign and flag suffice to identify nan or inf, no exponent/mantissa
1138    * encoding is needed. the function can return immediately in these cases */
1139   if (isnan(l)) {
1140     _desc(result).clss = NAN;
1141     TRACEPRINTF(("val_from_float resulted in NAN\n"));
1142     return result;
1143   }
1144   else if (isinf(l)) {
1145     _desc(result).clss = INF;
1146     TRACEPRINTF(("val_from_float resulted in %sINF\n", (_sign(result)==1)?"-":""));
1147     return result;
1148   }
1149
1150   /* build exponent, because input and output exponent and mantissa sizes may differ
1151    * this looks more complicated than it is: unbiased input exponent + output bias,
1152    * minus the mantissa difference which is added again later when the output float
1153    * becomes normalized */
1154 #ifdef HAVE_EXPLICIT_ONE
1155   sc_val_from_long((exponent-bias_val+bias_res)-(mant_val-mant_size-1), _exp(result));
1156 #else
1157   sc_val_from_long((exponent-bias_val+bias_res)-(mant_val-mant_size), _exp(result));
1158 #endif
1159
1160   /* build mantissa representation */
1161 #ifndef HAVE_EXPLICIT_ONE
1162   if (exponent != 0)
1163   {
1164     /* insert the hidden bit */
1165     sc_val_from_ulong(1, temp);
1166     sc_val_from_ulong(mant_val + 2, NULL);
1167     _shift_left(temp, sc_get_buffer(), NULL);
1168   }
1169   else
1170 #endif
1171   {
1172     sc_val_from_ulong(0, NULL);
1173   }
1174
1175   _save_result(_mant(result));
1176
1177   /* bits from the upper word */
1178   sc_val_from_ulong(mantissa0, temp);
1179   sc_val_from_ulong(34, NULL);
1180   _shift_left(temp, sc_get_buffer(), temp);
1181   sc_or(_mant(result), temp, _mant(result));
1182
1183   /* bits from the lower word */
1184   sc_val_from_ulong(mantissa1, temp);
1185   sc_val_from_ulong(2, NULL);
1186   _shift_left(temp, sc_get_buffer(), temp);
1187   sc_or(_mant(result), temp, _mant(result));
1188
1189   /* _normalize expects the radix point to be normal, so shift mantissa of subnormal
1190    * origin one to the left */
1191   if (exponent == 0)
1192   {
1193     sc_val_from_ulong(1, NULL);
1194     _shift_left(_mant(result), sc_get_buffer(), _mant(result));
1195   }
1196
1197   _normalize(result, result, 0);
1198
1199   TRACEPRINTF(("val_from_float results in %s\n", fc_print(result, temp, calc_buffer_size, FC_PACKED)));
1200
1201   return result;
1202 }
1203
1204 LLDBL fc_val_to_float(const void *val)
1205 {
1206   const char *value;
1207   char *temp = NULL;
1208
1209   int byte_offset;
1210
1211   UINT32 sign;
1212   UINT32 exponent;
1213   UINT32 mantissa0;
1214   UINT32 mantissa1;
1215
1216   value_t buildval;
1217
1218 #ifdef HAVE_LONG_DOUBLE
1219   char result_exponent = 15;
1220   char result_mantissa = 64;
1221 #else
1222   char result_exponent = 11;
1223   char result_mantissa = 52;
1224 #endif
1225
1226   temp = alloca(calc_buffer_size);
1227 #ifdef HAVE_EXPLICIT_ONE
1228   value = fc_cast(val, result_exponent, result_mantissa-1, temp);
1229 #else
1230   value = fc_cast(val, result_exponent, result_mantissa, temp);
1231 #endif
1232
1233   sign = _sign(value);
1234
1235   /* @@@ long double exponent is 15bit, so the use of sc_val_to_long should not
1236    * lead to wrong results */
1237   exponent = sc_val_to_long(_exp(value)) ;
1238
1239   sc_val_from_ulong(2, NULL);
1240   _shift_right(_mant(value), sc_get_buffer(), _mant(value));
1241
1242   mantissa0 = 0;
1243   mantissa1 = 0;
1244
1245   for (byte_offset = 0; byte_offset < 4; byte_offset++)
1246     mantissa1 |= sc_sub_bits(_mant(value), result_mantissa, byte_offset) << (byte_offset<<3);
1247
1248   for (; (byte_offset<<3) < result_mantissa; byte_offset++)
1249     mantissa0 |= sc_sub_bits(_mant(value), result_mantissa, byte_offset) << ((byte_offset-4)<<3);
1250
1251 #ifdef HAVE_LONG_DOUBLE
1252   buildval.val.high = sign << 15;
1253   buildval.val.high |= exponent;
1254   buildval.val.mid = mantissa0;
1255   buildval.val.low = mantissa1;
1256 #else /* no long double */
1257   mantissa0 &= 0x000FFFFF;  /* get rid of garbage */
1258   buildval.val.high = sign << 31;
1259   buildval.val.high |= exponent << 20;
1260   buildval.val.high |= mantissa0;
1261   buildval.val.low = mantissa1;
1262 #endif
1263
1264   TRACEPRINTF(("val_to_float: %d-%x-%x%x\n", sign, exponent, mantissa0, mantissa1));
1265   return buildval.d;
1266 }
1267
1268 char* fc_cast(const void *val, char exp_size, char mant_size, char *result)
1269 {
1270   const char *value = (const char*) val;
1271   char *temp;
1272   int exp_offset, val_bias, res_bias;
1273
1274   if (result == NULL) result = calc_buffer;
1275   temp = alloca(value_size);
1276
1277   if (_desc(value).exponent_size == exp_size && _desc(value).mantissa_size == mant_size)
1278   {
1279     if (value != result) memcpy(result, value, calc_buffer_size);
1280     return result;
1281   }
1282
1283   /* set the descriptor of the new value */
1284   _desc(result).exponent_size = exp_size;
1285   _desc(result).mantissa_size = mant_size;
1286   _desc(result).clss = _desc(value).clss;
1287
1288   _sign(result) = _sign(value);
1289
1290   /* when the mantissa sizes differ normalizing has to shift to align it.
1291    * this would change the exponent, which is unwanted. So calculate this
1292    * offset and add it */
1293   val_bias = (1<<_desc(value).exponent_size)/2-1;
1294   res_bias = (1<<exp_size)/2-1;
1295
1296   exp_offset = (res_bias - val_bias) - (_desc(value).mantissa_size - mant_size);
1297   sc_val_from_long(exp_offset, temp);
1298   sc_add(_exp(value), temp, _exp(result));
1299
1300   /* _normalize expects normalized radix point */
1301   if (_desc(val).clss == SUBNORMAL) {
1302     sc_val_from_ulong(1, NULL);
1303     _shift_left(_mant(val), sc_get_buffer(), _mant(result));
1304   } else if (value != result) {
1305     memcpy(_mant(result), _mant(value), value_size);
1306   } else {
1307     memmove(_mant(result), _mant(value), value_size);
1308   }
1309
1310   _normalize(result, result, 0);
1311   TRACEPRINTF(("Cast results in %s\n", fc_print(result, temp, value_size, FC_PACKED)));
1312   return result;
1313 }
1314
1315 char* fc_get_max(unsigned int exponent_size, unsigned int mantissa_size, char* result)
1316 {
1317   if (result == NULL) result = calc_buffer;
1318
1319   _desc(result).exponent_size = exponent_size;
1320   _desc(result).mantissa_size = mantissa_size;
1321   _desc(result).clss = NORMAL;
1322
1323   _sign(result) = 0;
1324
1325   sc_val_from_ulong((1<<exponent_size) - 2, _exp(result));
1326
1327   sc_max_from_bits(mantissa_size + 1, 0, _mant(result));
1328   sc_val_from_ulong(2, NULL);
1329   _shift_left(_mant(result), sc_get_buffer(), _mant(result));
1330
1331   return result;
1332 }
1333
1334 char* fc_get_min(unsigned int exponent_size, unsigned int mantissa_size, char *result)
1335 {
1336   if (result == NULL) result = calc_buffer;
1337
1338   fc_get_max(exponent_size, mantissa_size, result);
1339   _sign(result) = 1;
1340
1341   return result;
1342 }
1343
1344 char* fc_get_snan(unsigned int exponent_size, unsigned int mantissa_size, char *result)
1345 {
1346   if (result == NULL) result = calc_buffer;
1347
1348   _desc(result).exponent_size = exponent_size;
1349   _desc(result).mantissa_size = mantissa_size;
1350   _desc(result).clss = NAN;
1351
1352   _sign(result) = 0;
1353
1354   sc_val_from_ulong((1<<exponent_size)-1, _exp(result));
1355
1356   /* signaling NaN has non-zero mantissa with msb not set */
1357   sc_val_from_ulong(1, _mant(result));
1358
1359   return result;
1360 }
1361
1362 char* fc_get_qnan(unsigned int exponent_size, unsigned int mantissa_size, char *result)
1363 {
1364   if (result == NULL) result = calc_buffer;
1365
1366   _desc(result).exponent_size = exponent_size;
1367   _desc(result).mantissa_size = mantissa_size;
1368   _desc(result).clss = NAN;
1369
1370   _sign(result) = 0;
1371
1372   sc_val_from_ulong((1<<exponent_size)-1, _exp(result));
1373
1374   /* quiet NaN has the msb of the mantissa set, so shift one there */
1375   sc_val_from_ulong(1, _mant(result));
1376   /* mantissa_size >+< 1 because of two extra rounding bits */
1377   sc_val_from_ulong(mantissa_size + 1, NULL);
1378   _shift_left(_mant(result), sc_get_buffer(), _mant(result));
1379
1380   return result;
1381 }
1382
1383 char* fc_get_plusinf(unsigned int exponent_size, unsigned int mantissa_size, char *result)
1384 {
1385   if (result == NULL) result = calc_buffer;
1386
1387   _desc(result).exponent_size = exponent_size;
1388   _desc(result).mantissa_size = mantissa_size;
1389   _desc(result).clss = NORMAL;
1390
1391   _sign(result) = 0;
1392
1393   sc_val_from_ulong((1<<exponent_size)-1, _exp(result));
1394
1395   sc_val_from_ulong(0, _mant(result));
1396
1397   return result;
1398 }
1399
1400 char* fc_get_minusinf(unsigned int exponent_size, unsigned int mantissa_size, char *result)
1401 {
1402   if (result == NULL) result = calc_buffer;
1403
1404   fc_get_plusinf(exponent_size, mantissa_size, result);
1405   _sign(result) = 1;
1406
1407   return result;
1408 }
1409
1410 int fc_comp(const void *a, const void *b)
1411 {
1412   const char *val_a = (const char*)a;
1413   const char *val_b = (const char*)b;
1414   int mul = 1;
1415
1416   /*
1417    * shortcut: if both values are identical, they are either
1418    * Unordered if NaN or equal
1419    */
1420   if (a == b)
1421     return _desc(val_a).clss == NAN ? 2 : 0;
1422
1423   /* unordered if one is a NaN */
1424   if (_desc(val_a).clss == NAN || _desc(val_b).clss == NAN)
1425     return 2;
1426
1427   /* zero is equal independent of sign */
1428   if ((_desc(val_a).clss == ZERO) && (_desc(val_b).clss == ZERO))
1429     return 0;
1430
1431   /* different signs make compare easy */
1432   if (_sign(val_a) != _sign(val_b))
1433     return (_sign(val_a)==0)?(1):(-1);
1434
1435   mul = _sign(a) ? -1 : 1;
1436
1437   /* both infinity means equality */
1438   if ((_desc(val_a).clss == INF) && (_desc(val_b).clss == INF))
1439     return 0;
1440
1441   /* infinity is bigger than the rest */
1442   if (_desc(val_a).clss == INF)
1443     return  1 * mul;
1444   if (_desc(val_b).clss == INF)
1445     return -1 * mul;
1446
1447   /* check first exponent, that mantissa if equal */
1448   switch (sc_comp(_exp(val_a), _exp(val_b))) {
1449     case -1:
1450       return -1 * mul;
1451     case  1:
1452       return  1 * mul;
1453     case  0:
1454       return sc_comp(_mant(val_a), _mant(val_b)) * mul;
1455     default:
1456       return 2;
1457   }
1458 }
1459
1460 int fc_is_zero(const void *a)
1461 {
1462   return _desc(a).clss == ZERO;
1463 }
1464
1465 int fc_is_negative(const void *a)
1466 {
1467   return _sign(a);
1468 }
1469
1470 int fc_is_inf(const void *a)
1471 {
1472   return _desc(a).clss == INF;
1473 }
1474
1475 int fc_is_nan(const void *a)
1476 {
1477   return _desc(a).clss == NAN;
1478 }
1479
1480 int fc_is_subnormal(const void *a)
1481 {
1482   return _desc(a).clss == SUBNORMAL;
1483 }
1484
1485 char *fc_print(const void *a, char *buf, int buflen, unsigned base)
1486 {
1487   const char *val;
1488   char *mul_1;
1489
1490   val = (const char*)a;
1491
1492   mul_1 = alloca(calc_buffer_size);
1493
1494   switch (base) {
1495     case FC_DEC:
1496       switch (_desc(val).clss) {
1497         case INF:
1498           if (buflen >= 8+_sign(val)) sprintf(buf, "%sINFINITY", _sign(val)?"-":"");
1499           else snprintf(buf, buflen, "%sINF", _sign(val)?"-":NULL);
1500           break;
1501         case NAN:
1502           snprintf(buf, buflen, "NAN");
1503           break;
1504         case ZERO:
1505           snprintf(buf, buflen, "0.0");
1506           break;
1507         default:
1508           /* XXX to be implemented */
1509 #ifdef HAVE_LONG_DOUBLE
1510           /* XXX 30 is arbitrary */
1511           snprintf(buf, buflen, "%.30LE", fc_val_to_float(val));
1512 #else
1513           snprintf(buf, buflen, "%.18E", fc_val_to_float(val));
1514 #endif
1515       }
1516       break;
1517
1518     case FC_HEX:
1519       switch (_desc(val).clss) {
1520         case INF:
1521           if (buflen >= 8+_sign(val)) sprintf(buf, "%sINFINITY", _sign(val)?"-":"");
1522           else snprintf(buf, buflen, "%sINF", _sign(val)?"-":NULL);
1523           break;
1524         case NAN:
1525           snprintf(buf, buflen, "NAN");
1526           break;
1527         case ZERO:
1528           snprintf(buf, buflen, "0.0");
1529           break;
1530         default:
1531 #ifdef HAVE_LONG_DOUBLE
1532           snprintf(buf, buflen, "%LA", fc_val_to_float(val));
1533 #else
1534           snprintf(buf, buflen, "%A", fc_val_to_float(val));
1535 #endif
1536       }
1537       break;
1538
1539     case FC_PACKED:
1540     default:
1541       snprintf(buf, buflen, "%s", sc_print(_pack(val, mul_1), value_size*4, SC_HEX, 0));
1542       buf[buflen - 1] = '\0';
1543       break;
1544   }
1545   return buf;
1546 }
1547
1548 unsigned char fc_sub_bits(const void *value, unsigned num_bits, unsigned byte_ofs)
1549 {
1550   /* this is used to cache the packed version of the value */
1551   static char *pack = NULL;
1552
1553   if (pack == NULL) pack = xmalloc(value_size);
1554
1555   if (value != NULL)
1556     _pack((const char*)value, pack);
1557
1558   return sc_sub_bits(pack, num_bits, byte_ofs);
1559 }
1560
1561 fc_rounding_mode_t fc_set_rounding_mode(fc_rounding_mode_t mode)
1562 {
1563   if (mode == FC_TONEAREST || mode == FC_TOPOSITIVE || mode == FC_TONEGATIVE || mode == FC_TOZERO)
1564       rounding_mode = mode;
1565
1566   return rounding_mode;
1567 }
1568
1569 fc_rounding_mode_t fc_get_rounding_mode(void)
1570 {
1571   return rounding_mode;
1572 }
1573
1574 void init_fltcalc(int precision)
1575 {
1576   if (calc_buffer == NULL) {
1577     /* does nothing if already init */
1578     if (precision == 0) precision = FC_DEFAULT_PRECISION;
1579
1580     init_strcalc(precision + 4);
1581
1582     /* needs additionally two bits to round, a bit as explicit 1., and one for
1583      * addition overflow */
1584     max_precision = sc_get_precision() - 4;
1585     if (max_precision < precision)
1586       printf("WARING: not enough precision available, using %d\n", max_precision);
1587
1588     rounding_mode    = FC_TONEAREST;
1589     value_size       = sc_get_buffer_length();
1590     SIGN_POS         = 0;
1591     EXPONENT_POS     = SIGN_POS + sizeof(char);
1592     MANTISSA_POS     = EXPONENT_POS + value_size;
1593     DESCRIPTOR_POS   = MANTISSA_POS + value_size;
1594     calc_buffer_size = DESCRIPTOR_POS + sizeof(descriptor_t);
1595
1596     calc_buffer = xmalloc(calc_buffer_size);
1597     memset(calc_buffer, 0, calc_buffer_size);
1598     DEBUGPRINTF(("init fltcalc:\n\tVALUE_SIZE = %d\n\tSIGN_POS = %d\n\tEXPONENT_POS = %d\n\tMANTISSA_POS = %d\n\tDESCRIPTOR_POS = %d\n\tCALC_BUFFER_SIZE = %d\n\tcalc_buffer = %p\n\n", value_size, SIGN_POS, EXPONENT_POS, MANTISSA_POS, DESCRIPTOR_POS, calc_buffer_size, calc_buffer));
1599 #ifdef HAVE_LONG_DOUBLE
1600     DEBUGPRINTF(("\tUsing long double (1-15-64) interface\n"));
1601 #else
1602     DEBUGPRINTF(("\tUsing double (1-11-52) interface\n"));
1603 #endif
1604 #ifdef WORDS_BIGENDIAN
1605     DEBUGPRINTF(("\tWord order is big endian\n\n"));
1606 #else
1607     DEBUGPRINTF(("\tWord order is little endian\n\n"));
1608 #endif
1609   }
1610 }
1611
1612 void finish_fltcalc (void) {
1613   free(calc_buffer); calc_buffer = NULL;
1614 }
1615
1616 /* definition of interface functions */
1617 FC_DEFINE2(add)
1618 FC_DEFINE2(sub)
1619 FC_DEFINE2(mul)
1620 FC_DEFINE2(div)
1621 FC_DEFINE1(neg)
1622 FC_DEFINE1(int)
1623 FC_DEFINE1(rnd)