2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief tarval floating point calculations
10 * @author Mathias Heil
12 #ifndef FIRM_TV_FLTCALC_H
13 #define FIRM_TV_FLTCALC_H
16 #include "firm_types.h"
26 /** IEEE-754 Rounding modes. */
28 FC_TONEAREST, /**< if unsure, to the nearest even */
29 FC_TOPOSITIVE, /**< to +oo */
30 FC_TONEGATIVE, /**< to -oo */
31 FC_TOZERO /**< to 0 */
/**
 * Default precision used by the float calculation module.
 * NOTE(review): presumably meant as the argument for init_fltcalc(); the unit
 * of the value (bits of mantissa?) is not stated in this header - confirm
 * against the implementation.
 */
34 #define FC_DEFAULT_PRECISION 64
37 * possible float states
40 FC_NORMAL, /**< normal representation, implicit 1 */
42 FC_SUBNORMAL, /**< denormals, implicit 0 */
44 FC_NAN, /**< Not A Number */
/**
 * Internal representation of a floating point value.
 * This header only forward-declares the struct; its layout is not visible
 * here, so code that includes just this header can only handle fp_value
 * through pointers passed to and returned from the fc_* functions.
 */
48 typedef struct fp_value fp_value;
51 /** internal buffer access
52 * All functions that accept NULL as return buffer put their result into an
54 * @return fc_get_buffer() returns the pointer to the buffer, fc_get_buffer_length()
55 * returns the size of this buffer
57 const void *fc_get_buffer(void);
58 int fc_get_buffer_length(void);
/**
 * Build a value from a textual representation.
 *
 * NOTE(review): this prototype carries no documentation in this header; the
 * descriptions below are read off the signature and the conventions documented
 * for fc_val_from_ieee754()/fc_cast() - confirm against the implementation.
 *
 * @param str    presumably the text to convert
 * @param len    presumably the number of characters of str to consider
 * @param desc   The floating point descriptor
 * @param result presumably a buffer to hold the value built, with NULL
 *               selecting the internal accumulator buffer
 * @return presumably the buffer holding the result (note the untyped void *,
 *         unlike the fp_value * used by the neighbouring functions)
 */
61 void *fc_val_from_str(const char *str, size_t len, const float_descriptor_t *desc, void *result);
63 /** get the representation of a floating point value
64 * This function tries to build a representation having the same value as the
65 * float number passed.
66 * If the requested precision is less than the precision of long double the value
67 * built will be rounded. Therefore only an approximation of the passed float
68 * can be expected in this case.
70 * @param l The floating point number to build a representation for
71 * @param desc The floating point descriptor
72 * @param result A buffer to hold the value built. If this is NULL, the internal
73 * accumulator buffer is used. Note that the buffer must be big
74 * enough to hold the value. Use fc_get_buffer_length() to find out
77 * @return The result pointer passed to the function. If this was NULL this returns
78 * a pointer to the internal accumulator buffer
80 fp_value *fc_val_from_ieee754(long double l, const float_descriptor_t *desc,
83 /** retrieve the float value of an internal value
84 * This function casts the internal value to long double and returns a
85 * long double with that value.
86 * This implies that values of higher precision than long double are subject to
87 * rounding, so the returned value might not be the same as the actually
90 * @param val The representation of a float value
92 * @return a float value approximating the represented value
94 long double fc_val_to_ieee754(const fp_value *val);
96 /** cast a value to another precision
97 * This function changes the precision of a float representation.
98 * If the new precision is less than the original precision the returned
99 * value might not be the same as the original value.
101 * @param val The value to be cast
102 * @param desc The floating point descriptor
103 * @param result A buffer to hold the value built. If this is NULL, the internal
104 * accumulator buffer is used. Note that the buffer must be big
105 * enough to hold the value. Use fc_get_buffer_length() to find out
107 * @return The result pointer passed to the function. If this was NULL this returns
108 * a pointer to the internal accumulator buffer
110 fp_value *fc_cast(const fp_value *val, const float_descriptor_t *desc, fp_value *result);
113 /** build a special float value
114 * This function builds a representation for a special float value, as indicated by the
117 * @param desc The floating point descriptor
118 * @param result A buffer to hold the value built. If this is NULL, the internal
119 * accumulator buffer is used. Note that the buffer must be big
120 * enough to hold the value. Use fc_get_buffer_length() to find out
122 * @return The result pointer passed to the function. If this was NULL this returns
123 * a pointer to the internal accumulator buffer
125 fp_value *fc_get_min(const float_descriptor_t *desc, fp_value *result);
126 fp_value *fc_get_max(const float_descriptor_t *desc, fp_value *result);
127 fp_value *fc_get_snan(const float_descriptor_t *desc, fp_value *result);
128 fp_value *fc_get_qnan(const float_descriptor_t *desc, fp_value *result);
129 fp_value *fc_get_plusinf(const float_descriptor_t *desc, fp_value *result);
130 fp_value *fc_get_minusinf(const float_descriptor_t *desc, fp_value *result);
/**
 * Predicates classifying a value (zero, negative, infinity, NaN, subnormal).
 *
 * NOTE(review): the return convention is not documented in this header.
 * Presumably a non-zero result means the property named by the function
 * holds for a, matching the "returns non-zero if ..." wording used for
 * fc_zero_mantissa() and fc_is_exact() - confirm against the implementation.
 *
 * @param a The value to inspect
 */
133 int fc_is_zero(const fp_value *a);
134 int fc_is_negative(const fp_value *a);
135 int fc_is_inf(const fp_value *a);
136 int fc_is_nan(const fp_value *a);
137 int fc_is_subnormal(const fp_value *a);
/**
 * Arithmetic on float values.
 *
 * NOTE(review): these prototypes are not documented in this header.
 * Presumably fc_add/fc_sub/fc_mul/fc_div compute a+b, a-b, a*b and a/b and
 * fc_neg computes -a. How fc_int and fc_rnd differ (e.g. truncation vs.
 * rounding to an integral value) cannot be told from here - check the
 * implementation before relying on either.
 *
 * @param a      first (or only) operand
 * @param b      second operand (binary operations only)
 * @param result presumably follows the convention documented for fc_cast():
 *               a buffer to hold the value built, NULL selecting the internal
 *               accumulator buffer - confirm
 * @return presumably the buffer holding the result - confirm
 */
139 fp_value *fc_add(const fp_value *a, const fp_value *b, fp_value *result);
140 fp_value *fc_sub(const fp_value *a, const fp_value *b, fp_value *result);
141 fp_value *fc_mul(const fp_value *a, const fp_value *b, fp_value *result);
142 fp_value *fc_div(const fp_value *a, const fp_value *b, fp_value *result);
143 fp_value *fc_neg(const fp_value *a, fp_value *result);
144 fp_value *fc_int(const fp_value *a, fp_value *result);
145 fp_value *fc_rnd(const fp_value *a, fp_value *result);
/**
 * Write a textual form of a value into a character buffer.
 *
 * NOTE(review): undocumented in this header; the parameter descriptions are
 * read off the signature only - confirm against the implementation.
 *
 * @param a      The value to print
 * @param buf    destination character buffer
 * @param buflen presumably the capacity of buf
 * @param base   presumably selects the output format; the accepted values
 *               are not visible in this header
 * @return presumably buf - confirm
 */
147 char *fc_print(const fp_value *a, char *buf, int buflen, unsigned base);
149 /** Compare two values
150 * This function compares two values
152 * @param a Value No. 1
153 * @param b Value No. 2
154 * @result The relation between a and b; either less, equal, greater or
157 ir_relation fc_comp(const fp_value *a, const fp_value *b);
160 * Converts a floating point value into an integer value.
162 int fc_flt2int(const fp_value *a, void *result, ir_mode *dst_mode);
165 * Returns non-zero if the mantissa is zero, i.e. 1.0Exxx
167 int fc_zero_mantissa(const fp_value *value);
170 * Returns the exponent of a value.
172 int fc_get_exponent(const fp_value *value);
175 * Return non-zero if a given value can be converted losslessly into another precision.
177 int fc_can_lossless_conv_to(const fp_value *value, const float_descriptor_t *desc);
179 /** Set new rounding mode
180 * This function sets the rounding mode to one of the following, returning
181 * the previously set rounding mode.
182 * FC_TONEAREST (default):
183 * Any unrepresentable value is rounded to the nearest representable
184 * value. If it lies in the middle the value with the least significant
185 * bit of zero is chosen (the even one).
186 * Values too big to represent will round to +/-infinity.
188 * Any unrepresentable value is rounded towards negative infinity.
189 * Positive values too big to represent will round to the biggest
190 * representable value, negative values too small to represent will
191 * round to -infinity.
193 * Any unrepresentable value is rounded towards positive infinity.
194 * Negative values too small to represent will round to the smallest
195 * (most negative) representable value, positive values too big to
196 * represent will round to +infinity.
198 * Any unrepresentable value is rounded towards zero, effectively
199 * chopping off any bits beyond the mantissa size.
200 * Values too big to represent will round to the biggest/smallest
201 * representable value.
203 * These modes correspond to the modes required by the IEEE-754 standard.
205 * @param mode The new rounding mode. Any value other than the four
206 * defined values will have no effect.
207 * @return The previous rounding mode.
209 * @see fc_get_rounding_mode()
210 * @see IEEE754, IEEE854 Floating Point Standard
212 fc_rounding_mode_t fc_set_rounding_mode(fc_rounding_mode_t mode);
214 /** Get the rounding mode
215 * This function retrieves the currently used rounding mode
217 * @return The current rounding mode
218 * @see fc_set_rounding_mode()
220 fc_rounding_mode_t fc_get_rounding_mode(void);
222 /** Get bit representation of a value
223 * This function allows reading a value in encoded form, byte-wise.
224 * The value will be packed corresponding to the way used by the IEEE
225 * encoding formats, i.e.
227 * exp_size bits exponent + bias
228 * mant_size bits mantissa, without leading 1
230 * As in IEEE, an exponent of 0 indicates a denormalized number, which
231 * implies a most significant bit of zero instead of one; an exponent
232 * of all ones (2**exp_size - 1) encodes infinity if the mantissa is
233 * all zeros, else Not A Number.
235 * @param val A pointer to the value. If NULL is passed a copy of the
236 * most recent value passed to this function is used, saving the
237 * packing step. This behavior may be changed in the future.
238 * @param num_bit The maximum number of bits to return. Any bit beyond
239 * num_bit will be returned as zero.
240 * @param byte_ofs The byte index to read, 0 is the least significant
242 * @return 8 bits of encoded data
244 unsigned char fc_sub_bits(const fp_value *val, unsigned num_bit, unsigned byte_ofs);
247 * Returns non-zero if the result of the last operation was exact.
249 int fc_is_exact(void);
/**
 * Set-up and tear-down of the float calculation module.
 *
 * NOTE(review): undocumented in this header. Presumably init_fltcalc() has to
 * be called before any other fc_* function and finish_fltcalc() releases
 * whatever init_fltcalc() set up - confirm against the implementation.
 *
 * @param precision presumably the working precision of the module;
 *                  FC_DEFAULT_PRECISION is the default defined in this header
 */
251 void init_fltcalc(int precision);
252 void finish_fltcalc(void);
254 #endif /* FIRM_TV_FLTCALC_H */