2 * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the mapping of 64Bit intrinsic
23 * functions to code or library calls.
24 * @author Michael Beck
39 #include "ia32_new_nodes.h"
40 #include "bearch_ia32_t.h"
41 #include "gen_ia32_regalloc_if.h"
43 /** The array of all intrinsics that must be mapped. */
/* Flexible array (ARR_F); records are appended by
 * ia32_create_intrinsic_fkt and consumed by ia32_handle_intrinsics. */
44 static i_record *intrinsics;
46 /** An array to cache all entities */
/* Indexed by ir opcode; holds the lazily created entity per mapped op. */
47 static ir_entity *i_ents[iro_MaxOpcode];
50 * Maps all intrinsic calls that the backend supports
51 * and maps all instructions the backend did not support
54 void ia32_handle_intrinsics(void) {
/* Only lower if any i_record was registered.  part_block_used=1 because
 * the shift mappers (map_Shl/Shr/Shrs) split control flow around the
 * lowered call. */
55 if (intrinsics && ARR_LEN(intrinsics) > 0) {
56 lower_intrinsics(intrinsics, ARR_LEN(intrinsics), /*part_block_used=*/1);
/* Parameter indices of the lowered 64bit binop Calls:
 * (left_low, left_high, right_low, right_high). */
60 #define BINOP_Left_Low 0
61 #define BINOP_Left_High 1
62 #define BINOP_Right_Low 2
63 #define BINOP_Right_High 3
66 * Replace a call by a tuple of l_res, h_res.
/*
 * Turns the lowered Call into a Tuple so that all Projs on the Call are
 * redirected: memory becomes NoMem, the control-flow outputs become Bad,
 * and the result projection becomes a Tuple of (l_res[, h_res]).
 * h_res may be NULL for single-result mappings (see map_Conv ll->float).
 */
68 static void resolve_call(ir_node *call, ir_node *l_res, ir_node *h_res, ir_graph *irg, ir_node *block) {
73 res = new_r_Tuple(irg, block, h_res == NULL ? 1 : 2, in);
75 turn_into_tuple(call, pn_Call_max);
76 set_Tuple_pred(call, pn_Call_M_regular, get_irg_no_mem(irg));
77 /* Matze: the new_r_Jmp here sometimes CSEs and then bad things happen
78 * (in movgen.c from 186.crafty for example) I don't know why it is here
79 * and if this fix is correct... */
80 /*set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(irg, block));*/
81 set_Tuple_pred(call, pn_Call_X_regular, get_irg_bad(irg));
82 set_Tuple_pred(call, pn_Call_X_except, get_irg_bad(irg));
83 set_Tuple_pred(call, pn_Call_T_result, res);
84 set_Tuple_pred(call, pn_Call_M_except, get_irg_no_mem(irg));
85 set_Tuple_pred(call, pn_Call_P_value_res_base, get_irg_bad(irg));
89 * Map an Add (a_l, a_h, b_l, b_h)
/*
 * Lowers a 64bit addition intrinsic: the low words are added with an
 * ia32 Add that also produces the carry in the flags register, and the
 * high words are added with an Adc that consumes that carry.
 * Result modes are taken from the Call's method type (res 0 = low,
 * res 1 = high).
 */
91 static int map_Add(ir_node *call, void *ctx) {
92 ir_graph *irg = current_ir_graph;
93 dbg_info *dbg = get_irn_dbg_info(call);
94 ir_node *block = get_nodes_block(call);
95 ir_node **params = get_Call_param_arr(call);
96 ir_type *method = get_Call_type(call);
97 ir_node *a_l = params[BINOP_Left_Low];
98 ir_node *a_h = params[BINOP_Left_High];
99 ir_node *b_l = params[BINOP_Right_Low];
100 ir_node *b_h = params[BINOP_Right_High];
101 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
102 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
/* mode of the ia32 flags register class, used for the carry Proj */
103 ir_mode *mode_flags = ia32_reg_classes[CLASS_ia32_flags].mode;
104 ir_node *add_low, *add_high, *flags;
105 ir_node *l_res, *h_res;
108 /* l_res = a_l + b_l */
109 /* h_res = a_h + b_h + carry */
/* the low Add is built as mode_T: it yields both result and flags */
111 add_low = new_rd_ia32_l_Add(dbg, irg, block, a_l, b_l, mode_T);
112 flags = new_r_Proj(irg, block, add_low, mode_flags, pn_ia32_flags);
113 add_high = new_rd_ia32_l_Adc(dbg, irg, block, a_h, b_h, flags, h_mode);
115 l_res = new_r_Proj(irg, block, add_low, l_mode, pn_ia32_res);
118 resolve_call(call, l_res, h_res, irg, block);
123 * Map a Sub (a_l, a_h, b_l, b_h)
/*
 * Lowers a 64bit subtraction intrinsic, mirroring map_Add: the low
 * words are subtracted with an ia32 Sub that produces the borrow in the
 * flags register, the high words with an Sbb that consumes it.
 */
125 static int map_Sub(ir_node *call, void *ctx)
127 ir_graph *irg = current_ir_graph;
128 dbg_info *dbg = get_irn_dbg_info(call);
129 ir_node *block = get_nodes_block(call);
130 ir_node **params = get_Call_param_arr(call);
131 ir_type *method = get_Call_type(call);
132 ir_node *a_l = params[BINOP_Left_Low];
133 ir_node *a_h = params[BINOP_Left_High];
134 ir_node *b_l = params[BINOP_Right_Low];
135 ir_node *b_h = params[BINOP_Right_High];
136 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
137 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
/* mode of the ia32 flags register class, used for the borrow Proj */
138 ir_mode *mode_flags = ia32_reg_classes[CLASS_ia32_flags].mode;
139 ir_node *sub_low, *sub_high, *flags;
140 ir_node *l_res, *h_res;
143 /* l_res = a_l - b_l */
144 /* h_res = a_h - b_h - carry */
/* the low Sub is built as mode_T: it yields both result and flags */
146 sub_low = new_rd_ia32_l_Sub(dbg, irg, block, a_l, b_l, mode_T);
147 flags = new_r_Proj(irg, block, sub_low, mode_flags, pn_ia32_flags);
148 sub_high = new_rd_ia32_l_Sbb(dbg, irg, block, a_h, b_h, flags, h_mode);
150 l_res = new_r_Proj(irg, block, sub_low, l_mode, pn_ia32_res);
153 resolve_call(call, l_res, h_res, irg, block);
158 * Map a Shl (a_l, a_h, count)
/*
 * Lowers a 64bit shift-left.  For a constant count the code is built
 * directly: count >= 32 moves the low word into the high word and
 * shifts it (the hardware masks the count to 5 bits, so no reduction
 * is needed), otherwise an SHLD/SHL pair is used.  For a variable
 * count BOTH variants are built and the correct one is selected at
 * runtime by testing (cnt & 32) == 0; the results are merged with
 * Phis in the (rewired) block of the original call.
 */
160 static int map_Shl(ir_node *call, void *ctx) {
161 ir_graph *irg = current_ir_graph;
162 dbg_info *dbg = get_irn_dbg_info(call);
163 ir_node *block = get_nodes_block(call);
164 ir_node **params = get_Call_param_arr(call);
165 ir_type *method = get_Call_type(call);
166 ir_node *a_l = params[BINOP_Left_Low];
167 ir_node *a_h = params[BINOP_Left_High];
168 ir_node *cnt = params[BINOP_Right_Low];
169 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
170 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
172 ir_node *l_res, *h_res, *irn, *cond, *upper, *n_block, *l1, *l2, *h1, *h2, *in[2];
176 /* the shift count is a const, create better code */
177 tarval *tv = get_Const_tarval(cnt);
/* count >= 32: the low word is shifted entirely into the high word */
179 if (tarval_cmp(tv, new_tarval_from_long(32, l_mode)) & (pn_Cmp_Gt|pn_Cmp_Eq)) {
180 /* simplest case: shift only the lower bits. Note that there is no
181 need to reduce the constant here, this is done by the hardware. */
182 ir_node *conv = new_rd_Conv(dbg, irg, block, a_l, h_mode);
183 h_res = new_rd_Shl(dbg, irg, block, conv, cnt, h_mode);
184 l_res = new_rd_Const(dbg, irg, block, l_mode, get_mode_null(l_mode));
187 /* h_res = SHLD a_h, a_l, cnt */
188 h_res = new_rd_ia32_l_ShlD(dbg, irg, block, a_h, a_l, cnt, h_mode);
190 /* l_res = SHL a_l, cnt */
/* the ShlDep carries h_res as an extra dependency operand */
191 l_res = new_rd_ia32_l_ShlDep(dbg, irg, block, a_l, cnt, h_res, l_mode);
194 resolve_call(call, l_res, h_res, irg, block);
/* variable count: build the "cnt < 32" variant in the current block */
199 upper = get_nodes_block(call);
201 /* h_res = SHLD a_h, a_l, cnt */
202 h1 = new_rd_ia32_l_ShlD(dbg, irg, upper, a_h, a_l, cnt, h_mode);
204 /* l_res = SHL a_l, cnt */
205 l1 = new_rd_ia32_l_ShlDep(dbg, irg, upper, a_l, cnt, h1, l_mode);
/* test (cnt & 32) == 0, i.e. whether the effective count is < 32 */
207 c_mode = get_irn_mode(cnt);
208 irn = new_r_Const_long(irg, upper, c_mode, 32);
209 irn = new_rd_And(dbg, irg, upper, cnt, irn, c_mode);
210 irn = new_rd_Cmp(dbg, irg, upper, irn, new_r_Const(irg, upper, c_mode, get_mode_null(c_mode)));
211 irn = new_r_Proj(irg, upper, irn, mode_b, pn_Cmp_Eq);
212 cond = new_rd_Cond(dbg, irg, upper, irn);
214 in[0] = new_r_Proj(irg, upper, cond, mode_X, pn_Cond_true);
215 in[1] = new_r_Proj(irg, upper, cond, mode_X, pn_Cond_false);
217 /* the block for cnt >= 32 */
218 n_block = new_rd_Block(dbg, irg, 1, &in[1]);
219 h2 = new_rd_Conv(dbg, irg, n_block, l1, h_mode);
220 l2 = new_r_Const(irg, n_block, l_mode, get_mode_null(l_mode));
221 in[1] = new_r_Jmp(irg, n_block);
/* rewire the call's block: it now merges the two variants */
223 set_irn_in(block, 2, in);
227 l_res = new_r_Phi(irg, block, 2, in, l_mode);
/* chain the new Phis via the link field so the loop below finds them */
228 set_irn_link(block, l_res);
232 h_res = new_r_Phi(irg, block, 2, in, h_mode);
233 set_irn_link(l_res, h_res);
234 set_irn_link(h_res, NULL);
/* move the call and all linked nodes into the merge block */
237 set_nodes_block(call, block);
238 for (irn = get_irn_link(call); irn != NULL; irn = get_irn_link(irn))
239 set_nodes_block(irn, block);
241 resolve_call(call, l_res, h_res, irg, block);
246 * Map a Shr (a_l, a_h, count)
/*
 * Lowers a 64bit unsigned shift-right, mirroring map_Shl: for a
 * constant count >= 32 the high word is shifted into the low word and
 * the high result becomes 0; otherwise SHRD/SHR is used.  For a
 * variable count both variants are built and selected at runtime by
 * testing (cnt & 32) == 0, merged with Phis in the rewired call block.
 */
248 static int map_Shr(ir_node *call, void *ctx) {
249 ir_graph *irg = current_ir_graph;
250 dbg_info *dbg = get_irn_dbg_info(call);
251 ir_node *block = get_nodes_block(call);
252 ir_node **params = get_Call_param_arr(call);
253 ir_type *method = get_Call_type(call);
254 ir_node *a_l = params[BINOP_Left_Low];
255 ir_node *a_h = params[BINOP_Left_High];
256 ir_node *cnt = params[BINOP_Right_Low];
257 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
258 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
260 ir_node *l_res, *h_res, *irn, *cond, *upper, *n_block, *l1, *l2, *h1, *h2, *in[2];
264 /* the shift count is a const, create better code */
265 tarval *tv = get_Const_tarval(cnt);
/* count >= 32: the high word is shifted entirely into the low word */
267 if (tarval_cmp(tv, new_tarval_from_long(32, l_mode)) & (pn_Cmp_Gt|pn_Cmp_Eq)) {
268 /* simplest case: shift only the higher bits. Note that there is no
269 need to reduce the constant here, this is done by the hardware. */
270 ir_node *conv = new_rd_Conv(dbg, irg, block, a_h, l_mode);
271 h_res = new_rd_Const(dbg, irg, block, h_mode, get_mode_null(h_mode));
272 l_res = new_rd_Shr(dbg, irg, block, conv, cnt, l_mode);
274 /* l_res = SHRD a_h:a_l, cnt */
275 l_res = new_rd_ia32_l_ShrD(dbg, irg, block, a_l, a_h, cnt, l_mode);
277 /* h_res = SHR a_h, cnt */
/* the ShrDep carries l_res as an extra dependency operand */
278 h_res = new_rd_ia32_l_ShrDep(dbg, irg, block, a_h, cnt, l_res, h_mode);
280 resolve_call(call, l_res, h_res, irg, block);
/* variable count: build the "cnt < 32" variant in the current block */
285 upper = get_nodes_block(call);
287 /* l_res = SHRD a_h:a_l, cnt */
288 l1 = new_rd_ia32_l_ShrD(dbg, irg, upper, a_l, a_h, cnt, l_mode);
290 /* h_res = SHR a_h, cnt */
291 h1 = new_rd_ia32_l_ShrDep(dbg, irg, upper, a_h, cnt, l1, h_mode);
/* test (cnt & 32) == 0, i.e. whether the effective count is < 32 */
293 c_mode = get_irn_mode(cnt);
294 irn = new_r_Const_long(irg, upper, c_mode, 32);
295 irn = new_rd_And(dbg, irg, upper, cnt, irn, c_mode);
296 irn = new_rd_Cmp(dbg, irg, upper, irn, new_r_Const(irg, upper, c_mode, get_mode_null(c_mode)));
297 irn = new_r_Proj(irg, upper, irn, mode_b, pn_Cmp_Eq);
298 cond = new_rd_Cond(dbg, irg, upper, irn);
300 in[0] = new_r_Proj(irg, upper, cond, mode_X, pn_Cond_true);
301 in[1] = new_r_Proj(irg, upper, cond, mode_X, pn_Cond_false);
303 /* the block for cnt >= 32 */
304 n_block = new_rd_Block(dbg, irg, 1, &in[1]);
305 l2 = new_rd_Conv(dbg, irg, n_block, h1, l_mode);
/* NOTE(review): mode mismatch — the Const is built with l_mode but its
 * tarval comes from h_mode, and it feeds the h_mode Phi below.  The
 * parallel code in map_Shl (and h_res in the constant-count case above)
 * uses one mode for both arguments; presumably both should be h_mode.
 * TODO confirm. */
306 h2 = new_r_Const(irg, n_block, l_mode, get_mode_null(h_mode));
307 in[1] = new_r_Jmp(irg, n_block);
/* rewire the call's block: it now merges the two variants */
309 set_irn_in(block, 2, in);
313 l_res = new_r_Phi(irg, block, 2, in, l_mode);
/* chain the new Phis via the link field so the loop below finds them */
314 set_irn_link(block, l_res);
318 h_res = new_r_Phi(irg, block, 2, in, h_mode);
319 set_irn_link(l_res, h_res);
320 set_irn_link(h_res, NULL);
/* move the call and all linked nodes into the merge block */
323 set_nodes_block(call, block);
324 for (irn = get_irn_link(call); irn != NULL; irn = get_irn_link(irn))
325 set_nodes_block(irn, block);
327 resolve_call(call, l_res, h_res, irg, block);
332 * Map a Shrs (a_l, a_h, count)
/*
 * Lowers a 64bit arithmetic shift-right, mirroring map_Shr: for a
 * constant count >= 32 the high word is shifted into the low word and
 * the high result is the replicated sign (a_h >>s 31); otherwise
 * SHRD/SAR is used.  For a variable count both variants are built and
 * selected at runtime by testing (cnt & 32) == 0, merged with Phis in
 * the rewired call block.
 */
334 static int map_Shrs(ir_node *call, void *ctx) {
335 ir_graph *irg = current_ir_graph;
336 dbg_info *dbg = get_irn_dbg_info(call);
337 ir_node *block = get_nodes_block(call);
338 ir_node **params = get_Call_param_arr(call);
339 ir_type *method = get_Call_type(call);
340 ir_node *a_l = params[BINOP_Left_Low];
341 ir_node *a_h = params[BINOP_Left_High];
342 ir_node *cnt = params[BINOP_Right_Low];
343 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
344 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
346 ir_node *l_res, *h_res, *irn, *cond, *upper, *n_block, *l1, *l2, *h1, *h2, *in[2];
350 /* the shift count is a const, create better code */
351 tarval *tv = get_Const_tarval(cnt);
/* count >= 32: the high word is shifted entirely into the low word */
353 if (tarval_cmp(tv, new_tarval_from_long(32, l_mode)) & (pn_Cmp_Gt|pn_Cmp_Eq)) {
354 /* simplest case: shift only the higher bits. Note that there is no
355 need to reduce the constant here, this is done by the hardware. */
356 ir_node *conv = new_rd_Conv(dbg, irg, block, a_h, l_mode);
357 ir_mode *c_mode = get_irn_mode(cnt);
/* high result: sign of a_h replicated into all 32 bits */
359 h_res = new_rd_Shrs(dbg, irg, block, a_h, new_r_Const_long(irg, block, c_mode, 31), h_mode);
360 l_res = new_rd_Shrs(dbg, irg, block, conv, cnt, l_mode);
362 /* l_res = SHRD a_h:a_l, cnt */
363 l_res = new_rd_ia32_l_ShrD(dbg, irg, block, a_l, a_h, cnt, l_mode);
365 /* h_res = SAR a_h, cnt */
/* the SarDep carries l_res as an extra dependency operand */
366 h_res = new_rd_ia32_l_SarDep(dbg, irg, block, a_h, cnt, l_res, h_mode);
368 resolve_call(call, l_res, h_res, irg, block);
/* variable count: build the "cnt < 32" variant in the current block */
373 upper = get_nodes_block(call);
375 /* l_res = SHRD a_h:a_l, cnt */
376 l1 = new_rd_ia32_l_ShrD(dbg, irg, upper, a_l, a_h, cnt, l_mode);
378 /* h_res = SAR a_h, cnt */
379 h1 = new_rd_ia32_l_SarDep(dbg, irg, upper, a_h, cnt, l1, h_mode);
/* test (cnt & 32) == 0, i.e. whether the effective count is < 32 */
381 c_mode = get_irn_mode(cnt);
382 irn = new_r_Const_long(irg, upper, c_mode, 32);
383 irn = new_rd_And(dbg, irg, upper, cnt, irn, c_mode);
384 irn = new_rd_Cmp(dbg, irg, upper, irn, new_r_Const(irg, upper, c_mode, get_mode_null(c_mode)));
385 irn = new_r_Proj(irg, upper, irn, mode_b, pn_Cmp_Eq);
386 cond = new_rd_Cond(dbg, irg, upper, irn);
388 in[0] = new_r_Proj(irg, upper, cond, mode_X, pn_Cond_true);
389 in[1] = new_r_Proj(irg, upper, cond, mode_X, pn_Cond_false);
391 /* the block for cnt >= 32 */
392 n_block = new_rd_Block(dbg, irg, 1, &in[1]);
393 l2 = new_rd_Conv(dbg, irg, n_block, h1, l_mode);
/* NOTE(review): the 31-Const is created in `block` (the call's block,
 * which is rewired into the merge block just below) while the
 * surrounding nodes live in n_block — presumably this should be
 * n_block, as in the other nodes of this branch.  TODO confirm. */
394 h2 = new_rd_Shrs(dbg, irg, n_block, a_h, new_r_Const_long(irg, block, c_mode, 31), h_mode);
395 in[1] = new_r_Jmp(irg, n_block);
/* rewire the call's block: it now merges the two variants */
397 set_irn_in(block, 2, in);
401 l_res = new_r_Phi(irg, block, 2, in, l_mode);
/* chain the new Phis via the link field so the loop below finds them */
402 set_irn_link(block, l_res);
406 h_res = new_r_Phi(irg, block, 2, in, h_mode);
407 set_irn_link(l_res, h_res);
408 set_irn_link(h_res, NULL);
/* move the call and all linked nodes into the merge block */
411 set_nodes_block(call, block);
412 for (irn = get_irn_link(call); irn != NULL; irn = get_irn_link(irn))
413 set_nodes_block(irn, block);
415 resolve_call(call, l_res, h_res, irg, block);
420 * Map a Mul (a_l, a_h, b_l, b_h)
/*
 * Lowers a 64bit multiplication.  If both operands are detectably
 * 32bit sign extensions (high word is low word >>s 31, or a constant
 * whose high word sign-extends the low word), a single signed
 * 32x32->64 IMul suffices.  Otherwise the full product is assembled:
 *   EDX:EAX = a_l * b_l          (widening l_Mul)
 *   h_res   = a_h*b_l + a_l*b_h + EDX
 */
422 static int map_Mul(ir_node *call, void *ctx) {
423 ir_graph *irg = current_ir_graph;
424 dbg_info *dbg = get_irn_dbg_info(call);
425 ir_node *block = get_nodes_block(call);
426 ir_node **params = get_Call_param_arr(call);
427 ir_type *method = get_Call_type(call);
428 ir_node *a_l = params[BINOP_Left_Low];
429 ir_node *a_h = params[BINOP_Left_High];
430 ir_node *b_l = params[BINOP_Right_Low];
431 ir_node *b_h = params[BINOP_Right_High];
432 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
433 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
434 ir_node *l_res, *h_res, *mul, *pEDX, *add;
447 /* handle the often used case of 32x32=64 mul */
/* a_h == a_l >>s 31 means a is a 32bit sign extension */
448 if (is_Shrs(a_h) && get_Shrs_left(a_h) == a_l) {
449 ir_node *c1 = get_Shrs_right(a_h);
452 tarval *tv = get_Const_tarval(c1);
454 if (tarval_is_long(tv) && get_tarval_long(tv) == 31) {
455 /* a is a sign extend */
457 if (is_Shrs(b_h) && get_Shrs_left(b_h) == b_l && c1 == get_Shrs_right(b_h)) {
458 /* b is a sign extend: it's a 32 * 32 = 64 signed multiplication */
459 mul = new_rd_ia32_l_IMul(dbg, irg, block, a_l, b_l);
460 h_res = new_rd_Proj(dbg, irg, block, mul, h_mode, pn_ia32_l_Mul_EDX);
461 l_res = new_rd_Proj(dbg, irg, block, mul, l_mode, pn_ia32_l_Mul_EAX);
465 /* we rely here on Consts being on the right side */
466 if (is_Const(b_h) && is_Const(b_l)) {
467 tarval *th = get_Const_tarval(b_h);
468 tarval *tl = get_Const_tarval(b_l);
470 if (tarval_is_long(th) && tarval_is_long(tl)) {
471 long h = get_tarval_long(th);
472 long l = get_tarval_long(tl);
/* (h,l) sign-extends iff h is 0 for non-negative l, -1 for negative */
474 if ((h == 0 && l >= 0) || (h == -1 && l < 0)) {
475 /* b is a sign extended const */
476 mul = new_rd_ia32_l_IMul(dbg, irg, block, a_l, b_l);
477 h_res = new_rd_Proj(dbg, irg, block, mul, h_mode, pn_ia32_l_Mul_EDX);
478 l_res = new_rd_Proj(dbg, irg, block, mul, l_mode, pn_ia32_l_Mul_EAX);
/* general case: widening multiply of the low words ... */
488 mul = new_rd_ia32_l_Mul(dbg, irg, block, a_l, b_l);
489 pEDX = new_rd_Proj(dbg, irg, block, mul, h_mode, pn_ia32_l_Mul_EDX);
490 l_res = new_rd_Proj(dbg, irg, block, mul, l_mode, pn_ia32_l_Mul_EAX);
/* ... plus the two cross products added into the high word */
492 b_l = new_rd_Conv(dbg, irg, block, b_l, h_mode);
493 mul = new_rd_Mul( dbg, irg, block, a_h, b_l, h_mode);
494 add = new_rd_Add( dbg, irg, block, mul, pEDX, h_mode);
495 a_l = new_rd_Conv(dbg, irg, block, a_l, h_mode);
496 mul = new_rd_Mul( dbg, irg, block, a_l, b_h, h_mode);
497 h_res = new_rd_Add( dbg, irg, block, add, mul, h_mode);
500 resolve_call(call, l_res, h_res, irg, block);
506 * Map a Minus (a_l, a_h)
/*
 * Lowers a 64bit negation onto the ia32 Minus64Bit pseudo instruction
 * and projects out the low/high results.
 */
508 static int map_Minus(ir_node *call, void *ctx) {
509 ir_graph *irg = current_ir_graph;
510 dbg_info *dbg = get_irn_dbg_info(call);
511 ir_node *block = get_nodes_block(call);
512 ir_node **params = get_Call_param_arr(call);
513 ir_type *method = get_Call_type(call);
514 ir_node *a_l = params[BINOP_Left_Low];
515 ir_node *a_h = params[BINOP_Left_High];
516 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
517 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
518 ir_node *l_res, *h_res, *res;
521 res = new_rd_ia32_Minus64Bit(dbg, irg, block, a_l, a_h);
522 l_res = new_r_Proj(irg, block, res, l_mode, pn_ia32_Minus64Bit_low_res);
523 h_res = new_r_Proj(irg, block, res, h_mode, pn_ia32_Minus64Bit_high_res);
525 resolve_call(call, l_res, h_res, irg, block);
531 * Map a Abs (a_l, a_h)
/*
 * Lowers a 64bit Abs using the branch-free sign-mask idiom:
 *   sign = a_h >>s 31                  (0 or all ones)
 *   tmp  = (a_l ^ sign, a_h ^ sign)
 *   res  = tmp - sign                  (64bit subtract with borrow)
 */
533 static int map_Abs(ir_node *call, void *ctx) {
534 ir_graph *irg = current_ir_graph;
535 dbg_info *dbg = get_irn_dbg_info(call);
536 ir_node *block = get_nodes_block(call);
537 ir_node **params = get_Call_param_arr(call);
538 ir_type *method = get_Call_type(call);
539 ir_node *a_l = params[BINOP_Left_Low];
540 ir_node *a_h = params[BINOP_Left_High];
541 ir_mode *l_mode = get_type_mode(get_method_res_type(method, 0));
542 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
/* mode of the ia32 flags register class, used for the borrow Proj */
543 ir_mode *mode_flags = ia32_reg_classes[CLASS_ia32_flags].mode;
544 ir_node *l_res, *h_res, *sign, *sub_l, *sub_h;
551 Code inspired by gcc output :) (although gcc doubles the
552 operation for t1 as t2 and uses t1 for operations with low part
553 and t2 for operations with high part which is actually unnecessary
554 because t1 and t2 represent the same value)
560 h_res = t3 - t1 - carry
564 /* TODO: give a hint to the backend somehow to not create a cltd here... */
/* NOTE(review): the shift-count Const is built in l_mode although the
 * shifted value is h_mode — presumably acceptable for a count operand;
 * confirm against the Shrs mode rules. */
565 sign = new_rd_Shrs(dbg, irg, block, a_h, new_Const_long(l_mode, 31), h_mode);
566 sign_l = new_rd_Conv(dbg, irg, block, sign, l_mode);
567 sub_l = new_rd_Eor(dbg, irg, block, a_l, sign_l, l_mode);
568 sub_h = new_rd_Eor(dbg, irg, block, a_h, sign, h_mode);
/* the low Sub is built as mode_T: it yields both result and flags */
570 l_sub = new_rd_ia32_l_Sub(dbg, irg, block, sub_l, sign_l, mode_T);
571 l_res = new_r_Proj(irg, block, l_sub, l_mode, pn_ia32_res);
572 flags = new_r_Proj(irg, block, l_sub, mode_flags, pn_ia32_flags);
573 h_res = new_rd_ia32_l_Sbb(dbg, irg, block, sub_h, sign, flags, h_mode);
575 resolve_call(call, l_res, h_res, irg, block);
/* Build an ident from a string literal (length excludes the '\0'). */
580 #define ID(x) new_id_from_chars(x, sizeof(x)-1)
583 * Maps a Div. Change into a library call
/*
 * Retargets the 64bit division Call to __divdi3/__udivdi3 by patching
 * the SymConst the Call points to.  The library entity is created
 * lazily and cached in the intrinsic environment.
 */
585 static int map_Div(ir_node *call, void *ctx) {
586 ia32_intrinsic_env_t *env = ctx;
587 ir_type *method = get_Call_type(call);
/* signedness is decided on the mode of result 1 (the high word);
 * NOTE(review): map_Mod uses result 0 — confirm both words always
 * agree in signedness. */
588 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 1));
593 if (mode_is_signed(h_mode)) {
594 /* 64bit signed Division */
597 /* create library entity */
598 ent = env->divdi3 = new_entity(get_glob_type(), ID("__divdi3"), method);
599 set_entity_visibility(ent, visibility_external_allocated);
600 set_entity_ld_ident(ent, ID("__divdi3"));
603 /* 64bit unsigned Division */
606 /* create library entity */
607 ent = env->udivdi3 = new_entity(get_glob_type(), ID("__udivdi3"), method);
608 set_entity_visibility(ent, visibility_external_allocated);
609 set_entity_ld_ident(ent, ID("__udivdi3"));
/* redirect the existing Call to the library function */
613 ptr = get_Call_ptr(call);
614 set_SymConst_symbol(ptr, sym);
619 * Maps a Mod. Change into a library call
/*
 * Retargets the 64bit modulo Call to __moddi3/__umoddi3 by patching
 * the SymConst the Call points to.  The library entity is created
 * lazily and cached in the intrinsic environment.
 */
621 static int map_Mod(ir_node *call, void *ctx) {
622 ia32_intrinsic_env_t *env = ctx;
623 ir_type *method = get_Call_type(call);
/* NOTE(review): signedness is checked on result 0 here but on result 1
 * in map_Div — confirm both words always agree in signedness. */
624 ir_mode *h_mode = get_type_mode(get_method_res_type(method, 0));
629 if (mode_is_signed(h_mode)) {
630 /* 64bit signed Modulo */
633 /* create library entity */
634 ent = env->moddi3 = new_entity(get_glob_type(), ID("__moddi3"), method);
635 set_entity_visibility(ent, visibility_external_allocated);
636 set_entity_ld_ident(ent, ID("__moddi3"));
639 /* 64bit unsigned Modulo */
642 /* create library entity */
643 ent = env->umoddi3 = new_entity(get_glob_type(), ID("__umoddi3"), method);
644 set_entity_visibility(ent, visibility_external_allocated);
645 set_entity_ld_ident(ent, ID("__umoddi3"));
/* redirect the existing Call to the library function */
649 ptr = get_Call_ptr(call);
650 set_SymConst_symbol(ptr, sym);
/*
 * Maps the 64bit <-> float Conv intrinsics.  Values are exchanged with
 * the x87 FPU through a spill area on the stack frame (2 * gp_bytes,
 * allocated lazily once per irg and cached in the environment):
 *  - float -> long long: move into x87 (SSEtoX87), store as 64bit int
 *    (vfist), then load the low and high words separately.
 *  - long long -> float: store the two words, load into x87 with
 *    implicit conversion (vfild), then move back (X87toSSE).
 * The SSEtoX87/X87toSSE shuffles exist because the FPU in use is
 * unknown at this point; they are killed or split later (see comments
 * below).
 */
657 static int map_Conv(ir_node *call, void *ctx) {
658 ia32_intrinsic_env_t *env = ctx;
659 ir_graph *irg = current_ir_graph;
660 dbg_info *dbg = get_irn_dbg_info(call);
661 ir_node *block = get_nodes_block(call);
662 ir_node **params = get_Call_param_arr(call);
663 ir_type *method = get_Call_type(call);
664 int n = get_Call_n_params(call);
665 int gp_bytes = get_mode_size_bytes(ia32_reg_classes[CLASS_ia32_gp].mode);
667 ir_node *l_res, *h_res, *frame, *fres;
668 ir_node *store_l, *store_h;
669 ir_node *op_mem[2], *mem;
672 /* We have a Conv float -> long long here */
673 ir_node *a_f = params[0];
674 ir_mode *l_res_mode = get_type_mode(get_method_res_type(method, 0));
675 ir_mode *h_res_mode = get_type_mode(get_method_res_type(method, 1));
677 assert(mode_is_float(get_irn_mode(a_f)) && "unexpected Conv call");
679 /* allocate memory on frame to store args */
/* reuse the per-irg spill area if this irg already has one */
680 ent = env->irg == irg ? env->d_ll_conv : NULL;
682 ent = env->d_ll_conv = frame_alloc_area(get_irg_frame_type(irg), 2 * gp_bytes, 16, 0);
687 frame = get_irg_frame(irg);
690 Now we create a node to move the value from a XMM register into
691 x87 FPU because it is unknown here, which FPU is used.
692 This node is killed in transformation phase when not needed.
693 Otherwise it is split up into a movsd + fld
695 a_f = new_rd_ia32_l_SSEtoX87(dbg, irg, block, frame, a_f, get_irg_no_mem(irg), mode_D);
696 set_ia32_frame_ent(a_f, ent);
697 set_ia32_use_frame(a_f);
698 set_ia32_ls_mode(a_f, mode_D);
700 /* store from FPU as Int */
701 a_f = new_rd_ia32_l_vfist(dbg, irg, block, frame, a_f, get_irg_no_mem(irg));
702 set_ia32_frame_ent(a_f, ent);
703 set_ia32_use_frame(a_f);
/* store the full 64bit value (mode_Ls) into the spill area */
704 set_ia32_ls_mode(a_f, mode_Ls);
707 /* load low part of the result */
708 l_res = new_rd_ia32_l_Load(dbg, irg, block, frame, mem);
709 set_ia32_frame_ent(l_res, ent);
710 set_ia32_use_frame(l_res);
711 set_ia32_ls_mode(l_res, l_res_mode);
712 l_res = new_r_Proj(irg, block, l_res, l_res_mode, pn_ia32_l_Load_res);
714 /* load high part of the result */
715 h_res = new_rd_ia32_l_Load(dbg, irg, block, frame, mem);
716 set_ia32_frame_ent(h_res, ent);
/* the high word sits one gp word above the low word */
717 add_ia32_am_offs_int(h_res, gp_bytes);
718 set_ia32_use_frame(h_res);
719 set_ia32_ls_mode(h_res, h_res_mode);
720 h_res = new_r_Proj(irg, block, h_res, h_res_mode, pn_ia32_l_Load_res);
723 resolve_call(call, l_res, h_res, irg, block);
726 /* We have a Conv long long -> float here */
727 ir_node *a_l = params[BINOP_Left_Low];
728 ir_node *a_h = params[BINOP_Left_High];
729 ir_mode *mode_a_l = get_irn_mode(a_l);
730 ir_mode *mode_a_h = get_irn_mode(a_h);
731 ir_mode *fres_mode = get_type_mode(get_method_res_type(method, 0));
733 assert(! mode_is_float(mode_a_l) && ! mode_is_float(mode_a_h) && "unexpected Conv call");
735 /* allocate memory on frame to store args */
/* reuse the per-irg spill area if this irg already has one */
736 ent = env->irg == irg ? env->ll_d_conv : NULL;
738 ent = env->ll_d_conv = frame_alloc_area(get_irg_frame_type(irg), 2 * gp_bytes, 16, 0);
743 frame = get_irg_frame(irg);
745 /* store first arg (low part) */
746 store_l = new_rd_ia32_l_Store(dbg, irg, block, frame, a_l, get_irg_no_mem(irg));
747 set_ia32_frame_ent(store_l, ent);
748 set_ia32_use_frame(store_l);
749 set_ia32_ls_mode(store_l, get_irn_mode(a_l));
752 /* store second arg (high part) */
753 store_h = new_rd_ia32_l_Store(dbg, irg, block, frame, a_h, get_irg_no_mem(irg));
754 set_ia32_frame_ent(store_h, ent);
755 add_ia32_am_offs_int(store_h, gp_bytes);
756 set_ia32_use_frame(store_h);
757 set_ia32_ls_mode(store_h, get_irn_mode(a_h));
/* join both store memories before the FPU load */
760 mem = new_r_Sync(irg, block, 2, op_mem);
762 /* Load arg into x87 FPU (implicit convert) */
763 fres = new_rd_ia32_l_vfild(dbg, irg, block, frame, mem);
764 set_ia32_frame_ent(fres, ent);
765 set_ia32_use_frame(fres);
766 set_ia32_ls_mode(fres, mode_D);
767 mem = new_r_Proj(irg, block, fres, mode_M, pn_ia32_l_vfild_M);
768 fres = new_r_Proj(irg, block, fres, fres_mode, pn_ia32_l_vfild_res);
771 Now we create a node to move the loaded value into a XMM
772 register because it is unknown here, which FPU is used.
773 This node is killed in transformation phase when not needed.
774 Otherwise it is split up into a fst + movsd
776 fres = new_rd_ia32_l_X87toSSE(dbg, irg, block, frame, fres, mem, fres_mode);
777 set_ia32_frame_ent(fres, ent);
778 set_ia32_use_frame(fres);
779 set_ia32_ls_mode(fres, fres_mode);
/* single float result: resolve_call accepts h_res == NULL */
782 resolve_call(call, fres, NULL, irg, block);
785 assert(0 && "unexpected Conv call");
791 /* Ia32 implementation of intrinsic mapping. */
/*
 * Selects the mapper function and cached entity slot (i_ents) for the
 * given op, creates the entity lazily, and registers an INTRINSIC_CALL
 * record in the global `intrinsics` array for later lowering by
 * ia32_handle_intrinsics.  Ops with no ia32 mapping fall back to the
 * default handler.
 */
792 ir_entity *ia32_create_intrinsic_fkt(ir_type *method, const ir_op *op,
793 const ir_mode *imode, const ir_mode *omode,
797 ir_entity **ent = NULL;
798 i_mapper_func mapper;
/* create the record array on first use */
801 intrinsics = NEW_ARR_F(i_record, 0);
/* pick the entity cache slot (and mapper) for the opcode */
803 switch (get_op_code(op)) {
805 ent = &i_ents[iro_Add];
809 ent = &i_ents[iro_Sub];
813 ent = &i_ents[iro_Shl];
817 ent = &i_ents[iro_Shr];
821 ent = &i_ents[iro_Shrs];
825 ent = &i_ents[iro_Mul];
829 ent = &i_ents[iro_Minus];
833 ent = &i_ents[iro_Abs];
837 ent = &i_ents[iro_Div];
841 ent = &i_ents[iro_Mod];
845 ent = &i_ents[iro_Conv];
/* no ia32 specific mapping: delegate to the generic fallback */
849 fprintf(stderr, "FIXME: unhandled op for ia32 intrinsic function %s\n", get_id_str(op->name));
850 return def_create_intrinsic_fkt(method, op, imode, omode, context);
854 #define IDENT(s) new_id_from_chars(s, sizeof(s)-1)
/* lazily create the entity, named "L<opname>" */
856 ident *id = mangle(IDENT("L"), get_op_ident(op));
857 *ent = new_entity(get_glob_type(), id, method);
/* register the lowering record */
860 elt.i_call.kind = INTRINSIC_CALL;
861 elt.i_call.i_ent = *ent;
862 elt.i_call.i_mapper = mapper;
863 elt.i_call.ctx = context;
864 elt.i_call.link = NULL;
866 ARR_APP1(i_record, intrinsics, elt);