2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_common_transform.h"
62 #include "ia32_nodes_attr.h"
63 #include "ia32_transform.h"
64 #include "ia32_new_nodes.h"
65 #include "ia32_map_regs.h"
66 #include "ia32_dbg_stat.h"
67 #include "ia32_optimize.h"
68 #include "ia32_util.h"
69 #include "ia32_address_mode.h"
70 #include "ia32_architecture.h"
72 #include "gen_ia32_regalloc_if.h"
/* Bit patterns used to build in-memory float constants for sign/abs tricks:
 * the sign-bit mask and the all-bits-but-sign mask, for single and double
 * precision, plus the largest signed 64bit integer. */
74 #define SFP_SIGN "0x80000000"
75 #define DFP_SIGN "0x8000000000000000"
76 #define SFP_ABS "0x7FFFFFFF"
77 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
78 #define DFP_INTMAX "9223372036854775807"
/* Names of the primitive types created for the constants above. */
80 #define TP_SFP_SIGN "ia32_sfp_sign"
81 #define TP_DFP_SIGN "ia32_dfp_sign"
82 #define TP_SFP_ABS "ia32_sfp_abs"
83 #define TP_DFP_ABS "ia32_dfp_abs"
84 #define TP_INT_MAX "ia32_int_max"
/* Linker-visible entity names for the constants above (see
 * ia32_gen_fp_known_const() below, which builds/caches them). */
86 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
87 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
88 #define ENT_SFP_ABS "IA32_SFP_ABS"
89 #define ENT_DFP_ABS "IA32_DFP_ABS"
90 #define ENT_INT_MAX "IA32_INT_MAX"
/* Shorthands for the modes of the x87 (vfp) and SSE (xmm) register classes. */
92 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
93 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle (only present in debug builds). */
95 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached transformed node for the initial x87 control word; lazily
 * initialized in get_fpcw() below. */
97 static ir_node *initial_fpcw = NULL;
/* NOTE(review): extern declaration for the Mulh opcode accessor — presumably
 * not exported by a header at the time; verify against libFirm headers. */
99 extern ir_op *get_op_Mulh(void);
/* Callback signature of the generated new_rd_ia32_* constructors for binary
 * operations with a (base, index, mem) address-mode triple. */
101 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
102 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
103 ir_node *op1, ir_node *op2);
/* Like construct_binop_func but additionally consumes an eflags input
 * (e.g. Adc/Sbb). */
105 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
106 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
107 ir_node *op1, ir_node *op2, ir_node *flags);
/* Constructor for shift/rotate operations (no address mode inputs). */
109 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
110 ir_node *block, ir_node *op1, ir_node *op2);
/* Constructor for binary operations with destination address mode.
 * NOTE(review): the parameter list is visibly truncated in this copy. */
112 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
113 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
/* Constructor for unary operations with destination address mode. */
116 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
117 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
/* Constructor for x87 float binops, which take the fp control word. */
119 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
120 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
121 ir_node *op1, ir_node *op2, ir_node *fpcw);
/* Constructor for plain unary operations. */
123 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
124 ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
126 static ir_node *create_immediate_or_transform(ir_node *node,
127 char immediate_constraint_type);
129 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
130 dbg_info *dbgi, ir_node *block,
131 ir_node *op, ir_node *orig_node);
133 /** Return non-zero is a node represents the 0 constant. */
134 static int is_Const_0(ir_node *node) {
135 return is_Const(node) && is_Const_null(node);
138 /** Return non-zero is a node represents the 1 constant. */
139 static int is_Const_1(ir_node *node) {
140 return is_Const(node) && is_Const_one(node);
143 /** Return non-zero is a node represents the -1 constant. */
144 static int is_Const_Minus_1(ir_node *node) {
145 return is_Const(node) && is_Const_all_one(node);
149 * returns true if constant can be created with a simple float command
151 static int is_simple_x87_Const(ir_node *node)
153 tarval *tv = get_Const_tarval(node);
154 if (tarval_is_null(tv) || tarval_is_one(tv))
157 /* TODO: match all the other float constants */
162 * returns true if constant can be created with a simple float command
164 static int is_simple_sse_Const(ir_node *node)
166 tarval *tv = get_Const_tarval(node);
167 ir_mode *mode = get_tarval_mode(tv);
172 if (tarval_is_null(tv) || tarval_is_one(tv))
175 if (mode == mode_D) {
176 unsigned val = get_tarval_sub_bits(tv, 0) |
177 (get_tarval_sub_bits(tv, 1) << 8) |
178 (get_tarval_sub_bits(tv, 2) << 16) |
179 (get_tarval_sub_bits(tv, 3) << 24);
181 /* lower 32bit are zero, really a 32bit constant */
185 /* TODO: match all the other float constants */
/**
 * Transforms a firm Const into ia32 code.
 * Float constants become either special instructions (xZero/xAllOnes+shifts
 * for SSE, vfldz/vfld1 for x87), a movd from a gp constant (32bit-exact
 * values), or a load from a constant-pool entity. Integer constants become
 * an ia32 Const immediate.
 * NOTE(review): this copy of the file has interior lines elided (stray
 * original line numbers remain); code text is kept verbatim, only comments
 * were added.
 */
190 * Transforms a Const.
192 static ir_node *gen_Const(ir_node *node) {
193 ir_graph *irg = current_ir_graph;
194 ir_node *old_block = get_nodes_block(node);
195 ir_node *block = be_transform_node(old_block);
196 dbg_info *dbgi = get_irn_dbg_info(node);
197 ir_mode *mode = get_irn_mode(node);
199 assert(is_Const(node));
201 if (mode_is_float(mode)) {
203 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
204 ir_node *nomem = new_NoMem();
208 if (ia32_cg_config.use_sse2) {
209 tarval *tv = get_Const_tarval(node);
/* 0.0: a single xorps (xZero) suffices */
210 if (tarval_is_null(tv)) {
211 load = new_rd_ia32_xZero(dbgi, irg, block);
212 set_ia32_ls_mode(load, mode);
/* 1.0: build via all-ones, then shift left/right to isolate the
 * exponent pattern (shift amounts 26/2 for float, 55/2 for double) */
214 } else if (tarval_is_one(tv)) {
215 int cnst = mode == mode_F ? 26 : 55;
216 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
217 ir_node *imm2 = create_Immediate(NULL, 0, 2);
218 ir_node *pslld, *psrld;
220 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
221 set_ia32_ls_mode(load, mode);
222 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
223 set_ia32_ls_mode(pslld, mode);
224 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
225 set_ia32_ls_mode(psrld, mode);
227 } else if (mode == mode_F) {
228 /* we can place any 32bit constant by using a movd gp, sse */
229 unsigned val = get_tarval_sub_bits(tv, 0) |
230 (get_tarval_sub_bits(tv, 1) << 8) |
231 (get_tarval_sub_bits(tv, 2) << 16) |
232 (get_tarval_sub_bits(tv, 3) << 24);
233 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
234 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
235 set_ia32_ls_mode(load, mode);
/* double whose low 32 bits are zero: materialize the high word in a gp
 * register, movd it over and shift it into place */
238 if (mode == mode_D) {
239 unsigned val = get_tarval_sub_bits(tv, 0) |
240 (get_tarval_sub_bits(tv, 1) << 8) |
241 (get_tarval_sub_bits(tv, 2) << 16) |
242 (get_tarval_sub_bits(tv, 3) << 24);
244 ir_node *imm32 = create_Immediate(NULL, 0, 32);
245 ir_node *cnst, *psllq;
247 /* fine, lower 32bit are zero, produce 32bit value */
248 val = get_tarval_sub_bits(tv, 4) |
249 (get_tarval_sub_bits(tv, 5) << 8) |
250 (get_tarval_sub_bits(tv, 6) << 16) |
251 (get_tarval_sub_bits(tv, 7) << 24);
252 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
253 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
254 set_ia32_ls_mode(load, mode);
255 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
256 set_ia32_ls_mode(psllq, mode);
/* general case: load from a constant-pool entity */
261 floatent = create_float_const_entity(node);
263 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
265 set_ia32_op_type(load, ia32_AddrModeS);
266 set_ia32_am_sc(load, floatent);
267 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
268 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, otherwise a vfld from the pool */
271 if (is_Const_null(node)) {
272 load = new_rd_ia32_vfldz(dbgi, irg, block);
274 set_ia32_ls_mode(load, mode);
275 } else if (is_Const_one(node)) {
276 load = new_rd_ia32_vfld1(dbgi, irg, block);
278 set_ia32_ls_mode(load, mode);
280 floatent = create_float_const_entity(node);
282 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
283 set_ia32_op_type(load, ia32_AddrModeS);
284 set_ia32_am_sc(load, floatent);
285 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
286 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
287 /* take the mode from the entity */
288 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
292 /* Const Nodes before the initial IncSP are a bad idea, because
293 * they could be spilled and we have no SP ready at that point yet.
294 * So add a dependency to the initial frame pointer calculation to
295 * avoid that situation.
297 if (get_irg_start_block(irg) == block) {
298 add_irn_dep(load, get_irg_frame(irg));
301 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
303 } else { /* non-float mode */
305 tarval *tv = get_Const_tarval(node);
308 tv = tarval_convert_to(tv, mode_Iu);
310 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
312 panic("couldn't convert constant tarval (%+F)", node);
314 val = get_tarval_long(tv);
316 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
317 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same start-block dependency rule as for the float case above */
320 if (get_irg_start_block(irg) == block) {
321 add_irn_dep(cnst, get_irg_frame(irg));
/**
 * Transforms a SymConst (address of an entity).
 * Float-mode SymConsts become a load (SSE xLoad or x87 vfld) of the entity;
 * integer-mode ones become an ia32 Const carrying the entity. Only
 * symconst_addr_ent kinds are supported.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
329 * Transforms a SymConst.
331 static ir_node *gen_SymConst(ir_node *node) {
332 ir_graph *irg = current_ir_graph;
333 ir_node *old_block = get_nodes_block(node);
334 ir_node *block = be_transform_node(old_block);
335 dbg_info *dbgi = get_irn_dbg_info(node);
336 ir_mode *mode = get_irn_mode(node);
339 if (mode_is_float(mode)) {
340 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
341 ir_node *nomem = new_NoMem();
343 if (ia32_cg_config.use_sse2)
344 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
346 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
347 set_ia32_am_sc(cnst, get_SymConst_entity(node));
348 set_ia32_use_frame(cnst);
352 if(get_SymConst_kind(node) != symconst_addr_ent) {
353 panic("backend only support symconst_addr_ent (at %+F)", node);
355 entity = get_SymConst_entity(node);
356 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
359 /* Const Nodes before the initial IncSP are a bad idea, because
360 * they could be spilled and we have no SP ready at that point yet
362 if (get_irg_start_block(irg) == block) {
363 add_irn_dep(cnst, get_irg_frame(irg));
366 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/**
 * Returns (creating and caching on first use) the global entity holding one
 * of the well-known FP manipulation constants (sign masks, abs masks,
 * INT_MAX) described by the TP_*/ENT_* macros at the top of this file.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
371 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
372 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
373 static const struct {
375 const char *ent_name;
376 const char *cnst_str;
/* table indexed by ia32_known_const_t; the numeric field selects the mode
 * (0 = mode_Iu, 1 = mode_Lu, 2 = float) and the last is the alignment */
379 } names [ia32_known_const_max] = {
380 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
381 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
382 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
383 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
384 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
/* one cached entity per known constant */
386 static ir_entity *ent_cache[ia32_known_const_max];
388 const char *tp_name, *ent_name, *cnst_str;
396 ent_name = names[kct].ent_name;
397 if (! ent_cache[kct]) {
398 tp_name = names[kct].tp_name;
399 cnst_str = names[kct].cnst_str;
401 switch (names[kct].mode) {
402 case 0: mode = mode_Iu; break;
403 case 1: mode = mode_Lu; break;
404 default: mode = mode_F; break;
406 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
407 tp = new_type_primitive(new_id_from_str(tp_name), mode);
408 /* set the specified alignment */
409 set_type_alignment_bytes(tp, names[kct].align);
411 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
413 set_entity_ld_ident(ent, get_entity_ident(ent));
414 set_entity_visibility(ent, visibility_local);
415 set_entity_variability(ent, variability_constant);
416 set_entity_allocation(ent, allocation_static);
418 /* we create a new entity here: its initializer must reside in the
420 rem = current_ir_graph;
421 current_ir_graph = get_const_code_irg();
422 cnst = new_Const(mode, tv);
423 current_ir_graph = rem;
425 set_atomic_ent_value(ent, cnst);
427 /* cache the entry */
428 ent_cache[kct] = ent;
431 return ent_cache[kct];
/**
 * Decides whether @p node may be folded into another instruction as a
 * source-address-mode operand: either a "simple" float constant, or a
 * Proj(Load) in the same block that is the node's only user and on whose
 * memory neither @p other nor @p other2 depends.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
435 * return true if the node is a Proj(Load) and could be used in source address
436 * mode for another node. Will return only true if the @p other node is not
437 * dependent on the memory of the Load (for binary operations use the other
438 * input here, for unary operations use NULL).
440 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
441 ir_node *other, ir_node *other2, match_flags_t flags)
446 /* float constants are always available */
447 if (is_Const(node)) {
448 ir_mode *mode = get_irn_mode(node);
449 if (mode_is_float(mode)) {
450 if (ia32_cg_config.use_sse2) {
451 if (is_simple_sse_Const(node))
454 if (is_simple_x87_Const(node))
457 if (get_irn_n_edges(node) > 1)
/* from here on: must be the result Proj of a Load in the same block */
465 load = get_Proj_pred(node);
466 pn = get_Proj_proj(node);
467 if (!is_Load(load) || pn != pn_Load_res)
469 if (get_nodes_block(load) != block)
471 /* we only use address mode if we're the only user of the load */
472 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
474 /* in some edge cases with address mode we might reach the load normally
475 * and through some AM sequence, if it is already materialized then we
476 * can't create an AM node from it */
477 if (be_is_transformed(node))
480 /* don't do AM if other node inputs depend on the load (via mem-proj) */
481 if (other != NULL && get_nodes_block(other) == block &&
482 heights_reachable_in_block(heights, other, load))
484 if (other2 != NULL && get_nodes_block(other2) == block &&
485 heights_reachable_in_block(heights, other2, load))
/* Result of operand matching (see match_arguments()): the address-mode
 * description plus the transformed operands and bookkeeping flags.
 * NOTE(review): several members are elided in this copy of the file
 * (addr, ls_mode, pinned, mem_proj, new_op1/new_op2 are referenced later). */
491 typedef struct ia32_address_mode_t ia32_address_mode_t;
492 struct ia32_address_mode_t {
496 ia32_op_type_t op_type;
500 unsigned commutative : 1;
501 unsigned ins_permuted : 1;
504 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
508 /* construct load address */
509 memset(addr, 0, sizeof(addr[0]));
510 ia32_create_address_mode(addr, ptr, /*force=*/0);
512 noreg_gp = ia32_new_NoReg_gp(env_cg);
513 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
514 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
515 addr->mem = be_transform_node(mem);
/**
 * Fills @p am with address-mode data for a node that was accepted by
 * ia32_use_source_address_mode(): either a float Const (becomes a
 * constant-pool entity access) or a Proj(Load) (becomes the load's address).
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
518 static void build_address(ia32_address_mode_t *am, ir_node *node)
520 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
521 ia32_address_t *addr = &am->addr;
/* float constants get an entity in the constant pool; no real memory dep */
527 if (is_Const(node)) {
528 ir_entity *entity = create_float_const_entity(node);
529 addr->base = noreg_gp;
530 addr->index = noreg_gp;
531 addr->mem = new_NoMem();
532 addr->symconst_ent = entity;
534 am->ls_mode = get_type_mode(get_entity_type(entity));
535 am->pinned = op_pin_state_floats;
/* otherwise: node is Proj(Load); reuse the load's address and memory */
539 load = get_Proj_pred(node);
540 ptr = get_Load_ptr(load);
541 mem = get_Load_mem(load);
542 new_mem = be_transform_node(mem);
543 am->pinned = get_irn_pinned(load);
544 am->ls_mode = get_Load_mode(load);
545 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
547 /* construct load address */
548 ia32_create_address_mode(addr, ptr, /*force=*/0);
550 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
551 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
555 static void set_address(ir_node *node, const ia32_address_t *addr)
557 set_ia32_am_scale(node, addr->scale);
558 set_ia32_am_sc(node, addr->symconst_ent);
559 set_ia32_am_offs_int(node, addr->offset);
560 if(addr->symconst_sign)
561 set_ia32_am_sc_sign(node);
563 set_ia32_use_frame(node);
564 set_ia32_frame_ent(node, addr->frame_entity);
568 * Apply attributes of a given address mode to a node.
570 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
572 set_address(node, &am->addr);
574 set_ia32_op_type(node, am->op_type);
575 set_ia32_ls_mode(node, am->ls_mode);
576 if (am->pinned == op_pin_state_pinned) {
577 /* beware: some nodes are already pinned and did not allow to change the state */
578 if (get_irn_pinned(node) != op_pin_state_pinned)
579 set_irn_pinned(node, op_pin_state_pinned);
582 set_ia32_commutative(node);
586 * Check, if a given node is a Down-Conv, ie. a integer Conv
587 * from a mode with a mode with more bits to a mode with lesser bits.
588 * Moreover, we return only true if the node has not more than 1 user.
590 * @param node the node
591 * @return non-zero if node is a Down-Conv
593 static int is_downconv(const ir_node *node)
601 /* we only want to skip the conv when we're the only user
602 * (not optimal but for now...)
604 if(get_irn_n_edges(node) > 1)
607 src_mode = get_irn_mode(get_Conv_op(node));
608 dest_mode = get_irn_mode(node);
609 return ia32_mode_needs_gp_reg(src_mode)
610 && ia32_mode_needs_gp_reg(dest_mode)
611 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
614 /* Skip all Down-Conv's on a given node and return the resulting node. */
615 ir_node *ia32_skip_downconv(ir_node *node) {
616 while (is_downconv(node))
617 node = get_Conv_op(node);
/**
 * Creates a Conv of @p node up to 32bit (signedness taken from the node's
 * mode) via create_I2I_Conv.
 * NOTE(review): the target-mode selection lines are elided in this copy;
 * code text kept verbatim, comments only added.
 */
622 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
624 ir_mode *mode = get_irn_mode(node);
/* choose the signed or unsigned 32bit target mode depending on the source */
629 if(mode_is_signed(mode)) {
634 block = get_nodes_block(node);
635 dbgi = get_irn_dbg_info(node);
637 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * Matches the operands of a node into ia32 addressing/operand modes:
 * tries (in order) an immediate for op2, source address mode for op2,
 * source address mode for op1 (commutative case, permuting the inputs),
 * and finally plain register operands. Results are written into @p am.
 * NOTE(review): interior lines are elided in this copy (several branch
 * bodies and closing braces are missing); code text kept verbatim,
 * comments only added.
 */
641 * matches operands of a node into ia32 addressing/operand modes. This covers
642 * usage of source address mode, immediates, operations with non 32-bit modes,
644 * The resulting data is filled into the @p am struct. block is the block
645 * of the node whose arguments are matched. op1, op2 are the first and second
646 * input that are matched (op1 may be NULL). other_op is another unrelated
647 * input that is not matched! but which is needed sometimes to check if AM
648 * for op1/op2 is legal.
649 * @p flags describes the supported modes of the operation in detail.
651 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
652 ir_node *op1, ir_node *op2, ir_node *other_op,
655 ia32_address_t *addr = &am->addr;
656 ir_mode *mode = get_irn_mode(op2);
657 int mode_bits = get_mode_size_bits(mode);
658 ir_node *noreg_gp, *new_op1, *new_op2;
660 unsigned commutative;
661 int use_am_and_immediates;
664 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
666 commutative = (flags & match_commutative) != 0;
667 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
668 use_am = (flags & match_am) != 0;
669 use_immediate = (flags & match_immediate) != 0;
670 assert(!use_am_and_immediates || use_immediate);
673 assert(!commutative || op1 != NULL);
674 assert(use_am || !(flags & match_8bit_am));
675 assert(use_am || !(flags & match_16bit_am));
/* sub-32bit modes only allowed with the corresponding *_am flag */
677 if (mode_bits == 8) {
678 if (!(flags & match_8bit_am))
680 /* we don't automatically add upconvs yet */
681 assert((flags & match_mode_neutral) || (flags & match_8bit));
682 } else if (mode_bits == 16) {
683 if (!(flags & match_16bit_am))
685 /* we don't automatically add upconvs yet */
686 assert((flags & match_mode_neutral) || (flags & match_16bit));
689 /* we can simply skip downconvs for mode neutral nodes: the upper bits
690 * can be random for these operations */
691 if (flags & match_mode_neutral) {
692 op2 = ia32_skip_downconv(op2);
694 op1 = ia32_skip_downconv(op1);
698 /* match immediates. firm nodes are normalized: constants are always on the
701 if (!(flags & match_try_am) && use_immediate) {
702 new_op2 = try_create_Immediate(op2, 0);
705 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* try source address mode for op2 */
706 if (new_op2 == NULL &&
707 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
708 build_address(am, op2);
709 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
710 if (mode_is_float(mode)) {
711 new_op2 = ia32_new_NoReg_vfp(env_cg);
715 am->op_type = ia32_AddrModeS;
/* commutative: try source address mode for op1 with swapped inputs */
716 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
718 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
720 build_address(am, op1);
722 if (mode_is_float(mode)) {
723 noreg = ia32_new_NoReg_vfp(env_cg);
728 if (new_op2 != NULL) {
731 new_op1 = be_transform_node(op2);
733 am->ins_permuted = 1;
735 am->op_type = ia32_AddrModeS;
737 if (flags & match_try_am) {
/* fallback: plain register operands */
740 am->op_type = ia32_Normal;
744 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
746 new_op2 = be_transform_node(op2);
747 am->op_type = ia32_Normal;
748 am->ls_mode = get_irn_mode(op2);
749 if (flags & match_mode_neutral)
750 am->ls_mode = mode_Iu;
/* fill unused address fields with NoReg/NoMem placeholders */
752 if (addr->base == NULL)
753 addr->base = noreg_gp;
754 if (addr->index == NULL)
755 addr->index = noreg_gp;
756 if (addr->mem == NULL)
757 addr->mem = new_NoMem();
759 am->new_op1 = new_op1;
760 am->new_op2 = new_op2;
761 am->commutative = commutative;
/**
 * When a Load was folded into @p node via source address mode, the old
 * memory Proj must now hang off @p node: mark the load as transformed into
 * node, turn node into mode_T if needed and return a result Proj.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
764 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
769 if (am->mem_proj == NULL)
772 /* we have to create a mode_T so the old MemProj can attach to us */
773 mode = get_irn_mode(node);
774 load = get_Proj_pred(am->mem_proj);
776 mark_irn_visited(load);
777 be_set_transformed_node(load, node);
779 if (mode != mode_T) {
780 set_irn_mode(node, mode_T);
781 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
/**
 * Construct a standard ia32 binary operation: match the operands (AM /
 * immediates per @p flags), call the constructor, apply the AM attributes
 * and fix up a folded load's memory Proj.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
788 * Construct a standard binary operation, set AM and immediate if required.
790 * @param node The original node for which the binop is created
791 * @param op1 The first operand
792 * @param op2 The second operand
793 * @param func The node constructor function
794 * @return The constructed ia32 node.
796 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
797 construct_binop_func *func, match_flags_t flags)
800 ir_node *block, *new_block, *new_node;
801 ia32_address_mode_t am;
802 ia32_address_t *addr = &am.addr;
804 block = get_nodes_block(node);
805 match_arguments(&am, block, op1, op2, NULL, flags);
807 dbgi = get_irn_dbg_info(node);
808 new_block = be_transform_node(block);
809 new_node = func(dbgi, current_ir_graph, new_block,
810 addr->base, addr->index, addr->mem,
811 am.new_op1, am.new_op2);
812 set_am_attributes(new_node, &am);
813 /* we can't use source address mode anymore when using immediates */
814 if (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
815 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
816 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
818 new_node = fix_mem_proj(new_node, &am);
/* Generic input indices for lowered flag-consuming binops, with
 * compile-time checks that they match the generated Adc/Sbb positions.
 * NOTE(review): the enum's opening lines (and the _left member) are elided
 * in this copy. */
825 n_ia32_l_binop_right,
826 n_ia32_l_binop_eflags
828 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
829 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
830 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
831 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
832 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
833 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a flag-consuming binary operation (Adc/Sbb style): like
 * gen_binop() but also transforms and passes the eflags input.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
836 * Construct a binary operation which also consumes the eflags.
838 * @param node The node to transform
839 * @param func The node constructor function
840 * @param flags The match flags
841 * @return The constructor ia32 node
843 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
846 ir_node *src_block = get_nodes_block(node);
847 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
848 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
850 ir_node *block, *new_node, *eflags, *new_eflags;
851 ia32_address_mode_t am;
852 ia32_address_t *addr = &am.addr;
854 match_arguments(&am, src_block, op1, op2, NULL, flags);
856 dbgi = get_irn_dbg_info(node);
857 block = be_transform_node(src_block);
858 eflags = get_irn_n(node, n_ia32_l_binop_eflags);
859 new_eflags = be_transform_node(eflags);
860 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
861 addr->mem, am.new_op1, am.new_op2, new_eflags);
862 set_am_attributes(new_node, &am);
863 /* we can't use source address mode anymore when using immediates */
864 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
865 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
866 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
868 new_node = fix_mem_proj(new_node, &am);
873 static ir_node *get_fpcw(void)
876 if (initial_fpcw != NULL)
879 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
880 &ia32_fp_cw_regs[REG_FPCW]);
881 initial_fpcw = be_transform_node(fpcw);
/**
 * Construct an x87 float binary operation; like gen_binop() but the
 * constructor additionally receives the fp control word (get_fpcw()).
 * Address mode is not used for long double (> 64 bit).
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
887 * Construct a standard binary operation, set AM and immediate if required.
889 * @param op1 The first operand
890 * @param op2 The second operand
891 * @param func The node constructor function
892 * @return The constructed ia32 node.
894 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
895 construct_binop_float_func *func,
898 ir_mode *mode = get_irn_mode(node);
900 ir_node *block, *new_block, *new_node;
901 ia32_address_mode_t am;
902 ia32_address_t *addr = &am.addr;
904 /* cannot use address mode with long double on x87 */
905 if (get_mode_size_bits(mode) > 64)
908 block = get_nodes_block(node);
909 match_arguments(&am, block, op1, op2, NULL, flags);
911 dbgi = get_irn_dbg_info(node);
912 new_block = be_transform_node(block);
913 new_node = func(dbgi, current_ir_graph, new_block,
914 addr->base, addr->index, addr->mem,
915 am.new_op1, am.new_op2, get_fpcw());
916 set_am_attributes(new_node, &am);
918 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
920 new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate operation. Skips downconvs (or upconvs) on the
 * value operand, strips integer Convs off the shift-amount operand (only
 * the low 5 bits matter on ia32) and allows an immediate shift amount.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
926 * Construct a shift/rotate binary operation, sets AM and immediate if required.
928 * @param op1 The first operand
929 * @param op2 The second operand
930 * @param func The node constructor function
931 * @return The constructed ia32 node.
933 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
934 construct_shift_func *func,
938 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
940 assert(! mode_is_float(get_irn_mode(node)));
941 assert(flags & match_immediate);
942 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* value operand: mode-neutral ops can ignore upper bits, others may need
 * an explicit upconv to 32bit */
944 if (flags & match_mode_neutral) {
945 op1 = ia32_skip_downconv(op1);
946 new_op1 = be_transform_node(op1);
947 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
948 new_op1 = create_upconv(op1, node);
950 new_op1 = be_transform_node(op1);
953 /* the shift amount can be any mode that is bigger than 5 bits, since all
954 * other bits are ignored anyway */
955 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
956 ir_node *const op = get_Conv_op(op2);
957 if (mode_is_float(get_irn_mode(op)))
960 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
962 new_op2 = create_immediate_or_transform(op2, 0);
964 dbgi = get_irn_dbg_info(node);
965 block = get_nodes_block(node);
966 new_block = be_transform_node(block);
967 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
968 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
970 /* lowered shift instruction may have a dependency operand, handle it here */
971 if (get_irn_arity(node) == 3) {
972 /* we have a dependency */
973 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
974 add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation: optionally skip downconvs
 * (match_mode_neutral), transform the operand and call the constructor.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
982 * Construct a standard unary operation, set AM and immediate if required.
984 * @param op The operand
985 * @param func The node constructor function
986 * @return The constructed ia32 node.
988 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
992 ir_node *block, *new_block, *new_op, *new_node;
994 assert(flags == 0 || flags == match_mode_neutral);
995 if (flags & match_mode_neutral) {
996 op = ia32_skip_downconv(op);
999 new_op = be_transform_node(op);
1000 dbgi = get_irn_dbg_info(node);
1001 block = get_nodes_block(node);
1002 new_block = be_transform_node(block);
1003 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1005 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Builds an ia32 Lea node from an address-mode description, substituting
 * NoReg for missing base/index and transforming present ones.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
1010 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1011 ia32_address_t *addr)
1013 ir_node *base, *index, *res;
1017 base = ia32_new_NoReg_gp(env_cg);
1019 base = be_transform_node(base);
1022 index = addr->index;
1023 if (index == NULL) {
1024 index = ia32_new_NoReg_gp(env_cg);
1026 index = be_transform_node(index);
1029 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1030 set_address(res, addr);
1036 * Returns non-zero if a given address mode has a symbolic or
1037 * numerical offset != 0.
1039 static int am_has_immediates(const ia32_address_t *addr)
1041 return addr->offset != 0 || addr->symconst_ent != NULL
1042 || addr->frame_entity || addr->use_frame;
/**
 * Transforms an Add. Float adds delegate to gen_binop / gen_binop_x87_float.
 * Integer adds choose between: a pure immediate tree (-> Const), an add of
 * x + immediate (-> Lea or even just x), an Add with source address mode,
 * or a general Lea.
 * NOTE(review): interior lines are elided in this copy; code text kept
 * verbatim, comments only added.
 */
1046 * Creates an ia32 Add.
1048 * @return the created ia32 Add node
1050 static ir_node *gen_Add(ir_node *node) {
1051 ir_mode *mode = get_irn_mode(node);
1052 ir_node *op1 = get_Add_left(node);
1053 ir_node *op2 = get_Add_right(node);
1055 ir_node *block, *new_block, *new_node, *add_immediate_op;
1056 ia32_address_t addr;
1057 ia32_address_mode_t am;
1059 if (mode_is_float(mode)) {
1060 if (ia32_cg_config.use_sse2)
1061 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1062 match_commutative | match_am);
1064 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1065 match_commutative | match_am);
1068 ia32_mark_non_am(node);
1070 op2 = ia32_skip_downconv(op2);
1071 op1 = ia32_skip_downconv(op1);
/* strategy (in order):
1075 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1076 * 1. Add with immediate -> Lea
1077 * 2. Add with possible source address mode -> Add
1078 * 3. Otherwise -> Lea
*/
1080 memset(&addr, 0, sizeof(addr));
1081 ia32_create_address_mode(&addr, node, /*force=*/1);
1082 add_immediate_op = NULL;
1084 dbgi = get_irn_dbg_info(node);
1085 block = get_nodes_block(node);
1086 new_block = be_transform_node(block);
/* case 0: the whole Add collapsed into symconst/offset -> plain Const */
1089 if(addr.base == NULL && addr.index == NULL) {
1090 ir_graph *irg = current_ir_graph;
1091 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1092 addr.symconst_sign, addr.offset);
1093 add_irn_dep(new_node, get_irg_frame(irg));
1094 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1097 /* add with immediate? */
1098 if(addr.index == NULL) {
1099 add_immediate_op = addr.base;
1100 } else if(addr.base == NULL && addr.scale == 0) {
1101 add_immediate_op = addr.index;
1104 if(add_immediate_op != NULL) {
1105 if(!am_has_immediates(&addr)) {
1106 #ifdef DEBUG_libfirm
1107 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1110 return be_transform_node(add_immediate_op);
1113 new_node = create_lea_from_address(dbgi, new_block, &addr);
1114 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1118 /* test if we can use source address mode */
1119 match_arguments(&am, block, op1, op2, NULL, match_commutative
1120 | match_mode_neutral | match_am | match_immediate | match_try_am);
1122 /* construct an Add with source address mode */
1123 if (am.op_type == ia32_AddrModeS) {
1124 ir_graph *irg = current_ir_graph;
1125 ia32_address_t *am_addr = &am.addr;
1126 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1127 am_addr->index, am_addr->mem, am.new_op1,
1129 set_am_attributes(new_node, &am);
1130 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1132 new_node = fix_mem_proj(new_node, &am);
1137 /* otherwise construct a lea */
1138 new_node = create_lea_from_address(dbgi, new_block, &addr);
1139 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1144 * Creates an ia32 Mul.
1146 * @return the created ia32 Mul node
1148 static ir_node *gen_Mul(ir_node *node) {
1149 ir_node *op1 = get_Mul_left(node);
1150 ir_node *op2 = get_Mul_right(node);
1151 ir_mode *mode = get_irn_mode(node);
1153 if (mode_is_float(mode)) {
1154 if (ia32_cg_config.use_sse2)
1155 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1156 match_commutative | match_am);
1158 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1159 match_commutative | match_am);
1161 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1162 match_commutative | match_am | match_mode_neutral |
1163 match_immediate | match_am_and_immediates);
1167 * Creates an ia32 Mulh.
1168 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1169 * this result while Mul returns the lower 32 bit.
1171 * @return the created ia32 Mulh node
/* gen_Mulh: transform a firm Mulh (upper 32 bits of a 32x32 multiply)
 * into an ia32 IMul1OP (signed) or Mul (unsigned) and return the Proj
 * selecting the high half of the result. */
1173 static ir_node *gen_Mulh(ir_node *node)
1175 ir_node *block = get_nodes_block(node);
1176 ir_node *new_block = be_transform_node(block);
1177 ir_graph *irg = current_ir_graph;
1178 dbg_info *dbgi = get_irn_dbg_info(node);
1179 ir_mode *mode = get_irn_mode(node);
1180 ir_node *op1 = get_Mulh_left(node);
1181 ir_node *op2 = get_Mulh_right(node);
1182 ir_node *proj_res_high;
1184 ia32_address_mode_t am;
1185 ia32_address_t *addr = &am.addr;
/* only 32 bit integer Mulh is handled here */
1187 assert(!mode_is_float(mode) && "Mulh with float not supported");
1188 assert(get_mode_size_bits(mode) == 32);
/* try to fold one operand into a source address mode */
1190 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* signed and unsigned widening multiplies need different opcodes */
1192 if (mode_is_signed(mode)) {
1193 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1194 addr->index, addr->mem, am.new_op1,
1197 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1198 addr->index, addr->mem, am.new_op1,
1202 set_am_attributes(new_node, &am);
1203 /* we can't use source address mode anymore when using immediates */
1204 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1205 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1206 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1208 assert(get_irn_mode(new_node) == mode_T);
1210 fix_mem_proj(new_node, &am);
/* IMul1OP and Mul share the same proj number for the high result, so a
 * single Proj constant works for both variants */
1212 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1213 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1214 mode_Iu, pn_ia32_IMul1OP_res_high);
1216 return proj_res_high;
1222 * Creates an ia32 And.
1224 * @return The created ia32 And node
/* gen_And: transform a firm And. An And with constant 0xFF or 0xFFFF is
 * recognised as a zero extension and lowered to an I2I Conv; all other
 * cases become a regular ia32 And. */
1226 static ir_node *gen_And(ir_node *node) {
1227 ir_node *op1 = get_And_left(node);
1228 ir_node *op2 = get_And_right(node);
1229 assert(! mode_is_float(get_irn_mode(node)));
1231 /* is it a zero extension? */
1232 if (is_Const(op2)) {
1233 tarval *tv = get_Const_tarval(op2);
1234 long v = get_tarval_long(tv);
1236 if (v == 0xFF || v == 0xFFFF) {
1237 dbg_info *dbgi = get_irn_dbg_info(node);
1238 ir_node *block = get_nodes_block(node);
/* NOTE(review): the 0xFF source-mode selection is outside this view;
 * only the 16 bit case is asserted here */
1245 assert(v == 0xFFFF);
/* zero extension == conversion from the small unsigned mode to Iu */
1248 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* default: plain ia32 And with full matching flags */
1253 return gen_binop(node, op1, op2, new_rd_ia32_And,
1254 match_commutative | match_mode_neutral | match_am
1261 * Creates an ia32 Or.
1263 * @return The created ia32 Or node
1265 static ir_node *gen_Or(ir_node *node) {
1266 ir_node *op1 = get_Or_left(node);
1267 ir_node *op2 = get_Or_right(node);
1269 assert (! mode_is_float(get_irn_mode(node)));
1270 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1271 | match_mode_neutral | match_am | match_immediate);
1277 * Creates an ia32 Eor.
1279 * @return The created ia32 Eor node
1281 static ir_node *gen_Eor(ir_node *node) {
1282 ir_node *op1 = get_Eor_left(node);
1283 ir_node *op2 = get_Eor_right(node);
1285 assert(! mode_is_float(get_irn_mode(node)));
1286 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1287 | match_mode_neutral | match_am | match_immediate);
1292 * Creates an ia32 Sub.
1294 * @return The created ia32 Sub node
1296 static ir_node *gen_Sub(ir_node *node) {
1297 ir_node *op1 = get_Sub_left(node);
1298 ir_node *op2 = get_Sub_right(node);
1299 ir_mode *mode = get_irn_mode(node);
1301 if (mode_is_float(mode)) {
1302 if (ia32_cg_config.use_sse2)
1303 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1305 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
1309 if (is_Const(op2)) {
1310 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1314 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1315 | match_am | match_immediate);
1319 * Generates an ia32 DivMod with additional infrastructure for the
1320 * register allocator if needed.
/* create_Div: shared transformation for Div, Mod and DivMod. Builds an
 * ia32 IDiv (signed, with a Cltd sign extension into edx) or Div
 * (unsigned, edx zeroed via a Const) and wires up memory and source
 * address mode. The caller picks the wanted result via Projs. */
1322 static ir_node *create_Div(ir_node *node)
1324 ir_graph *irg = current_ir_graph;
1325 dbg_info *dbgi = get_irn_dbg_info(node);
1326 ir_node *block = get_nodes_block(node);
1327 ir_node *new_block = be_transform_node(block);
1334 ir_node *sign_extension;
1335 ia32_address_mode_t am;
1336 ia32_address_t *addr = &am.addr;
/* extract operands/memory/result mode depending on the concrete opcode */
1338 /* the upper bits have random contents for smaller modes */
1339 switch (get_irn_opcode(node)) {
1341 op1 = get_Div_left(node);
1342 op2 = get_Div_right(node);
1343 mem = get_Div_mem(node);
1344 mode = get_Div_resmode(node);
1347 op1 = get_Mod_left(node);
1348 op2 = get_Mod_right(node);
1349 mem = get_Mod_mem(node);
1350 mode = get_Mod_resmode(node);
1353 op1 = get_DivMod_left(node);
1354 op2 = get_DivMod_right(node);
1355 mem = get_DivMod_mem(node);
1356 mode = get_DivMod_resmode(node);
1359 panic("invalid divmod node %+F", node);
/* only the divisor (op2) may be folded as source address mode */
1362 match_arguments(&am, block, op1, op2, NULL, match_am);
1364 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1365 is the memory of the consumed address. We can have only the second op as address
1366 in Div nodes, so check only op2. */
1367 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1368 new_mem = be_transform_node(mem);
1369 if(!is_NoMem(addr->mem)) {
/* both the explicit memory and the AM memory are live: Sync them */
1373 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1376 new_mem = addr->mem;
1379 if (mode_is_signed(mode)) {
/* signed division: sign-extend eax into edx with cltd first */
1380 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1381 add_irn_dep(produceval, get_irg_frame(irg));
1382 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1385 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1386 addr->index, new_mem, am.new_op2,
1387 am.new_op1, sign_extension);
/* unsigned division: edx must simply be zero */
1389 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1390 add_irn_dep(sign_extension, get_irg_frame(irg));
1392 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1393 addr->index, new_mem, am.new_op2,
1394 am.new_op1, sign_extension);
/* keep the original pinned state (division may trap) */
1397 set_irn_pinned(new_node, get_irn_pinned(node));
1399 set_am_attributes(new_node, &am);
1400 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1402 new_node = fix_mem_proj(new_node, &am);
1408 static ir_node *gen_Mod(ir_node *node) {
1409 return create_Div(node);
1412 static ir_node *gen_Div(ir_node *node) {
1413 return create_Div(node);
1416 static ir_node *gen_DivMod(ir_node *node) {
1417 return create_Div(node);
1423 * Creates an ia32 floating Div.
1425 * @return The created ia32 xDiv node
1427 static ir_node *gen_Quot(ir_node *node)
1429 ir_node *op1 = get_Quot_left(node);
1430 ir_node *op2 = get_Quot_right(node);
1432 if (ia32_cg_config.use_sse2) {
1433 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1435 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1441 * Creates an ia32 Shl.
1443 * @return The created ia32 Shl node
1445 static ir_node *gen_Shl(ir_node *node) {
1446 ir_node *left = get_Shl_left(node);
1447 ir_node *right = get_Shl_right(node);
1449 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1450 match_mode_neutral | match_immediate);
1454 * Creates an ia32 Shr.
1456 * @return The created ia32 Shr node
1458 static ir_node *gen_Shr(ir_node *node) {
1459 ir_node *left = get_Shr_left(node);
1460 ir_node *right = get_Shr_right(node);
1462 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1468 * Creates an ia32 Sar.
1470 * @return The created ia32 Shrs node
/* gen_Shrs: transform an arithmetic shift right into an ia32 Sar.
 * Two special patterns are recognised first:
 *  - Shrs(x, 31) on mode_Is is a pure sign extension -> Cltd
 *  - Shrs(Shl(x, C), C) with C in {16, 24} is an 8/16 bit sign
 *    extension -> I2I Conv */
1472 static ir_node *gen_Shrs(ir_node *node) {
1473 ir_node *left = get_Shrs_left(node);
1474 ir_node *right = get_Shrs_right(node);
1475 ir_mode *mode = get_irn_mode(node);
1477 if(is_Const(right) && mode == mode_Is) {
1478 tarval *tv = get_Const_tarval(right);
1479 long val = get_tarval_long(tv);
/* NOTE(review): the check that val selects the sign bit (presumably
 * val == 31) lies outside this view — confirm against full source */
1481 /* this is a sign extension */
1482 ir_graph *irg = current_ir_graph;
1483 dbg_info *dbgi = get_irn_dbg_info(node);
1484 ir_node *block = be_transform_node(get_nodes_block(node));
1486 ir_node *new_op = be_transform_node(op);
1487 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1488 add_irn_dep(pval, get_irg_frame(irg));
1490 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1494 /* 8 or 16 bit sign extension? */
1495 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1496 ir_node *shl_left = get_Shl_left(left);
1497 ir_node *shl_right = get_Shl_right(left);
1498 if(is_Const(shl_right)) {
1499 tarval *tv1 = get_Const_tarval(right);
1500 tarval *tv2 = get_Const_tarval(shl_right);
/* both shift amounts must be the same constant */
1501 if(tv1 == tv2 && tarval_is_long(tv1)) {
1502 long val = get_tarval_long(tv1);
/* shift by 24 extends 8 bits, shift by 16 extends 16 bits */
1503 if(val == 16 || val == 24) {
1504 dbg_info *dbgi = get_irn_dbg_info(node);
1505 ir_node *block = get_nodes_block(node);
1515 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* fallback: plain arithmetic shift right */
1524 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1530 * Creates an ia32 Rol.
1532 * @param op1 The first operator
1533 * @param op2 The second operator
1534 * @return The created ia32 RotL node
1536 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2) {
1537 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1543 * Creates an ia32 Ror.
1544 * NOTE: There is no RotR with immediate because this would always be a RotL
1545 * "imm-mode_size_bits" which can be pre-calculated.
1547 * @param op1 The first operator
1548 * @param op2 The second operator
1549 * @return The created ia32 RotR node
1551 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2) {
1552 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1558 * Creates an ia32 RotR or RotL (depending on the found pattern).
1560 * @return The created ia32 RotL or RotR node
/* gen_Rotl: firm only has a left rotate. If the rotate amount has the
 * shape "-e + mode_size_bits" (an already-normalised "bits - e") we can
 * emit a Ror by e instead of Minus+Add+Rol; otherwise emit a plain Rol. */
1562 static ir_node *gen_Rotl(ir_node *node) {
1563 ir_node *rotate = NULL;
1564 ir_node *op1 = get_Rotl_left(node);
1565 ir_node *op2 = get_Rotl_right(node);
1567 /* Firm has only RotL, so we are looking for a right (op2)
1568 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1569 that means we can create a RotR instead of an Add and a RotL */
1573 ir_node *left = get_Add_left(add);
1574 ir_node *right = get_Add_right(add);
1575 if (is_Const(right)) {
1576 tarval *tv = get_Const_tarval(right);
1577 ir_mode *mode = get_irn_mode(node);
1578 long bits = get_mode_size_bits(mode);
/* match Minus(e) + bits: equivalent to rotating right by e */
1580 if (is_Minus(left) &&
1581 tarval_is_long(tv) &&
1582 get_tarval_long(tv) == bits &&
1585 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1586 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern did not match: fall back to a plain rotate left */
1591 if (rotate == NULL) {
1592 rotate = gen_Rol(node, op1, op2);
1601 * Transforms a Minus node.
1603 * @return The created ia32 Minus node
/* gen_Minus: transform a firm Minus. SSE2 floats negate by xor-ing the
 * sign-bit mask from a known constant entity, x87 floats use vfchs, and
 * integers use the ia32 Neg node. */
1605 static ir_node *gen_Minus(ir_node *node)
1607 ir_node *op = get_Minus_op(node);
1608 ir_node *block = be_transform_node(get_nodes_block(node));
1609 ir_graph *irg = current_ir_graph;
1610 dbg_info *dbgi = get_irn_dbg_info(node);
1611 ir_mode *mode = get_irn_mode(node);
1616 if (mode_is_float(mode)) {
1617 ir_node *new_op = be_transform_node(op);
1618 if (ia32_cg_config.use_sse2) {
1619 /* TODO: non-optimal... if we have many xXors, then we should
1620 * rather create a load for the const and use that instead of
1621 * several AM nodes... */
1622 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1623 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1624 ir_node *nomem = new_rd_NoMem(irg);
/* xor with the sign-bit mask flips the sign of the float value */
1626 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1627 nomem, new_op, noreg_xmm);
/* pick the 32 or 64 bit sign mask constant entity */
1629 size = get_mode_size_bits(mode);
1630 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
/* access the mask through source address mode */
1632 set_ia32_am_sc(new_node, ent);
1633 set_ia32_op_type(new_node, ia32_AddrModeS);
1634 set_ia32_ls_mode(new_node, mode);
/* x87: the fchs instruction negates directly */
1636 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
/* integer negate */
1639 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1642 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1648 * Transforms a Not node.
1650 * @return The created ia32 Not node
1652 static ir_node *gen_Not(ir_node *node) {
1653 ir_node *op = get_Not_op(node);
1655 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1656 assert (! mode_is_float(get_irn_mode(node)));
1658 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1664 * Transforms an Abs node.
1666 * @return The created ia32 Abs node
/* gen_Abs: transform a firm Abs. SSE2 floats and the sign bit away with
 * a known mask constant, x87 uses vfabs. Integers use the classic
 * branch-free sequence: s = x >> 31 (via Cltd); result = (x ^ s) - s. */
1668 static ir_node *gen_Abs(ir_node *node)
1670 ir_node *block = get_nodes_block(node);
1671 ir_node *new_block = be_transform_node(block);
1672 ir_node *op = get_Abs_op(node);
1673 ir_graph *irg = current_ir_graph;
1674 dbg_info *dbgi = get_irn_dbg_info(node);
1675 ir_mode *mode = get_irn_mode(node);
1676 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1677 ir_node *nomem = new_NoMem();
1683 if (mode_is_float(mode)) {
1684 new_op = be_transform_node(op);
1686 if (ia32_cg_config.use_sse2) {
1687 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
/* and-ing with the abs mask clears the sign bit */
1688 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1689 nomem, new_op, noreg_fp);
/* pick the 32 or 64 bit abs mask constant entity */
1691 size = get_mode_size_bits(mode);
1692 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1694 set_ia32_am_sc(new_node, ent);
1696 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1698 set_ia32_op_type(new_node, ia32_AddrModeS);
1699 set_ia32_ls_mode(new_node, mode);
/* x87: fabs handles it directly */
1701 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1702 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1705 ir_node *xor, *pval, *sign_extension;
/* widen sub-32-bit values first so the sign-extension trick works */
1707 if (get_mode_size_bits(mode) == 32) {
1708 new_op = be_transform_node(op);
1710 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
/* s = sign of x, replicated into all bits (cltd) */
1713 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1714 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1717 add_irn_dep(pval, get_irg_frame(irg));
1718 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
/* abs(x) = (x ^ s) - s */
1720 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1721 nomem, new_op, sign_extension);
1722 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1724 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1725 nomem, xor, sign_extension);
1726 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1733 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1735 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n) {
1736 dbg_info *dbgi = get_irn_dbg_info(cmp);
1737 ir_node *block = get_nodes_block(cmp);
1738 ir_node *new_block = be_transform_node(block);
1739 ir_node *op1 = be_transform_node(x);
1740 ir_node *op2 = be_transform_node(n);
1742 return new_rd_ia32_Bt(dbgi, current_ir_graph, new_block, op1, op2);
1746 * Transform a node returning a "flag" result.
1748 * @param node the node to transform
1749 * @param pnc_out the compare mode to use
/* get_flags_node: transform a node producing a "flag" value. For a Proj
 * of a Cmp the Cmp itself is transformed (with a special Bt pattern for
 * "x & (1 << n) ==/!= 0"); a plain mode_b value is tested against 0 via
 * an ia32 Test. The compare relation is returned through pnc_out. */
1751 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1760 /* we have a Cmp as input */
1761 if (is_Proj(node)) {
1762 ir_node *pred = get_Proj_pred(node);
1764 pn_Cmp pnc = get_Proj_proj(node);
/* try to use the bt instruction for single-bit tests */
1765 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1766 ir_node *l = get_Cmp_left(pred);
1767 ir_node *r = get_Cmp_right(pred);
1769 ir_node *la = get_And_left(l);
1770 ir_node *ra = get_And_right(l);
/* case 1: ((1 << n) & ra) compared against 0 (or la itself) */
1772 ir_node *c = get_Shl_left(la);
1773 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1774 /* (1 << n) & ra) */
1775 ir_node *n = get_Shl_right(la);
1776 flags = gen_bt(pred, ra, n);
1777 /* we must generate a Jc/Jnc jump */
1778 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
/* bt sets the carry flag -> use unsigned compare codes */
1781 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* case 2: mirrored pattern la & (1 << n) */
1786 ir_node *c = get_Shl_left(ra);
1787 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1788 /* la & (1 << n)) */
1789 ir_node *n = get_Shl_right(ra);
1790 flags = gen_bt(pred, la, n);
1791 /* we must generate a Jc/Jnc jump */
1792 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1795 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* ordinary Cmp: just transform the predecessor */
1801 flags = be_transform_node(pred);
1807 /* a mode_b value, we have to compare it against 0 */
1808 dbgi = get_irn_dbg_info(node);
1809 new_block = be_transform_node(get_nodes_block(node));
1810 new_op = be_transform_node(node);
1811 noreg = ia32_new_NoReg_gp(env_cg);
1812 nomem = new_NoMem();
/* Test value,value sets ZF iff the value is 0 */
1813 flags = new_rd_ia32_Test(dbgi, current_ir_graph, new_block, noreg, noreg, nomem,
1814 new_op, new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1815 *pnc_out = pn_Cmp_Lg;
1820 * Transforms a Load.
1822 * @return the created ia32 Load node
/* gen_Load: transform a firm Load into xLoad (SSE2 float), vfld (x87
 * float), Conv_I2I with source AM (sub-32-bit integers) or a plain ia32
 * Load. The load address is folded into an ia32 address mode. */
1824 static ir_node *gen_Load(ir_node *node) {
1825 ir_node *old_block = get_nodes_block(node);
1826 ir_node *block = be_transform_node(old_block);
1827 ir_node *ptr = get_Load_ptr(node);
1828 ir_node *mem = get_Load_mem(node);
1829 ir_node *new_mem = be_transform_node(mem);
1832 ir_graph *irg = current_ir_graph;
1833 dbg_info *dbgi = get_irn_dbg_info(node);
1834 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1835 ir_mode *mode = get_Load_mode(node);
1838 ia32_address_t addr;
1840 /* construct load address */
1841 memset(&addr, 0, sizeof(addr));
1842 ia32_create_address_mode(&addr, ptr, /*force=*/0);
/* transform the base/index parts that the address mode references */
1849 base = be_transform_node(base);
1855 index = be_transform_node(index);
1858 if (mode_is_float(mode)) {
1859 if (ia32_cg_config.use_sse2) {
1860 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1862 res_mode = mode_xmm;
1864 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1866 res_mode = mode_vfp;
1869 assert(mode != mode_b);
1871 /* create a conv node with address mode for smaller modes */
1872 if(get_mode_size_bits(mode) < 32) {
1873 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1874 new_mem, noreg, mode);
1876 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1881 set_irn_pinned(new_node, get_irn_pinned(node));
1882 set_ia32_op_type(new_node, ia32_AddrModeS);
1883 set_ia32_ls_mode(new_node, mode);
1884 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialised instead of spilled */
1886 if(get_irn_pinned(node) == op_pin_state_floats) {
1887 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1890 /* make sure we are scheduled behind the initial IncSP/Barrier
1891 * to avoid spills being placed before it
1893 if (block == get_irg_start_block(irg)) {
1894 add_irn_dep(new_node, get_irg_frame(irg));
1897 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* use_dest_am: decide whether a Load feeding an operation that stores
 * back to the same address can be folded into destination address mode.
 * node is the Load's result Proj, mem/ptr come from the Store, other is
 * the second operand that must not depend on the load. Returns non-zero
 * when destination AM is safe (visible checks; result values are on
 * elided lines). */
1902 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1903 ir_node *ptr, ir_node *other)
1910 /* we only use address mode if we're the only user of the load */
1911 if(get_irn_n_edges(node) > 1)
1914 load = get_Proj_pred(node);
/* load and store must be in the same block */
1917 if(get_nodes_block(load) != block)
1920 /* Store should be attached to the load */
1921 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
1923 /* store should have the same pointer as the load */
1924 if(get_Load_ptr(load) != ptr)
1927 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1928 if(other != NULL && get_nodes_block(other) == block
1929 && heights_reachable_in_block(heights, other, load))
1935 static void set_transformed_and_mark(ir_node *const old_node, ir_node *const new_node)
1937 mark_irn_visited(old_node);
1938 be_set_transformed_node(old_node, new_node);
/* dest_am_binop: build a binary operation in destination address mode
 * (op reads and writes memory directly, e.g. AddMem). Returns NULL when
 * destination AM is not applicable; the Store's memory Proj is rerouted
 * to the new node. */
1941 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1942 ir_node *mem, ir_node *ptr, ir_mode *mode,
1943 construct_binop_dest_func *func,
1944 construct_binop_dest_func *func8bit,
1945 match_flags_t flags)
1947 ir_node *src_block = get_nodes_block(node);
1949 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1950 ir_graph *irg = current_ir_graph;
1956 ia32_address_mode_t am;
1957 ia32_address_t *addr = &am.addr;
1958 memset(&am, 0, sizeof(am));
1960 assert(flags & match_dest_am);
1961 assert(flags & match_immediate); /* there is no destam node without... */
1962 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the foldable load; for commutative ops
 * the operands may be swapped */
1964 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
1965 build_address(&am, op1);
1966 new_op = create_immediate_or_transform(op2, 0);
1967 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
1968 build_address(&am, op2);
1969 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with NoReg/NoMem placeholders */
1974 if(addr->base == NULL)
1975 addr->base = noreg_gp;
1976 if(addr->index == NULL)
1977 addr->index = noreg_gp;
1978 if(addr->mem == NULL)
1979 addr->mem = new_NoMem();
1981 dbgi = get_irn_dbg_info(node);
1982 block = be_transform_node(src_block);
/* 8 bit operations need their own constructor (different registers) */
1983 if(get_mode_size_bits(mode) == 8) {
1984 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
1987 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
1990 set_address(new_node, addr);
1991 set_ia32_op_type(new_node, ia32_AddrModeD);
1992 set_ia32_ls_mode(new_node, mode);
1993 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute the memory Proj of the consumed load to the new node */
1995 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
1996 mem_proj = be_transform_node(am.mem_proj);
1997 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* dest_am_unop: build a unary operation in destination address mode
 * (e.g. IncMem/NotMem operating on memory directly). Returns NULL when
 * destination AM is not applicable. */
2002 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2003 ir_node *ptr, ir_mode *mode,
2004 construct_unop_dest_func *func)
2006 ir_graph *irg = current_ir_graph;
2007 ir_node *src_block = get_nodes_block(node);
2012 ia32_address_mode_t am;
2013 ia32_address_t *addr = &am.addr;
2014 memset(&am, 0, sizeof(am));
/* bail out if the load/store pair cannot be fused */
2016 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2019 build_address(&am, op);
2021 dbgi = get_irn_dbg_info(node);
2022 block = be_transform_node(src_block);
2023 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2024 set_address(new_node, addr);
2025 set_ia32_op_type(new_node, ia32_AddrModeD);
2026 set_ia32_ls_mode(new_node, mode);
2027 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute the memory Proj of the consumed load to the new node */
2029 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
2030 mem_proj = be_transform_node(am.mem_proj);
2031 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* try_create_SetMem: try to turn Store(Mux(cond, 1, 0)) (or the negated
 * variant) of an 8 bit value into an ia32 SetMem that writes the flag
 * result directly to memory. Returns NULL when the pattern or mode does
 * not fit. */
2036 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2037 ir_mode *mode = get_irn_mode(node);
2038 ir_node *mux_true = get_Mux_true(node);
2039 ir_node *mux_false = get_Mux_false(node);
2050 ia32_address_t addr;
/* set instructions only produce 8 bit results */
2052 if(get_mode_size_bits(mode) != 8)
/* the Mux must select between the constants 1 and 0 (either order;
 * the negated flag setting is on an elided line) */
2055 if(is_Const_1(mux_true) && is_Const_0(mux_false)) {
2057 } else if(is_Const_0(mux_true) && is_Const_1(mux_false)) {
2063 build_address_ptr(&addr, ptr, mem);
2065 irg = current_ir_graph;
2066 dbgi = get_irn_dbg_info(node);
2067 block = get_nodes_block(node);
2068 new_block = be_transform_node(block);
2069 cond = get_Mux_sel(node);
/* transform the condition into a flags value + compare relation */
2070 flags = get_flags_node(cond, &pnc);
2071 new_mem = be_transform_node(mem);
2072 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2073 addr.index, addr.mem, flags, pnc, negated);
2074 set_address(new_node, &addr);
2075 set_ia32_op_type(new_node, ia32_AddrModeD);
2076 set_ia32_ls_mode(new_node, mode);
2077 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* try_create_dest_am: try to transform a Store whose value is computed
 * by a foldable operation (Add/Sub/And/Or/Eor/shifts/rotate/Mux/Minus/
 * Not) into a single destination-address-mode instruction that operates
 * on memory directly. Returns NULL when no pattern applies. */
2082 static ir_node *try_create_dest_am(ir_node *node) {
2083 ir_node *val = get_Store_value(node);
2084 ir_node *mem = get_Store_mem(node);
2085 ir_node *ptr = get_Store_ptr(node);
2086 ir_mode *mode = get_irn_mode(val);
2087 unsigned bits = get_mode_size_bits(mode);
2092 /* handle only GP modes for now... */
2093 if(!ia32_mode_needs_gp_reg(mode))
2097 /* store must be the only user of the val node */
2098 if(get_irn_n_edges(val) > 1)
2100 /* skip pointless convs */
2102 ir_node *conv_op = get_Conv_op(val);
2103 ir_mode *pred_mode = get_irn_mode(conv_op);
/* a Conv that does not widen the value is irrelevant for the store */
2104 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2112 /* value must be in the same block */
2113 if(get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation computing the stored value
 * (the case labels are on elided lines) */
2116 switch (get_irn_opcode(val)) {
2118 op1 = get_Add_left(val);
2119 op2 = get_Add_right(val);
/* Add/Sub by 1 become the shorter Inc/Dec memory forms */
2120 if(is_Const_1(op2)) {
2121 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2122 new_rd_ia32_IncMem);
2124 } else if(is_Const_Minus_1(op2)) {
2125 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2126 new_rd_ia32_DecMem);
2129 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2130 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2131 match_dest_am | match_commutative |
2135 op1 = get_Sub_left(val);
2136 op2 = get_Sub_right(val);
/* sub with const should have been normalised into an add */
2138 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2141 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2142 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2143 match_dest_am | match_immediate |
2147 op1 = get_And_left(val);
2148 op2 = get_And_right(val);
2149 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2150 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2151 match_dest_am | match_commutative |
2155 op1 = get_Or_left(val);
2156 op2 = get_Or_right(val);
2157 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2158 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2159 match_dest_am | match_commutative |
2163 op1 = get_Eor_left(val);
2164 op2 = get_Eor_right(val);
2165 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2166 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2167 match_dest_am | match_commutative |
2171 op1 = get_Shl_left(val);
2172 op2 = get_Shl_right(val);
/* shifts have no separate 8 bit constructor: same func twice */
2173 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2174 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2175 match_dest_am | match_immediate);
2178 op1 = get_Shr_left(val);
2179 op2 = get_Shr_right(val);
2180 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2181 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2182 match_dest_am | match_immediate);
2185 op1 = get_Shrs_left(val);
2186 op2 = get_Shrs_right(val);
2187 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2188 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2189 match_dest_am | match_immediate);
2192 op1 = get_Rotl_left(val);
2193 op2 = get_Rotl_right(val);
2194 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2195 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2196 match_dest_am | match_immediate);
2198 /* TODO: match ROR patterns... */
/* Store(Mux(c,1,0)) -> SetMem */
2200 new_node = try_create_SetMem(val, ptr, mem);
2203 op1 = get_Minus_op(val);
2204 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2207 /* should be lowered already */
2208 assert(mode != mode_b);
2209 op1 = get_Not_op(val);
2210 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* a pinned Store must stay pinned even after fusing */
2216 if(new_node != NULL) {
2217 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2218 get_irn_pinned(node) == op_pin_state_pinned) {
2219 set_irn_pinned(new_node, op_pin_state_pinned);
/* is_float_to_int32_conv: check whether node is a Conv from a float
 * mode to a signed 32 bit GP mode (suitable for a direct vfist store).
 * Unsigned targets are excluded because they need a 64 bit signed
 * vfist. Return values are on elided lines. */
2226 static int is_float_to_int32_conv(const ir_node *node)
2228 ir_mode *mode = get_irn_mode(node);
2232 if(get_mode_size_bits(mode) != 32 || !ia32_mode_needs_gp_reg(mode))
2234 /* don't report unsigned as conv to 32bit, because we really need to do
2235 * a vfist with 64bit signed in this case */
2236 if(!mode_is_signed(mode))
2241 conv_op = get_Conv_op(node);
2242 conv_mode = get_irn_mode(conv_op);
/* the source must actually be a float value */
2244 if(!mode_is_float(conv_mode))
2251 * Transform a Store(floatConst).
2253 * @return the created ia32 Store node
/* gen_float_const_Store: store a float constant by emitting one (32 bit
 * float) or two (64 bit double) integer immediate Stores of its raw bit
 * pattern, avoiding an FPU round trip entirely. */
2255 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns) {
2256 ir_mode *mode = get_irn_mode(cns);
2257 int size = get_mode_size_bits(mode);
2258 tarval *tv = get_Const_tarval(cns);
2259 ir_node *block = get_nodes_block(node);
2260 ir_node *new_block = be_transform_node(block);
2261 ir_node *ptr = get_Store_ptr(node);
2262 ir_node *mem = get_Store_mem(node);
2263 ir_graph *irg = current_ir_graph;
2264 dbg_info *dbgi = get_irn_dbg_info(node);
2265 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2268 ia32_address_t addr;
/* assemble the low 32 bits of the constant, little endian */
2270 unsigned val = get_tarval_sub_bits(tv, 0) |
2271 (get_tarval_sub_bits(tv, 1) << 8) |
2272 (get_tarval_sub_bits(tv, 2) << 16) |
2273 (get_tarval_sub_bits(tv, 3) << 24);
2274 ir_node *imm = create_Immediate(NULL, 0, val);
2276 /* construct store address */
2277 memset(&addr, 0, sizeof(addr));
2278 ia32_create_address_mode(&addr, ptr, /*force=*/0);
/* substitute NoReg for missing parts (branches on elided lines) */
2280 if (addr.base == NULL) {
2283 addr.base = be_transform_node(addr.base);
2286 if (addr.index == NULL) {
2289 addr.index = be_transform_node(addr.index);
2291 addr.mem = be_transform_node(mem);
/* store the low word as an integer immediate */
2293 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2294 addr.index, addr.mem, imm);
2296 set_irn_pinned(new_node, get_irn_pinned(node));
2297 set_ia32_op_type(new_node, ia32_AddrModeD);
2298 set_ia32_ls_mode(new_node, mode_Iu);
2300 set_address(new_node, &addr);
2302 /** add more stores if needed */
/* for doubles: second store with the high 32 bits at offset ofs */
2304 unsigned val = get_tarval_sub_bits(tv, ofs) |
2305 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2306 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2307 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2308 ir_node *imm = create_Immediate(NULL, 0, val);
/* chain the second store behind the first via its memory */
2311 addr.mem = new_node;
2313 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2314 addr.index, addr.mem, imm);
2316 set_irn_pinned(new_node, get_irn_pinned(node));
2317 set_ia32_op_type(new_node, ia32_AddrModeD);
2318 set_ia32_ls_mode(new_node, mode_Iu);
2320 set_address(new_node, &addr);
2325 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2330 * Generate a vfist or vfisttp instruction.
/* gen_vfist: emit a float->int store: vfisttp (truncating, always pops
 * the x87 tos) when available, otherwise classic vfist with an explicit
 * truncating FPU control word. *fist receives the store node. */
2332 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2333 ir_node *mem, ir_node *val, ir_node **fist)
2337 if (ia32_cg_config.use_fisttp) {
2338 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2339 if other users exists */
2340 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2341 ir_node *vfisttp = new_rd_ia32_vfisttp(dbgi, irg, block, base, index, mem, val);
2342 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
/* keep the popped value alive for potential other users */
2343 be_new_Keep(reg_class, irg, block, 1, &value);
2345 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* no fisttp: set the FPU rounding mode to truncation explicitly */
2348 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2351 new_node = new_rd_ia32_vfist(dbgi, irg, block, base, index, mem, val, trunc_mode);
2357 * Transforms a normal Store.
2359 * @return the created ia32 Store node
/* gen_normal_Store: transform a Store (float constant stores are
 * handled separately). Tries destination address mode first; otherwise
 * emits xStore/vfst (float), vfist (float->int conversion store) or
 * Store/Store8Bit (integer). */
2361 static ir_node *gen_normal_Store(ir_node *node)
2363 ir_node *val = get_Store_value(node);
2364 ir_mode *mode = get_irn_mode(val);
2365 ir_node *block = get_nodes_block(node);
2366 ir_node *new_block = be_transform_node(block);
2367 ir_node *ptr = get_Store_ptr(node);
2368 ir_node *mem = get_Store_mem(node);
2369 ir_graph *irg = current_ir_graph;
2370 dbg_info *dbgi = get_irn_dbg_info(node);
2371 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2372 ir_node *new_val, *new_node, *store;
2373 ia32_address_t addr;
2375 /* check for destination address mode */
2376 new_node = try_create_dest_am(node);
2377 if (new_node != NULL)
2380 /* construct store address */
2381 memset(&addr, 0, sizeof(addr));
2382 ia32_create_address_mode(&addr, ptr, /*force=*/0);
/* substitute NoReg for missing parts (branches on elided lines) */
2384 if (addr.base == NULL) {
2387 addr.base = be_transform_node(addr.base);
2390 if (addr.index == NULL) {
2393 addr.index = be_transform_node(addr.index);
2395 addr.mem = be_transform_node(mem);
2397 if (mode_is_float(mode)) {
2398 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2400 while (is_Conv(val) && mode == get_irn_mode(val)) {
2401 ir_node *op = get_Conv_op(val);
2402 if (!mode_is_float(get_irn_mode(op)))
2406 new_val = be_transform_node(val);
2407 if (ia32_cg_config.use_sse2) {
2408 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2409 addr.index, addr.mem, new_val);
2411 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2412 addr.index, addr.mem, new_val, mode);
/* float->int Conv feeding a store: fuse into a vfist */
2415 } else if (!ia32_cg_config.use_sse2 && is_float_to_int32_conv(val)) {
2416 val = get_Conv_op(val);
2418 /* TODO: is this optimisation still necessary at all (middleend)? */
2419 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2420 while (is_Conv(val)) {
2421 ir_node *op = get_Conv_op(val);
2422 if (!mode_is_float(get_irn_mode(op)))
2424 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2428 new_val = be_transform_node(val);
2429 new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: value may become an immediate */
2431 new_val = create_immediate_or_transform(val, 0);
2432 assert(mode != mode_b);
2434 if (get_mode_size_bits(mode) == 8) {
2435 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2436 addr.index, addr.mem, new_val);
2438 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2439 addr.index, addr.mem, new_val);
/* attributes go on the actual store node (may differ from new_node) */
2444 set_irn_pinned(store, get_irn_pinned(node));
2445 set_ia32_op_type(store, ia32_AddrModeD);
2446 set_ia32_ls_mode(store, mode);
2448 set_address(store, &addr);
2449 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2455 * Transforms a Store.
2457 * @return the created ia32 Store node
/* Dispatch a firm Store: floating point constants may be lowered to a
 * constant-pool based store, everything else goes through the generic
 * store path (gen_normal_Store). */
2459 static ir_node *gen_Store(ir_node *node)
2461 ir_node *val = get_Store_value(node);
2462 ir_mode *mode = get_irn_mode(val);
2464 if (mode_is_float(mode) && is_Const(val)) {
2467 /* we are storing a floating point constant */
/* Only constants that are NOT "simple" (cheap to materialise in an
 * SSE/x87 register) take the float-const store path — the branch
 * using 'transform' is partially elided in this excerpt; confirm. */
2468 if (ia32_cg_config.use_sse2) {
2469 transform = !is_simple_sse_Const(val);
2471 transform = !is_simple_x87_Const(val);
2474 return gen_float_const_Store(node, val);
2476 return gen_normal_Store(node);
2480 * Transforms a Switch.
2482 * @return the created ia32 SwitchJmp node
/* Build an ia32 SwitchJmp from a Cond with an integer (non-mode_b)
 * selector. The selector is normalised so the smallest case value
 * becomes 0 (via a Lea with a negative offset); a table spanning more
 * than 256000 entries is rejected with a panic. */
2484 static ir_node *create_Switch(ir_node *node)
2486 ir_graph *irg = current_ir_graph;
2487 dbg_info *dbgi = get_irn_dbg_info(node);
2488 ir_node *block = be_transform_node(get_nodes_block(node));
2489 ir_node *sel = get_Cond_selector(node);
2490 ir_node *new_sel = be_transform_node(sel);
2491 int switch_min = INT_MAX;
2492 int switch_max = INT_MIN;
2493 long default_pn = get_Cond_defaultProj(node);
2495 const ir_edge_t *edge;
/* switches are only generated for 32 bit selectors here */
2497 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2499 /* determine the smallest switch case value */
2500 foreach_out_edge(node, edge) {
2501 ir_node *proj = get_edge_src_irn(edge);
2502 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to min/max */
2503 if(pn == default_pn)
/* refuse to build an oversized jump table */
2512 if((unsigned) (switch_max - switch_min) > 256000) {
2513 panic("Size of switch %+F bigger than 256000", node);
2516 if (switch_min != 0) {
2517 ir_node *noreg = ia32_new_NoReg_gp(env_cg)
2519 /* if smallest switch case is not 0 we need an additional sub */
2520 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2521 add_ia32_am_offs_int(new_sel, -switch_min);
2522 set_ia32_op_type(new_sel, ia32_AddrModeS);
2524 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2527 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2528 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2534 * Transform a Cond node.
/* Transform a Cond node: a non-boolean selector is a switch and becomes
 * a SwitchJmp; otherwise the flags are taken from the Cmp feeding the
 * selector and a conditional jump (Jcc) is emitted. */
2536 static ir_node *gen_Cond(ir_node *node) {
2537 ir_node *block = get_nodes_block(node);
2538 ir_node *new_block = be_transform_node(block);
2539 ir_graph *irg = current_ir_graph;
2540 dbg_info *dbgi = get_irn_dbg_info(node);
2541 ir_node *sel = get_Cond_selector(node);
2542 ir_mode *sel_mode = get_irn_mode(sel);
2543 ir_node *flags = NULL;
/* switch-Cond: integer selector instead of mode_b */
2547 if (sel_mode != mode_b) {
2548 return create_Switch(node);
2551 /* we get flags from a Cmp */
2552 flags = get_flags_node(sel, &pnc);
2554 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2555 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2561 * Transforms a CopyB node.
2563 * @return The transformed node.
/* Transform a CopyB (block copy). Large copies (>= 128 bytes) use the
 * CopyB node with an explicit size value (destined for ECX, rep-movs
 * style); smaller copies use CopyB_i with an immediate size. */
2565 static ir_node *gen_CopyB(ir_node *node) {
2566 ir_node *block = be_transform_node(get_nodes_block(node));
2567 ir_node *src = get_CopyB_src(node);
2568 ir_node *new_src = be_transform_node(src);
2569 ir_node *dst = get_CopyB_dst(node);
2570 ir_node *new_dst = be_transform_node(dst);
2571 ir_node *mem = get_CopyB_mem(node);
2572 ir_node *new_mem = be_transform_node(mem);
2573 ir_node *res = NULL;
2574 ir_graph *irg = current_ir_graph;
2575 dbg_info *dbgi = get_irn_dbg_info(node);
2576 int size = get_type_size_bytes(get_CopyB_type(node));
2579 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2580 /* then we need the size explicitly in ECX. */
2581 if (size >= 32 * 4) {
/* remainder of the size that is not a multiple of 4 bytes */
2582 rem = size & 0x3; /* size % 4 */
2585 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
/* keep the Const schedulable: make it depend on the frame */
2586 add_irn_dep(res, get_irg_frame(irg));
2588 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
/* tiny copies should have been lowered to Load/Store earlier */
2591 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
2594 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2597 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/* Transform a be_Copy: duplicate the node and normalise all modes that
 * live in gp registers to mode_Iu (all integer ops are 32 bit here). */
2602 static ir_node *gen_be_Copy(ir_node *node)
2604 ir_node *new_node = be_duplicate_node(node);
2605 ir_mode *mode = get_irn_mode(new_node);
2607 if (ia32_mode_needs_gp_reg(mode)) {
2608 set_irn_mode(new_node, mode_Iu);
/* Create an x87 floating point compare. Preferred order:
 *   1. vFucomi  — writes eflags directly (P6+),
 *   2. vFtstFnstsw — compare against 0 without loading the constant,
 *   3. vFucomFnstsw — generic fucom + fnstsw,
 * where the fnstsw variants move the FPU status word into eflags via
 * an additional Sahf node. */
2614 static ir_node *create_Fucom(ir_node *node)
2616 ir_graph *irg = current_ir_graph;
2617 dbg_info *dbgi = get_irn_dbg_info(node);
2618 ir_node *block = get_nodes_block(node);
2619 ir_node *new_block = be_transform_node(block);
2620 ir_node *left = get_Cmp_left(node);
2621 ir_node *new_left = be_transform_node(left);
2622 ir_node *right = get_Cmp_right(node);
2626 if(ia32_cg_config.use_fucomi) {
2627 new_right = be_transform_node(right);
2628 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2630 set_ia32_commutative(new_node);
2631 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* ftst only works against the constant 0 */
2633 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2634 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2637 new_right = be_transform_node(right);
2638 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2642 set_ia32_commutative(new_node);
2644 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* transfer the FPU status word (AH after fnstsw) into eflags */
2646 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2647 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Create an SSE floating point compare (Ucomi). Operands are matched
 * with address-mode support, so one operand may be folded into a
 * memory operand; fix_mem_proj rewires a possible memory Proj. */
2653 static ir_node *create_Ucomi(ir_node *node)
2655 ir_graph *irg = current_ir_graph;
2656 dbg_info *dbgi = get_irn_dbg_info(node);
2657 ir_node *src_block = get_nodes_block(node);
2658 ir_node *new_block = be_transform_node(src_block);
2659 ir_node *left = get_Cmp_left(node);
2660 ir_node *right = get_Cmp_right(node);
2662 ia32_address_mode_t am;
2663 ia32_address_t *addr = &am.addr;
2665 match_arguments(&am, src_block, left, right, NULL,
2666 match_commutative | match_am);
2668 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2669 addr->mem, am.new_op1, am.new_op2,
2671 set_am_attributes(new_node, &am);
2673 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2675 new_node = fix_mem_proj(new_node, &am);
2681 * helper function: checks wether all Cmp projs are Lg or Eq which is needed
2682 * to fold an and into a test node
/* Returns non-zero iff every Proj of the Cmp is Eq or Lg. Only then
 * is it safe to fold an And into a Test node, because Test only
 * produces meaningful results for (in)equality against zero. */
2684 static int can_fold_test_and(ir_node *node)
2686 const ir_edge_t *edge;
2688 /* we can only have Eq and Lg projs */
2689 foreach_out_edge(node, edge) {
2690 ir_node *proj = get_edge_src_irn(edge);
2691 pn_Cmp pnc = get_Proj_proj(proj);
2692 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2700 * Generate code for a Cmp.
/* Generate code for a Cmp. Float compares are delegated to
 * Ucomi (SSE) or Fucom (x87). For integers, the pattern
 * (x & y) ==/!= 0 is folded into a Test; otherwise a Cmp is built.
 * 8-bit operands use the dedicated 8-bit node variants. */
2702 static ir_node *gen_Cmp(ir_node *node)
2704 ir_graph *irg = current_ir_graph;
2705 dbg_info *dbgi = get_irn_dbg_info(node);
2706 ir_node *block = get_nodes_block(node);
2707 ir_node *new_block = be_transform_node(block);
2708 ir_node *left = get_Cmp_left(node);
2709 ir_node *right = get_Cmp_right(node);
2710 ir_mode *cmp_mode = get_irn_mode(left);
2712 ia32_address_mode_t am;
2713 ia32_address_t *addr = &am.addr;
2716 if(mode_is_float(cmp_mode)) {
2717 if (ia32_cg_config.use_sse2) {
2718 return create_Ucomi(node);
2720 return create_Fucom(node);
2724 assert(ia32_mode_needs_gp_reg(cmp_mode));
2726 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2727 cmp_unsigned = !mode_is_signed(cmp_mode);
/* the And must have a single user (this Cmp) and all Cmp users must
 * be Eq/Lg projs for the Test folding to be valid */
2728 if (is_Const_0(right) &&
2730 get_irn_n_edges(left) == 1 &&
2731 can_fold_test_and(node)) {
2732 /* Test(and_left, and_right) */
2733 ir_node *and_left = get_And_left(left);
2734 ir_node *and_right = get_And_right(left);
2735 ir_mode *mode = get_irn_mode(and_left);
2737 match_arguments(&am, block, and_left, and_right, NULL,
2739 match_am | match_8bit_am | match_16bit_am |
2740 match_am_and_immediates | match_immediate |
2741 match_8bit | match_16bit);
2742 if (get_mode_size_bits(mode) == 8) {
2743 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2744 addr->index, addr->mem, am.new_op1,
2745 am.new_op2, am.ins_permuted,
2748 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2749 addr->index, addr->mem, am.new_op1,
2750 am.new_op2, am.ins_permuted, cmp_unsigned);
2753 /* Cmp(left, right) */
2754 match_arguments(&am, block, left, right, NULL,
2755 match_commutative | match_am | match_8bit_am |
2756 match_16bit_am | match_am_and_immediates |
2757 match_immediate | match_8bit | match_16bit);
2758 if (get_mode_size_bits(cmp_mode) == 8) {
2759 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2760 addr->index, addr->mem, am.new_op1,
2761 am.new_op2, am.ins_permuted,
2764 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2765 addr->index, addr->mem, am.new_op1,
2766 am.new_op2, am.ins_permuted, cmp_unsigned);
2769 set_am_attributes(new_node, &am);
2770 set_ia32_ls_mode(new_node, cmp_mode);
2772 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2774 new_node = fix_mem_proj(new_node, &am);
/* Create an ia32 CMov for a Mux whose values live in gp registers.
 * Requires CMOV support (asserted). The false/true values are matched
 * with address-mode support; 'flags' provides the condition. */
2779 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2782 ir_graph *irg = current_ir_graph;
2783 dbg_info *dbgi = get_irn_dbg_info(node);
2784 ir_node *block = get_nodes_block(node);
2785 ir_node *new_block = be_transform_node(block);
2786 ir_node *val_true = get_Mux_true(node);
2787 ir_node *val_false = get_Mux_false(node);
2789 match_flags_t match_flags;
2790 ia32_address_mode_t am;
2791 ia32_address_t *addr;
2793 assert(ia32_cg_config.use_cmov);
2794 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2798 match_flags = match_commutative | match_am | match_16bit_am |
2801 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2803 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2804 addr->mem, am.new_op1, am.new_op2, new_flags,
2805 am.ins_permuted, pnc);
2806 set_am_attributes(new_node, &am);
2808 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2810 new_node = fix_mem_proj(new_node, &am);
2816 * Creates a ia32 Setcc instruction.
/* Create an ia32 Set (setcc) producing 0/1 from 'flags'/'pnc'. Since
 * setcc only writes an 8-bit register, the result is zero-extended
 * with a Conv_I2I8Bit when the original node's mode is wider. */
2818 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2819 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2822 ir_graph *irg = current_ir_graph;
2823 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2824 ir_node *nomem = new_NoMem();
2825 ir_mode *mode = get_irn_mode(orig_node);
2828 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2829 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2831 /* we might need to conv the result up */
2832 if (get_mode_size_bits(mode) > 8) {
2833 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2834 nomem, new_node, mode_Bu);
2835 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2842 * Create instruction for an unsigned Difference or Zero.
/* Create an unsigned "Difference or Zero": max(a - b, 0) for unsigned
 * a, b. Built as Sub(a, b), then Sbb(t, t, carry) which yields an
 * all-zero/all-one mask from the borrow flag, then And-ing the Sub
 * result with that mask. */
2844 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b) {
2845 ir_graph *irg = current_ir_graph;
2846 ir_mode *mode = get_irn_mode(psi);
2847 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
2850 new_node = gen_binop(psi, a, b, new_rd_ia32_Sub,
2851 match_mode_neutral | match_am | match_immediate | match_two_users);
2853 block = get_nodes_block(new_node);
/* we need both the result and the flags Proj of the Sub */
2855 if (is_Proj(new_node)) {
2856 sub = get_Proj_pred(new_node);
2857 assert(is_ia32_Sub(sub));
2860 set_irn_mode(sub, mode_T);
2861 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2863 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2865 dbgi = get_irn_dbg_info(psi);
2866 noreg = ia32_new_NoReg_gp(env_cg);
/* ProduceVal gives an undefined value; sbb t,t only depends on carry */
2867 tmpreg = new_rd_ia32_ProduceVal(dbgi, irg, block);
2868 nomem = new_NoMem();
2869 sbb = new_rd_ia32_Sbb(dbgi, irg, block, noreg, noreg, nomem, tmpreg, tmpreg, eflags);
2871 new_node = new_rd_ia32_And(dbgi, irg, block, noreg, noreg, nomem, new_node, sbb);
2872 set_ia32_commutative(new_node);
2877 * Transforms a Mux node into CMov.
2879 * @return The transformed node.
/* Transform a Mux. Float Muxes are only supported when they match the
 * SSE min/max patterns; integer Muxes are lowered to Doz, Set (for
 * 0/1 constants) or CMov. */
2881 static ir_node *gen_Mux(ir_node *node)
2883 dbg_info *dbgi = get_irn_dbg_info(node);
2884 ir_node *block = get_nodes_block(node);
2885 ir_node *new_block = be_transform_node(block);
2886 ir_node *mux_true = get_Mux_true(node);
2887 ir_node *mux_false = get_Mux_false(node);
2888 ir_node *cond = get_Mux_sel(node);
2889 ir_mode *mode = get_irn_mode(node);
2892 assert(get_irn_mode(cond) == mode_b);
2894 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
2895 if (mode_is_float(mode)) {
2896 ir_node *cmp = get_Proj_pred(cond);
2897 ir_node *cmp_left = get_Cmp_left(cmp);
2898 ir_node *cmp_right = get_Cmp_right(cmp);
2899 pn_Cmp pnc = get_Proj_proj(cond);
/* recognise min/max patterns which map to SSE xMin/xMax */
2901 if (ia32_cg_config.use_sse2) {
2902 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
2903 if (cmp_left == mux_true && cmp_right == mux_false) {
2904 /* Mux(a <= b, a, b) => MIN */
2905 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2906 match_commutative | match_am | match_two_users);
2907 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2908 /* Mux(a <= b, b, a) => MAX */
2909 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2910 match_commutative | match_am | match_two_users);
2912 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
2913 if (cmp_left == mux_true && cmp_right == mux_false) {
2914 /* Mux(a >= b, a, b) => MAX */
2915 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2916 match_commutative | match_am | match_two_users);
2917 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2918 /* Mux(a >= b, b, a) => MIN */
2919 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2920 match_commutative | match_am | match_two_users);
/* no other float Mux form is supported */
2924 panic("cannot transform floating point Mux");
2930 assert(ia32_mode_needs_gp_reg(mode));
2932 if (is_Proj(cond)) {
2933 ir_node *cmp = get_Proj_pred(cond);
2935 ir_node *cmp_left = get_Cmp_left(cmp);
2936 ir_node *cmp_right = get_Cmp_right(cmp);
2937 pn_Cmp pnc = get_Proj_proj(cond);
2939 /* check for unsigned Doz first */
2940 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
2941 is_Const_0(mux_false) && is_Sub(mux_true) &&
2942 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
2943 /* Mux(a >=u b, a - b, 0) unsigned Doz */
2944 return create_Doz(node, cmp_left, cmp_right);
2945 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
2946 is_Const_0(mux_true) && is_Sub(mux_false) &&
2947 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
2948 /* Mux(a <=u b, 0, a - b) unsigned Doz */
2949 return create_Doz(node, cmp_left, cmp_right);
2954 flags = get_flags_node(cond, &pnc);
2956 if (is_Const(mux_true) && is_Const(mux_false)) {
2957 /* both are const, good */
/* Mux(c, 1, 0) / Mux(c, 0, 1) become a (possibly permuted) setcc */
2958 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2959 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
2960 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2961 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
2963 /* Not that simple. */
2968 new_node = create_CMov(node, cond, flags, pnc);
2976 * Create a conversion from x87 state register to general purpose.
/* Convert an x87 float to a gp integer by storing with fist(p) into the
 * frame and loading the integer back. fist can only store signed
 * values, so unsigned 32 bit values are stored as 64 bit signed and
 * only the low 32 bits are loaded. */
2978 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2979 ir_node *block = be_transform_node(get_nodes_block(node));
2980 ir_node *op = get_Conv_op(node);
2981 ir_node *new_op = be_transform_node(op);
2982 ia32_code_gen_t *cg = env_cg;
2983 ir_graph *irg = current_ir_graph;
2984 dbg_info *dbgi = get_irn_dbg_info(node);
2985 ir_node *noreg = ia32_new_NoReg_gp(cg);
2986 ir_mode *mode = get_irn_mode(node);
2987 ir_node *fist, *load, *mem;
2989 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
2990 set_irn_pinned(fist, op_pin_state_floats);
2991 set_ia32_use_frame(fist);
2992 set_ia32_op_type(fist, ia32_AddrModeD);
2994 assert(get_mode_size_bits(mode) <= 32);
2995 /* exception we can only store signed 32 bit integers, so for unsigned
2996 we store a 64bit (signed) integer and load the lower bits */
2997 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2998 set_ia32_ls_mode(fist, mode_Ls);
3000 set_ia32_ls_mode(fist, mode_Is);
3002 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* load the (low word of the) stored integer back from the frame */
3005 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, mem);
3007 set_irn_pinned(load, op_pin_state_floats);
3008 set_ia32_use_frame(load);
3009 set_ia32_op_type(load, ia32_AddrModeS);
3010 set_ia32_ls_mode(load, mode_Is);
/* the stack slot must be large enough for the 64 bit spill case */
3011 if(get_ia32_ls_mode(fist) == mode_Ls) {
3012 ia32_attr_t *attr = get_ia32_attr(load);
3013 attr->data.need_64bit_stackent = 1;
3015 ia32_attr_t *attr = get_ia32_attr(load);
3016 attr->data.need_32bit_stackent = 1;
3018 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
3020 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3024 * Creates a x87 strict Conv by placing a Store and a Load
/* Create an x87 strict Conv by a store/load round trip through the
 * frame: storing in tgt_mode forces the value to be rounded to that
 * precision, which the x87 register stack (80 bit) would not do. */
3026 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3028 ir_node *block = get_nodes_block(node);
3029 ir_graph *irg = current_ir_graph;
3030 dbg_info *dbgi = get_irn_dbg_info(node);
3031 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3032 ir_node *nomem = new_NoMem();
3033 ir_node *frame = get_irg_frame(irg);
3034 ir_node *store, *load;
3037 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
3039 set_ia32_use_frame(store);
3040 set_ia32_op_type(store, ia32_AddrModeD);
3041 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
3043 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3045 set_ia32_use_frame(load);
3046 set_ia32_op_type(load, ia32_AddrModeS);
3047 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3049 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3054 * Create a conversion from general purpose to x87 register
/* Convert a gp integer to an x87 float with fild. A signed 32 bit
 * source may be read directly from memory (source address mode);
 * otherwise the value is widened to 32 bit, spilled to the frame and
 * loaded with fild. Unsigned 32 bit values get a zero high word and a
 * 64 bit fild, since fild only understands signed integers. */
3056 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3057 ir_node *src_block = get_nodes_block(node);
3058 ir_node *block = be_transform_node(src_block);
3059 ir_graph *irg = current_ir_graph;
3060 dbg_info *dbgi = get_irn_dbg_info(node);
3061 ir_node *op = get_Conv_op(node);
3062 ir_node *new_op = NULL;
3066 ir_mode *store_mode;
3072 /* fild can use source AM if the operand is a signed 32bit integer */
3073 if (src_mode == mode_Is) {
3074 ia32_address_mode_t am;
3076 match_arguments(&am, src_block, NULL, op, NULL,
3077 match_am | match_try_am);
3078 if (am.op_type == ia32_AddrModeS) {
3079 ia32_address_t *addr = &am.addr;
3081 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3082 addr->index, addr->mem);
3083 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3086 set_am_attributes(fild, &am);
3087 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3089 fix_mem_proj(fild, &am);
3094 if(new_op == NULL) {
3095 new_op = be_transform_node(op);
3098 noreg = ia32_new_NoReg_gp(env_cg);
3099 nomem = new_NoMem();
3100 mode = get_irn_mode(op);
3102 /* first convert to 32 bit signed if necessary */
3103 src_bits = get_mode_size_bits(src_mode);
3104 if (src_bits == 8) {
3105 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3107 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3109 } else if (src_bits < 32) {
3110 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3112 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3116 assert(get_mode_size_bits(mode) == 32);
/* spill the (now 32 bit) integer to the frame */
3119 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3122 set_ia32_use_frame(store);
3123 set_ia32_op_type(store, ia32_AddrModeD);
3124 set_ia32_ls_mode(store, mode_Iu);
3126 /* exception for 32bit unsigned, do a 64bit spill+load */
3127 if(!mode_is_signed(mode)) {
3130 ir_node *zero_const = create_Immediate(NULL, 0, 0);
/* store a zero high word at frame offset +4 */
3132 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3133 get_irg_frame(irg), noreg, nomem,
3136 set_ia32_use_frame(zero_store);
3137 set_ia32_op_type(zero_store, ia32_AddrModeD);
3138 add_ia32_am_offs_int(zero_store, 4);
3139 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must see both stores: merge them with a Sync */
3144 store = new_rd_Sync(dbgi, irg, block, 2, in);
3145 store_mode = mode_Ls;
3147 store_mode = mode_Is;
3151 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3153 set_ia32_use_frame(fild);
3154 set_ia32_op_type(fild, ia32_AddrModeS);
3155 set_ia32_ls_mode(fild, store_mode);
3157 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3163 * Create a conversion from one integer mode into another one
/* Create a conversion between two integer modes. Only the smaller of
 * the two modes matters: the value is sign/zero extended from it via
 * Conv_I2I (or the 8-bit variant), with address-mode matching so the
 * source may be read directly from memory. */
3165 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3166 dbg_info *dbgi, ir_node *block, ir_node *op,
3169 ir_graph *irg = current_ir_graph;
3170 int src_bits = get_mode_size_bits(src_mode);
3171 int tgt_bits = get_mode_size_bits(tgt_mode);
3172 ir_node *new_block = be_transform_node(block);
3174 ir_mode *smaller_mode;
3176 ia32_address_mode_t am;
3177 ia32_address_t *addr = &am.addr;
/* pick the narrower mode; it determines the extension width */
3180 if (src_bits < tgt_bits) {
3181 smaller_mode = src_mode;
3182 smaller_bits = src_bits;
3184 smaller_mode = tgt_mode;
3185 smaller_bits = tgt_bits;
3188 #ifdef DEBUG_libfirm
3190 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3195 match_arguments(&am, block, NULL, op, NULL,
3196 match_8bit | match_16bit |
3197 match_am | match_8bit_am | match_16bit_am);
3198 if (smaller_bits == 8) {
3199 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3200 addr->index, addr->mem, am.new_op2,
3203 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3204 addr->index, addr->mem, am.new_op2,
3207 set_am_attributes(new_node, &am);
3208 /* match_arguments assume that out-mode = in-mode, this isn't true here
3210 set_ia32_ls_mode(new_node, smaller_mode);
3211 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3212 new_node = fix_mem_proj(new_node, &am);
3217 * Transforms a Conv node.
3219 * @return The created ia32 Conv node
/* Transform a Conv node. Dispatches on source/target mode:
 *   - mode_b source: booleans are already 0/1 ints, nothing to do
 *   - same mode: no-op unless a strict x87 conv is needed
 *   - float->float: SSE Conv_FP2FP or x87 strict store/load round trip
 *   - float->int:   SSE Conv_FP2I or x87 fist (gen_x87_fp_to_gp)
 *   - int->float:   SSE Conv_I2FP or x87 fild (gen_x87_gp_to_fp)
 *   - int->int:     create_I2I_Conv */
3221 static ir_node *gen_Conv(ir_node *node) {
3222 ir_node *block = get_nodes_block(node);
3223 ir_node *new_block = be_transform_node(block);
3224 ir_node *op = get_Conv_op(node);
3225 ir_node *new_op = NULL;
3226 ir_graph *irg = current_ir_graph;
3227 dbg_info *dbgi = get_irn_dbg_info(node);
3228 ir_mode *src_mode = get_irn_mode(op);
3229 ir_mode *tgt_mode = get_irn_mode(node);
3230 int src_bits = get_mode_size_bits(src_mode);
3231 int tgt_bits = get_mode_size_bits(tgt_mode);
3232 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3233 ir_node *nomem = new_rd_NoMem(irg);
3234 ir_node *res = NULL;
3236 if (src_mode == mode_b) {
3237 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3238 /* nothing to do, we already model bools as 0/1 ints */
3239 return be_transform_node(op);
3242 if (src_mode == tgt_mode) {
3243 if (get_Conv_strict(node)) {
3244 if (ia32_cg_config.use_sse2) {
3245 /* when we are in SSE mode, we can kill all strict no-op conversion */
3246 return be_transform_node(op);
3249 /* this should be optimized already, but who knows... */
3250 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3251 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3252 return be_transform_node(op);
3256 if (mode_is_float(src_mode)) {
3257 new_op = be_transform_node(op);
3258 /* we convert from float ... */
3259 if (mode_is_float(tgt_mode)) {
/* a non-strict shrink from extended to double has no effect on x87 */
3260 if(src_mode == mode_E && tgt_mode == mode_D
3261 && !get_Conv_strict(node)) {
3262 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3267 if (ia32_cg_config.use_sse2) {
3268 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3269 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3271 set_ia32_ls_mode(res, tgt_mode);
/* x87 only changes precision via a store/load round trip */
3273 if(get_Conv_strict(node)) {
3274 res = gen_x87_strict_conv(tgt_mode, new_op);
3275 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3278 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3283 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3284 if (ia32_cg_config.use_sse2) {
3285 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3287 set_ia32_ls_mode(res, src_mode);
3289 return gen_x87_fp_to_gp(node);
3293 /* we convert from int ... */
3294 if (mode_is_float(tgt_mode)) {
3296 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3297 if (ia32_cg_config.use_sse2) {
3298 new_op = be_transform_node(op);
3299 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3301 set_ia32_ls_mode(res, tgt_mode);
3303 res = gen_x87_gp_to_fp(node, src_mode);
3304 if(get_Conv_strict(node)) {
3305 /* The strict-Conv is only necessary, if the int mode has more bits
3306 * than the float mantissa */
3307 size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3308 size_t float_mantissa;
3309 /* FIXME There is no way to get the mantissa size of a mode */
3310 switch (get_mode_size_bits(tgt_mode)) {
3311 case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
3312 case 64: float_mantissa = 52 + 1; break;
3313 case 80: float_mantissa = 64 + 1; break;
3314 default: float_mantissa = 0; break;
3316 if (float_mantissa < int_mantissa) {
3317 res = gen_x87_strict_conv(tgt_mode, res);
3318 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3323 } else if(tgt_mode == mode_b) {
3324 /* mode_b lowering already took care that we only have 0/1 values */
3325 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3326 src_mode, tgt_mode));
3327 return be_transform_node(op);
/* int->int with equal bit width is a no-op on ia32 */
3330 if (src_bits == tgt_bits) {
3331 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3332 src_mode, tgt_mode));
3333 return be_transform_node(op);
3336 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Try to encode 'node' as an ia32 Immediate (subject to the given
 * constraint type); fall back to the normal transformation when the
 * node is not a suitable immediate. */
3344 static ir_node *create_immediate_or_transform(ir_node *node,
3345 char immediate_constraint_type)
3347 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3348 if (new_node == NULL) {
3349 new_node = be_transform_node(node);
3355 * Transforms a FrameAddr into an ia32 Add.
/* Transform a be_FrameAddr into an ia32 Lea carrying the frame entity;
 * the concrete offset is filled in later when the frame is laid out. */
3357 static ir_node *gen_be_FrameAddr(ir_node *node) {
3358 ir_node *block = be_transform_node(get_nodes_block(node));
3359 ir_node *op = be_get_FrameAddr_frame(node);
3360 ir_node *new_op = be_transform_node(op);
3361 ir_graph *irg = current_ir_graph;
3362 dbg_info *dbgi = get_irn_dbg_info(node);
3363 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3366 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3367 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3368 set_ia32_use_frame(new_node);
3370 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3376 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return. When SSE2 is used and a float is returned,
 * the calling convention requires the result on the x87 stack, so the
 * SSE value is stored to the frame (xStoreSimple) and re-loaded with
 * an x87 vfld; the Barrier before the Return is rebuilt with the new
 * value and memory inputs. All other returns are duplicated as-is. */
3378 static ir_node *gen_be_Return(ir_node *node) {
3379 ir_graph *irg = current_ir_graph;
3380 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3381 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3382 ir_entity *ent = get_irg_entity(irg);
3383 ir_type *tp = get_entity_type(ent);
3388 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3389 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3392 int pn_ret_val, pn_ret_mem, arity, i;
3394 assert(ret_val != NULL);
/* only the SSE2 + float-result case needs special handling */
3395 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3396 return be_duplicate_node(node);
3399 res_type = get_method_res_type(tp, 0);
3401 if (! is_Primitive_type(res_type)) {
3402 return be_duplicate_node(node);
3405 mode = get_type_mode(res_type);
3406 if (! mode_is_float(mode)) {
3407 return be_duplicate_node(node);
3410 assert(get_method_n_ress(tp) == 1);
3412 pn_ret_val = get_Proj_proj(ret_val);
3413 pn_ret_mem = get_Proj_proj(ret_mem);
3415 /* get the Barrier */
3416 barrier = get_Proj_pred(ret_val);
3418 /* get result input of the Barrier */
3419 ret_val = get_irn_n(barrier, pn_ret_val);
3420 new_ret_val = be_transform_node(ret_val);
3422 /* get memory input of the Barrier */
3423 ret_mem = get_irn_n(barrier, pn_ret_mem);
3424 new_ret_mem = be_transform_node(ret_mem);
3426 frame = get_irg_frame(irg);
3428 dbgi = get_irn_dbg_info(barrier);
3429 block = be_transform_node(get_nodes_block(barrier));
3431 noreg = ia32_new_NoReg_gp(env_cg);
3433 /* store xmm0 onto stack */
3434 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3435 new_ret_mem, new_ret_val);
3436 set_ia32_ls_mode(sse_store, mode);
3437 set_ia32_op_type(sse_store, ia32_AddrModeD);
3438 set_ia32_use_frame(sse_store);
3440 /* load into x87 register */
3441 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3442 set_ia32_op_type(fld, ia32_AddrModeS);
3443 set_ia32_use_frame(fld);
3445 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3446 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3448 /* create a new barrier */
3449 arity = get_irn_arity(barrier);
3450 in = alloca(arity * sizeof(in[0]));
3451 for (i = 0; i < arity; ++i) {
/* substitute the re-loaded value and the new memory; transform the
 * remaining inputs normally */
3454 if (i == pn_ret_val) {
3456 } else if (i == pn_ret_mem) {
3459 ir_node *in = get_irn_n(barrier, i);
3460 new_in = be_transform_node(in);
3465 new_barrier = new_ir_node(dbgi, irg, block,
3466 get_irn_op(barrier), get_irn_mode(barrier),
3468 copy_node_attr(barrier, new_barrier);
3469 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the old barrier is not transformed again */
3470 be_set_transformed_node(barrier, new_barrier);
3471 mark_irn_visited(barrier);
3473 /* transform normally */
3474 return be_duplicate_node(node);
3478 * Transform a be_AddSP into an ia32_SubSP.
/* Transform a be_AddSP into an ia32 SubSP: the x86 stack grows
 * downwards, so allocating stack space subtracts from ESP. */
3480 static ir_node *gen_be_AddSP(ir_node *node)
3482 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3483 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3485 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
3489 * Transform a be_SubSP into an ia32_AddSP
/* Transform a be_SubSP into an ia32 AddSP: freeing stack space adds
 * to ESP (mirror of gen_be_AddSP). */
3491 static ir_node *gen_be_SubSP(ir_node *node)
3493 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3494 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3496 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
3500 * Change some phi modes
/* Transform a Phi: normalise its mode for the backend (gp modes become
 * mode_Iu; float handling is config dependent) and recreate the Phi
 * with its OLD predecessors — Phis may sit on loops, so the inputs are
 * fixed up later after the predecessors were transformed. */
3502 static ir_node *gen_Phi(ir_node *node) {
3503 ir_node *block = be_transform_node(get_nodes_block(node));
3504 ir_graph *irg = current_ir_graph;
3505 dbg_info *dbgi = get_irn_dbg_info(node);
3506 ir_mode *mode = get_irn_mode(node);
3509 if(ia32_mode_needs_gp_reg(mode)) {
3510 /* we shouldn't have any 64bit stuff around anymore */
3511 assert(get_mode_size_bits(mode) <= 32);
3512 /* all integer operations are on 32bit registers now */
3514 } else if(mode_is_float(mode)) {
3515 if (ia32_cg_config.use_sse2) {
3522 /* phi nodes allow loops, so we use the old arguments for now
3523 * and fix this later */
3524 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3525 get_irn_in(node) + 1);
3526 copy_node_attr(node, phi);
3527 be_duplicate_deps(node, phi);
3529 be_set_transformed_node(node, phi);
/* queue the predecessors so their transformation is scheduled */
3530 be_enqueue_preds(node);
/* Transform an IJmp (computed/indirect jump). The target is matched
 * with address-mode and immediate support, so the jump may go through
 * a memory operand or an immediate address directly. */
3538 static ir_node *gen_IJmp(ir_node *node)
3540 ir_node *block = get_nodes_block(node);
3541 ir_node *new_block = be_transform_node(block);
3542 dbg_info *dbgi = get_irn_dbg_info(node);
3543 ir_node *op = get_IJmp_target(node);
3545 ia32_address_mode_t am;
3546 ia32_address_t *addr = &am.addr;
/* jump targets are always pointers */
3548 assert(get_irn_mode(op) == mode_P);
3550 match_arguments(&am, block, NULL, op, NULL,
3551 match_am | match_8bit_am | match_16bit_am |
3552 match_immediate | match_8bit | match_16bit);
3554 new_node = new_rd_ia32_IJmp(dbgi, current_ir_graph, new_block,
3555 addr->base, addr->index, addr->mem,
3557 set_am_attributes(new_node, &am);
3558 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3560 new_node = fix_mem_proj(new_node, &am);
3566 * Transform a Bound node.
/* Transform a Bound node. Only the common case with lower bound 0
 * (typical for Java array checks) is supported: index - upper is
 * computed with a Sub and an unsigned-less-than Jcc on its flags
 * covers both bounds at once. Other forms panic. */
3568 static ir_node *gen_Bound(ir_node *node)
3571 ir_node *lower = get_Bound_lower(node);
3572 dbg_info *dbgi = get_irn_dbg_info(node);
3574 if (is_Const_0(lower)) {
3575 /* typical case for Java */
3576 ir_node *sub, *res, *flags, *block;
3577 ir_graph *irg = current_ir_graph;
3579 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3580 new_rd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3582 block = get_nodes_block(res);
/* make sure we have a mode_T Sub so both result and flags Proj exist */
3583 if (! is_Proj(res)) {
3585 set_irn_mode(sub, mode_T);
3586 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3588 sub = get_Proj_pred(res);
3590 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
/* unsigned compare: index <u upper also rejects negative indices */
3591 new_node = new_rd_ia32_Jcc(dbgi, irg, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3592 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3594 panic("generic Bound not supported in ia32 Backend");
/* Constructor signatures shared by the gen_lowered_Load/Store helpers below:
 * they match the new_rd_ia32_* node constructors for loads and stores. */
3600 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3603 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3604 ir_node *val, ir_node *mem);
3607 * Transforms a lowered Load into a "real" one.
/* Generic helper: rebuild a lowered load node (inputs: 0=ptr, 1=mem) with the
 * given constructor and copy over all ia32 address-mode attributes
 * (offset, scale, symconst, ls_mode, frame entity). */
3609 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
3611 ir_node *block = be_transform_node(get_nodes_block(node));
3612 ir_node *ptr = get_irn_n(node, 0);
3613 ir_node *new_ptr = be_transform_node(ptr);
3614 ir_node *mem = get_irn_n(node, 1);
3615 ir_node *new_mem = be_transform_node(mem);
3616 ir_graph *irg = current_ir_graph;
3617 dbg_info *dbgi = get_irn_dbg_info(node);
3618 ir_mode *mode = get_ia32_ls_mode(node);
3619 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3622 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
/* copy the address-mode description from the lowered node */
3624 set_ia32_op_type(new_op, ia32_AddrModeS);
3625 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
3626 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
3627 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
3628 if (is_ia32_am_sc_sign(node))
3629 set_ia32_am_sc_sign(new_op);
3630 set_ia32_ls_mode(new_op, mode);
3631 if (is_ia32_use_frame(node)) {
3632 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
3633 set_ia32_use_frame(new_op);
3636 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3642 * Transforms a lowered Store into a "real" one.
/* Generic helper: rebuild a lowered store node (inputs: 0=ptr, 1=val, 2=mem)
 * with the given constructor. Unlike gen_lowered_Load this unconditionally
 * marks the result as frame-based (use_frame is always set). */
3644 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
3646 ir_node *block = be_transform_node(get_nodes_block(node));
3647 ir_node *ptr = get_irn_n(node, 0);
3648 ir_node *new_ptr = be_transform_node(ptr);
3649 ir_node *val = get_irn_n(node, 1);
3650 ir_node *new_val = be_transform_node(val);
3651 ir_node *mem = get_irn_n(node, 2);
3652 ir_node *new_mem = be_transform_node(mem);
3653 ir_graph *irg = current_ir_graph;
3654 dbg_info *dbgi = get_irn_dbg_info(node);
3655 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3656 ir_mode *mode = get_ia32_ls_mode(node);
3660 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
3662 am_offs = get_ia32_am_offs_int(node);
3663 add_ia32_am_offs_int(new_op, am_offs);
3665 set_ia32_op_type(new_op, ia32_AddrModeD);
3666 set_ia32_ls_mode(new_op, mode);
3667 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
3668 set_ia32_use_frame(new_op);
3670 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/* Transform a lowered ShlDep (shift-left with an extra scheduling
 * dependency) into an ia32 Shl via the generic shift matcher. */
3675 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3677 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3678 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3680 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
3681 match_immediate | match_mode_neutral);
/* Transform a lowered ShrDep (logical shift-right with dependency)
 * into an ia32 Shr. */
3684 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3686 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3687 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3688 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/* Transform a lowered SarDep (arithmetic shift-right with dependency)
 * into an ia32 Sar. */
3692 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3694 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3695 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3696 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/* Transform a lowered Add (low word of a 64bit add). The resulting ia32_Add
 * is forced to mode_T so the 64bit lowering's flags/carry Proj (consumed by
 * the matching l_Adc) remains reachable. */
3700 static ir_node *gen_ia32_l_Add(ir_node *node) {
3701 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3702 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3703 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
3704 match_commutative | match_am | match_immediate |
3705 match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Add itself */
3707 if(is_Proj(lowered)) {
3708 lowered = get_Proj_pred(lowered);
3710 assert(is_ia32_Add(lowered));
3711 set_irn_mode(lowered, mode_T);
/* Transform a lowered Adc (add-with-carry, high word of a 64bit add)
 * via the flags-consuming binop matcher. */
3717 static ir_node *gen_ia32_l_Adc(ir_node *node)
3719 return gen_binop_flags(node, new_rd_ia32_Adc,
3720 match_commutative | match_am | match_immediate |
3721 match_mode_neutral);
3725 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
3727 * @param node The node to transform
3728 * @return the created ia32 vfild node
/* thin wrapper around the generic lowered-load helper */
3730 static ir_node *gen_ia32_l_vfild(ir_node *node) {
3731 return gen_lowered_Load(node, new_rd_ia32_vfild);
3735 * Transforms an ia32_l_Load into a "real" ia32_Load node
3737 * @param node The node to transform
3738 * @return the created ia32 Load node
/* thin wrapper around the generic lowered-load helper */
3740 static ir_node *gen_ia32_l_Load(ir_node *node) {
3741 return gen_lowered_Load(node, new_rd_ia32_Load);
3745 * Transforms an ia32_l_Store into a "real" ia32_Store node
3747 * @param node The node to transform
3748 * @return the created ia32 Store node
/* thin wrapper around the generic lowered-store helper */
3750 static ir_node *gen_ia32_l_Store(ir_node *node) {
3751 return gen_lowered_Store(node, new_rd_ia32_Store);
3755 * Transforms a l_vfist into a "real" vfist node.
3757 * @param node The node to transform
3758 * @return the created ia32 vfist node
/* Like gen_lowered_Store, but goes through gen_vfist, which yields both the
 * memory result and the fist node itself (via the out-parameter). The
 * address-mode attributes are copied onto the fist node. */
3760 static ir_node *gen_ia32_l_vfist(ir_node *node) {
3761 ir_node *block = be_transform_node(get_nodes_block(node));
3762 ir_node *ptr = get_irn_n(node, 0);
3763 ir_node *new_ptr = be_transform_node(ptr);
3764 ir_node *val = get_irn_n(node, 1);
3765 ir_node *new_val = be_transform_node(val);
3766 ir_node *mem = get_irn_n(node, 2);
3767 ir_node *new_mem = be_transform_node(mem);
3768 ir_graph *irg = current_ir_graph;
3769 dbg_info *dbgi = get_irn_dbg_info(node);
3770 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3771 ir_mode *mode = get_ia32_ls_mode(node);
3772 ir_node *memres, *fist;
3775 memres = gen_vfist(dbgi, irg, block, new_ptr, noreg, new_mem, new_val, &fist);
3776 am_offs = get_ia32_am_offs_int(node);
3777 add_ia32_am_offs_int(fist, am_offs);
3779 set_ia32_op_type(fist, ia32_AddrModeD);
3780 set_ia32_ls_mode(fist, mode);
3781 set_ia32_frame_ent(fist, get_ia32_frame_ent(node));
3782 set_ia32_use_frame(fist);
3784 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
3790 * Transforms a l_MulS into a "real" MulS node.
3792 * @return the created ia32 Mul node
/* unsigned widening multiply (Mul produces a high/low result pair) */
3794 static ir_node *gen_ia32_l_Mul(ir_node *node) {
3795 ir_node *left = get_binop_left(node);
3796 ir_node *right = get_binop_right(node);
3798 return gen_binop(node, left, right, new_rd_ia32_Mul,
3799 match_commutative | match_am | match_mode_neutral);
3803 * Transforms a l_IMulS into a "real" IMul1OPS node.
3805 * @return the created ia32 IMul1OP node
/* signed widening multiply, one-operand IMul form */
3807 static ir_node *gen_ia32_l_IMul(ir_node *node) {
3808 ir_node *left = get_binop_left(node);
3809 ir_node *right = get_binop_right(node);
3811 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
3812 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered Sub (low word of a 64bit subtract). As with l_Add,
 * the ia32_Sub is forced to mode_T so its borrow/flags Proj (consumed by
 * the matching l_Sbb) stays available. */
3815 static ir_node *gen_ia32_l_Sub(ir_node *node) {
3816 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3817 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3818 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
3819 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj; step back to the Sub itself */
3821 if(is_Proj(lowered)) {
3822 lowered = get_Proj_pred(lowered);
3824 assert(is_ia32_Sub(lowered));
3825 set_irn_mode(lowered, mode_T);
/* Transform a lowered Sbb (subtract-with-borrow, high word of a 64bit
 * subtract) via the flags-consuming binop matcher. */
3831 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
3832 return gen_binop_flags(node, new_rd_ia32_Sbb,
3833 match_am | match_immediate | match_mode_neutral);
3837 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3838 * op1 - target to be shifted
3839 * op2 - contains bits to be shifted into target
3841 * Only op3 can be an immediate.
/* Common helper for both 64bit double-shift directions; dispatches on
 * is_ia32_l_ShlD to pick the ShlD or ShrD constructor. */
3843 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3844 ir_node *low, ir_node *count)
3846 ir_node *block = get_nodes_block(node);
3847 ir_node *new_block = be_transform_node(block);
3848 ir_graph *irg = current_ir_graph;
3849 dbg_info *dbgi = get_irn_dbg_info(node);
3850 ir_node *new_high = be_transform_node(high);
3851 ir_node *new_low = be_transform_node(low);
3855 /* the shift amount can be any mode that is bigger than 5 bits, since all
3856 * other bits are ignored anyway */
/* only skip single-user Convs: with more users the Conv must stay intact */
3857 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
3858 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3859 count = get_Conv_op(count);
3861 new_count = create_immediate_or_transform(count, 0);
3863 if (is_ia32_l_ShlD(node)) {
3864 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
3867 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
3870 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transform a lowered 64bit shift-left-double; unpacks the three data
 * inputs and defers to gen_lowered_64bit_shifts. */
3875 static ir_node *gen_ia32_l_ShlD(ir_node *node)
3877 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
3878 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
3879 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
3880 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered 64bit shift-right-double; unpacks the three data
 * inputs and defers to gen_lowered_64bit_shifts. */
3883 static ir_node *gen_ia32_l_ShrD(ir_node *node)
3885 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
3886 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
3887 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
3888 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered long-long -> float conversion: store the two 32bit
 * halves to an 8-byte frame slot (low word at offset 0, high word at +4),
 * then fild the 64bit integer from that slot into an x87 register.
 * Only the signed case is handled; unsigned input panics. */
3891 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
3892 ir_node *src_block = get_nodes_block(node);
3893 ir_node *block = be_transform_node(src_block);
3894 ir_graph *irg = current_ir_graph;
3895 dbg_info *dbgi = get_irn_dbg_info(node);
3896 ir_node *frame = get_irg_frame(irg);
3897 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3898 ir_node *nomem = new_NoMem();
3899 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
3900 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
3901 ir_node *new_val_low = be_transform_node(val_low);
3902 ir_node *new_val_high = be_transform_node(val_high);
3907 ir_node *store_high;
3909 if(!mode_is_signed(get_irn_mode(val_high))) {
3910 panic("unsigned long long -> float not supported yet (%+F)", node);
3914 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3916 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3918 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
3919 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
3921 set_ia32_use_frame(store_low);
3922 set_ia32_use_frame(store_high);
3923 set_ia32_op_type(store_low, ia32_AddrModeD);
3924 set_ia32_op_type(store_high, ia32_AddrModeD);
/* low half unsigned, high half signed; high half lands at frame offset +4 */
3925 set_ia32_ls_mode(store_low, mode_Iu);
3926 set_ia32_ls_mode(store_high, mode_Is);
3927 add_ia32_am_offs_int(store_high, 4);
/* serialize both stores before the fild reads the slot */
3931 sync = new_rd_Sync(dbgi, irg, block, 2, in);
3934 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
3936 set_ia32_use_frame(fild);
3937 set_ia32_op_type(fild, ia32_AddrModeS);
3938 set_ia32_ls_mode(fild, mode_Ls);
3940 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3942 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* Transform a lowered float -> long-long conversion: fist the x87 value as a
 * 64bit integer into a frame slot. The two 32bit result halves are read back
 * later by gen_Proj_l_FloattoLL. */
3945 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
3946 ir_node *src_block = get_nodes_block(node);
3947 ir_node *block = be_transform_node(src_block);
3948 ir_graph *irg = current_ir_graph;
3949 dbg_info *dbgi = get_irn_dbg_info(node);
3950 ir_node *frame = get_irg_frame(irg);
3951 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3952 ir_node *nomem = new_NoMem();
3953 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
3954 ir_node *new_val = be_transform_node(val);
3955 ir_node *fist, *mem;
3957 mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
3958 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
3959 set_ia32_use_frame(fist);
3960 set_ia32_op_type(fist, ia32_AddrModeD);
3961 set_ia32_ls_mode(fist, mode_Ls);
3967 * the BAD transformer.
/* Registered for opcodes that must never reach the backend transform
 * phase (see the BAD() entries in register_transformers). */
3969 static ir_node *bad_transform(ir_node *node) {
3970 panic("No transform function for %+F available.\n", node);
/* Transform a Proj of an l_FloattoLL: load one 32bit half of the 64bit
 * value that gen_ia32_l_FloattoLL fisted to the frame slot — offset +4 for
 * the high half, +0 for the low half. */
3974 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
3975 ir_graph *irg = current_ir_graph;
3976 ir_node *block = be_transform_node(get_nodes_block(node));
3977 ir_node *pred = get_Proj_pred(node);
3978 ir_node *new_pred = be_transform_node(pred);
3979 ir_node *frame = get_irg_frame(irg);
3980 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3981 dbg_info *dbgi = get_irn_dbg_info(node);
3982 long pn = get_Proj_proj(node);
3987 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
3988 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3989 set_ia32_use_frame(load);
3990 set_ia32_op_type(load, ia32_AddrModeS);
3991 set_ia32_ls_mode(load, mode_Iu);
3992 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
3993 * 32 bit from it with this particular load */
3994 attr = get_ia32_attr(load);
3995 attr->data.need_64bit_stackent = 1;
3997 if (pn == pn_ia32_l_FloattoLL_res_high) {
3998 add_ia32_am_offs_int(load, 4);
4000 assert(pn == pn_ia32_l_FloattoLL_res_low);
4003 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4009 * Transform the Projs of an AddSP.
/* Note the intentional cross-mapping: a be_AddSP (stack grows downwards)
 * is implemented by an ia32 SubSP, so AddSP Projs are renumbered onto
 * SubSP output positions. */
4011 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4012 ir_node *block = be_transform_node(get_nodes_block(node));
4013 ir_node *pred = get_Proj_pred(node);
4014 ir_node *new_pred = be_transform_node(pred);
4015 ir_graph *irg = current_ir_graph;
4016 dbg_info *dbgi = get_irn_dbg_info(node);
4017 long proj = get_Proj_proj(node);
4019 if (proj == pn_be_AddSP_sp) {
4020 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4021 pn_ia32_SubSP_stack);
/* the stack pointer result is pinned to %esp */
4022 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4024 } else if(proj == pn_be_AddSP_res) {
4025 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4026 pn_ia32_SubSP_addr);
4027 } else if (proj == pn_be_AddSP_M) {
4028 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4032 return new_rd_Unknown(irg, get_irn_mode(node));
4036 * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: a be_SubSP is implemented by an ia32 AddSP,
 * so the Projs are renumbered onto AddSP output positions. */
4038 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4039 ir_node *block = be_transform_node(get_nodes_block(node));
4040 ir_node *pred = get_Proj_pred(node);
4041 ir_node *new_pred = be_transform_node(pred);
4042 ir_graph *irg = current_ir_graph;
4043 dbg_info *dbgi = get_irn_dbg_info(node);
4044 long proj = get_Proj_proj(node);
4046 if (proj == pn_be_SubSP_sp) {
4047 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4048 pn_ia32_AddSP_stack);
/* the stack pointer result is pinned to %esp */
4049 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4051 } else if (proj == pn_be_SubSP_M) {
4052 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4056 return new_rd_Unknown(irg, get_irn_mode(node));
4060 * Transform and renumber the Projs from a Load.
/* Dispatches on what the Load was transformed into (ia32_Load,
 * Conv_I2I that absorbed the load, xLoad for SSE, vfld for x87) and maps
 * the firm Proj numbers (res / M / X_regular / X_except) onto the
 * corresponding ia32 output numbers. */
4062 static ir_node *gen_Proj_Load(ir_node *node) {
4064 ir_node *block = be_transform_node(get_nodes_block(node));
4065 ir_node *pred = get_Proj_pred(node);
4066 ir_graph *irg = current_ir_graph;
4067 dbg_info *dbgi = get_irn_dbg_info(node);
4068 long proj = get_Proj_proj(node);
4070 /* loads might be part of source address mode matches, so we don't
4071 * transform the ProjMs yet (with the exception of loads whose result is
4074 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4077 /* this is needed, because sometimes we have loops that are only
4078 reachable through the ProjM */
4079 be_enqueue_preds(node);
4080 /* do it in 2 steps, to silence firm verifier */
4081 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4082 set_Proj_proj(res, pn_ia32_mem);
4086 /* renumber the proj */
4087 new_pred = be_transform_node(pred);
4088 if (is_ia32_Load(new_pred)) {
4091 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4093 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4094 case pn_Load_X_regular:
4095 return new_rd_Jmp(dbgi, irg, block);
4096 case pn_Load_X_except:
4097 /* This Load might raise an exception. Mark it. */
4098 set_ia32_exc_label(new_pred, 1);
4099 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load was folded into a Conv during transformation */
4103 } else if (is_ia32_Conv_I2I(new_pred) ||
4104 is_ia32_Conv_I2I8Bit(new_pred)) {
4105 set_irn_mode(new_pred, mode_T);
4106 if (proj == pn_Load_res) {
4107 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4108 } else if (proj == pn_Load_M) {
4109 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4111 } else if (is_ia32_xLoad(new_pred)) {
4114 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4116 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4117 case pn_Load_X_regular:
4118 return new_rd_Jmp(dbgi, irg, block);
4119 case pn_Load_X_except:
4120 /* This Load might raise an exception. Mark it. */
4121 set_ia32_exc_label(new_pred, 1);
4122 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4126 } else if (is_ia32_vfld(new_pred)) {
4129 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4131 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4132 case pn_Load_X_regular:
4133 return new_rd_Jmp(dbgi, irg, block);
4134 case pn_Load_X_except:
4135 /* This Load might raise an exception. Mark it. */
4136 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): uses pn_ia32_xLoad_X_exc in the vfld branch — looks like a
 * copy/paste of the xLoad case; verify against pn_ia32_vfld_X_exc */
4137 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4142 /* can happen for ProJMs when source address mode happened for the
4145 /* however it should not be the result proj, as that would mean the
4146 load had multiple users and should not have been used for
4148 if (proj != pn_Load_M) {
4149 panic("internal error: transformed node not a Load");
4151 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4155 return new_rd_Unknown(irg, get_irn_mode(node));
4159 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod are all implemented by ia32 Div/IDiv; this maps the
 * three firm opcodes' Proj numbers onto the shared ia32_Div outputs
 * (M, div_res, mod_res, X_exc). */
4161 static ir_node *gen_Proj_DivMod(ir_node *node) {
4162 ir_node *block = be_transform_node(get_nodes_block(node));
4163 ir_node *pred = get_Proj_pred(node);
4164 ir_node *new_pred = be_transform_node(pred);
4165 ir_graph *irg = current_ir_graph;
4166 dbg_info *dbgi = get_irn_dbg_info(node);
4167 ir_mode *mode = get_irn_mode(node);
4168 long proj = get_Proj_proj(node);
4170 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* outer switch: which firm opcode; inner switches: which Proj of it */
4172 switch (get_irn_opcode(pred)) {
4176 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4178 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4179 case pn_Div_X_regular:
4180 return new_rd_Jmp(dbgi, irg, block);
4181 case pn_Div_X_except:
4182 set_ia32_exc_label(new_pred, 1);
4183 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4191 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4193 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4194 case pn_Mod_X_except:
4195 set_ia32_exc_label(new_pred, 1);
4196 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4204 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4205 case pn_DivMod_res_div:
4206 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4207 case pn_DivMod_res_mod:
4208 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4209 case pn_DivMod_X_regular:
4210 return new_rd_Jmp(dbgi, irg, block);
4211 case pn_DivMod_X_except:
4212 set_ia32_exc_label(new_pred, 1);
4213 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4223 return new_rd_Unknown(irg, mode);
4227 * Transform and renumber the Projs from a CopyB.
/* CopyB may have been transformed into a CopyB_i (known size, unrolled) or
 * a generic CopyB (rep movs); pick the matching memory output. */
4229 static ir_node *gen_Proj_CopyB(ir_node *node) {
4230 ir_node *block = be_transform_node(get_nodes_block(node));
4231 ir_node *pred = get_Proj_pred(node);
4232 ir_node *new_pred = be_transform_node(pred);
4233 ir_graph *irg = current_ir_graph;
4234 dbg_info *dbgi = get_irn_dbg_info(node);
4235 ir_mode *mode = get_irn_mode(node);
4236 long proj = get_Proj_proj(node);
4239 case pn_CopyB_M_regular:
4240 if (is_ia32_CopyB_i(new_pred)) {
4241 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4242 } else if (is_ia32_CopyB(new_pred)) {
4243 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4251 return new_rd_Unknown(irg, mode);
4255 * Transform and renumber the Projs from a Quot.
/* Float division becomes either an SSE xDiv or an x87 vfdiv depending on
 * the configured FP backend; map M and res Projs accordingly. */
4257 static ir_node *gen_Proj_Quot(ir_node *node) {
4258 ir_node *block = be_transform_node(get_nodes_block(node));
4259 ir_node *pred = get_Proj_pred(node);
4260 ir_node *new_pred = be_transform_node(pred);
4261 ir_graph *irg = current_ir_graph;
4262 dbg_info *dbgi = get_irn_dbg_info(node);
4263 ir_mode *mode = get_irn_mode(node);
4264 long proj = get_Proj_proj(node);
4268 if (is_ia32_xDiv(new_pred)) {
4269 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4270 } else if (is_ia32_vfdiv(new_pred)) {
4271 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4275 if (is_ia32_xDiv(new_pred)) {
4276 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4277 } else if (is_ia32_vfdiv(new_pred)) {
4278 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4281 case pn_Quot_X_regular:
4282 case pn_Quot_X_except:
4288 return new_rd_Unknown(irg, mode);
4292 * Transform the Thread Local Storage Proj.
/* Replaces the TLS anchor Proj with an ia32 LdTls node (no debug info). */
4294 static ir_node *gen_Proj_tls(ir_node *node) {
4295 ir_node *block = be_transform_node(get_nodes_block(node));
4296 ir_graph *irg = current_ir_graph;
4297 dbg_info *dbgi = NULL;
4298 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
/* Duplicate a be_Call, marking all its outputs as clobbering the flags
 * register (calls do not preserve eflags). */
4303 static ir_node *gen_be_Call(ir_node *node) {
4304 ir_node *res = be_duplicate_node(node);
4305 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/* Duplicate a be_IncSP, marking it as modifying the flags register
 * (stack-pointer adjustment uses add/sub). */
4310 static ir_node *gen_be_IncSP(ir_node *node) {
4311 ir_node *res = be_duplicate_node(node);
4312 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4318 * Transform the Projs from a be_Call.
/* Besides plain renumbering, this implements the SSE return-value hack:
 * x87 returns the FP result in st(0), so when SSE2 is in use the value is
 * spilled to the frame (vfst) and reloaded into an xmm register (xLoad),
 * and the call's memory Proj must be rerouted behind that xLoad. */
4320 static ir_node *gen_Proj_be_Call(ir_node *node) {
4321 ir_node *block = be_transform_node(get_nodes_block(node));
4322 ir_node *call = get_Proj_pred(node);
4323 ir_node *new_call = be_transform_node(call);
4324 ir_graph *irg = current_ir_graph;
4325 dbg_info *dbgi = get_irn_dbg_info(node);
4326 ir_type *method_type = be_Call_get_type(call);
4327 int n_res = get_method_n_ress(method_type);
4328 long proj = get_Proj_proj(node);
4329 ir_mode *mode = get_irn_mode(node);
4331 const arch_register_class_t *cls;
4333 /* The following is kinda tricky: If we're using SSE, then we have to
4334 * move the result value of the call in floating point registers to an
4335 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4336 * after the call, we have to make sure to correctly make the
4337 * MemProj and the result Proj use these 2 nodes
4339 if (proj == pn_be_Call_M_regular) {
4340 // get new node for result, are we doing the sse load/store hack?
4341 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4342 ir_node *call_res_new;
4343 ir_node *call_res_pred = NULL;
4345 if (call_res != NULL) {
4346 call_res_new = be_transform_node(call_res);
4347 call_res_pred = get_Proj_pred(call_res_new);
/* no sse hack happened: memory comes straight from the call */
4350 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4351 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4352 pn_be_Call_M_regular);
/* sse hack happened: memory must come from the inserted xLoad */
4354 assert(is_ia32_xLoad(call_res_pred));
4355 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4359 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4360 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4362 ir_node *frame = get_irg_frame(irg);
4363 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4365 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4368 /* in case there is no memory output: create one to serialize the copy
4370 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4371 pn_be_Call_M_regular);
4372 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4373 pn_be_Call_first_res);
4375 /* store st(0) onto stack */
4376 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4378 set_ia32_op_type(fstp, ia32_AddrModeD);
4379 set_ia32_use_frame(fstp);
4381 /* load into SSE register */
4382 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4384 set_ia32_op_type(sse_load, ia32_AddrModeS);
4385 set_ia32_use_frame(sse_load);
4387 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4393 /* transform call modes */
4394 if (mode_is_data(mode)) {
4395 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
/* default: keep the Proj number, just re-hang it on the new call */
4399 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4403 * Transform the Projs of a Cmp.
/* Cmp Projs must have been eliminated by the mode_b lowering pass before
 * the backend runs; reaching this transformer is a hard error. */
4405 static ir_node *gen_Proj_Cmp(ir_node *node)
4407 /* this probably means not all mode_b nodes were lowered... */
4408 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4413 * Transform the Projs from a Bound.
/* gen_Bound produced a Jcc: in-range control flow continues on the Jcc true
 * output, the out-of-bounds exception on the false output; the memory and
 * result Projs pass through to the Bound's own operands. */
4415 static ir_node *gen_Proj_Bound(ir_node *node)
4417 ir_node *new_node, *block;
4418 ir_node *pred = get_Proj_pred(node);
4420 switch (get_Proj_proj(node)) {
4422 return be_transform_node(get_Bound_mem(pred));
4423 case pn_Bound_X_regular:
4424 new_node = be_transform_node(pred);
4425 block = get_nodes_block(new_node);
4426 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
4427 case pn_Bound_X_except:
4428 new_node = be_transform_node(pred);
4429 block = get_nodes_block(new_node);
4430 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
/* the checked index is returned unchanged as the Bound result */
4432 return be_transform_node(get_Bound_index(pred));
4434 panic("unsupported Proj from Bound");
/* Transform a Proj of an ASM node: non-memory Projs are duplicated as-is;
 * the memory Proj is re-hung on the transformed ASM at position n_res + 1. */
4438 static ir_node *gen_Proj_ASM(ir_node *node)
4444 if (get_irn_mode(node) != mode_M)
4445 return be_duplicate_node(node);
4447 pred = get_Proj_pred(node);
4448 new_pred = be_transform_node(pred);
4449 block = get_nodes_block(new_pred);
4450 return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
4451 get_ia32_n_res(new_pred) + 1);
4455 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: routes to the opcode-specific gen_Proj_* helper
 * based on the predecessor's opcode, with special cases for Store (memory
 * Proj folds into the store), Start (initial-exec ProjX becomes a Jmp, TLS
 * anchor becomes LdTls) and l_FloattoLL results. */
4457 static ir_node *gen_Proj(ir_node *node) {
4458 ir_node *pred = get_Proj_pred(node);
4461 switch (get_irn_opcode(pred)) {
4463 proj = get_Proj_proj(node);
4464 if (proj == pn_Store_M) {
4465 return be_transform_node(pred);
4468 return new_r_Bad(current_ir_graph);
4471 return gen_Proj_Load(node);
4473 return gen_Proj_ASM(node);
4477 return gen_Proj_DivMod(node);
4479 return gen_Proj_CopyB(node);
4481 return gen_Proj_Quot(node);
4483 return gen_Proj_be_SubSP(node);
4485 return gen_Proj_be_AddSP(node);
4487 return gen_Proj_be_Call(node);
4489 return gen_Proj_Cmp(node);
4491 return gen_Proj_Bound(node);
4493 proj = get_Proj_proj(node);
4494 if (proj == pn_Start_X_initial_exec) {
4495 ir_node *block = get_nodes_block(pred);
4496 dbg_info *dbgi = get_irn_dbg_info(node);
4499 /* we exchange the ProjX with a jump */
4500 block = be_transform_node(block);
4501 jump = new_rd_Jmp(dbgi, current_ir_graph, block);
4504 if (node == be_get_old_anchor(anchor_tls)) {
4505 return gen_Proj_tls(node);
4510 if (is_ia32_l_FloattoLL(pred)) {
4511 return gen_Proj_l_FloattoLL(node);
4513 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4517 ir_mode *mode = get_irn_mode(node);
4518 if (ia32_mode_needs_gp_reg(mode)) {
/* generic gp-mode Proj: keep the number, force mode_Iu */
4519 ir_node *new_pred = be_transform_node(pred);
4520 ir_node *block = be_transform_node(get_nodes_block(node));
4521 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
4522 mode_Iu, get_Proj_proj(node));
/* keep the node number stable for debugging */
4523 #ifdef DEBUG_libfirm
4524 new_proj->node_nr = node->node_nr;
4530 return be_duplicate_node(node);
4534 * Enters all transform functions into the generic pointer
/* Resets all opcodes' generic function pointers, then installs the gen_*
 * transformers via GEN() and marks opcodes that must never appear via BAD(). */
4536 static void register_transformers(void)
4540 /* first clear the generic function pointer for all ops */
4541 clear_irp_opcodes_generic_func();
4543 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4544 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4582 /* transform ops from intrinsic lowering */
4598 GEN(ia32_l_LLtoFloat);
4599 GEN(ia32_l_FloattoLL);
4605 /* we should never see these nodes */
4620 /* handle generic backend nodes */
4629 op_Mulh = get_op_Mulh();
4638 * Pre-transform all unknown and noreg nodes.
/* Callback for be_transform_graph: replaces the cached per-class Unknown
 * and NoReg placeholder nodes of the code generator with their transformed
 * counterparts before the main walk starts. */
4640 static void ia32_pretransform_node(void *arch_cg) {
4641 ia32_code_gen_t *cg = arch_cg;
4643 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4644 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4645 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4646 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4647 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4648 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
4653 * Walker, checks if all ia32 nodes producing more than one result have
4654 * its Projs, otherwise creates new Projs and keep them using a be_Keep node.
/* Collects the used output numbers of each ia32 multi-result node in a
 * bitmask, then creates a Proj + be_Keep for every unused non-flags output
 * so the register allocator does not lose track of clobbered registers. */
4656 static void add_missing_keep_walker(ir_node *node, void *data)
4659 unsigned found_projs = 0;
4660 const ir_edge_t *edge;
4661 ir_mode *mode = get_irn_mode(node);
4666 if(!is_ia32_irn(node))
4669 n_outs = get_ia32_n_res(node);
/* SwitchJmp outputs are control flow, not register results */
4672 if(is_ia32_SwitchJmp(node))
/* the bitmask below only has room for one bit per output */
4675 assert(n_outs < (int) sizeof(unsigned) * 8);
4676 foreach_out_edge(node, edge) {
4677 ir_node *proj = get_edge_src_irn(edge);
4678 int pn = get_Proj_proj(proj);
/* memory Projs carry no register and need no keep */
4680 if (get_irn_mode(proj) == mode_M)
4683 assert(pn < n_outs);
4684 found_projs |= 1 << pn;
4688 /* are keeps missing? */
4690 for(i = 0; i < n_outs; ++i) {
4693 const arch_register_req_t *req;
4694 const arch_register_class_t *cls;
4696 if(found_projs & (1 << i)) {
4700 req = get_ia32_out_req(node, i);
/* flags results don't need keeping */
4705 if(cls == &ia32_reg_classes[CLASS_ia32_flags]) {
4709 block = get_nodes_block(node);
4710 in[0] = new_r_Proj(current_ir_graph, block, node,
4711 arch_register_class_mode(cls), i);
/* reuse one Keep per node, adding further inputs as needed */
4712 if(last_keep != NULL) {
4713 be_Keep_add_node(last_keep, cls, in[0]);
4715 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
4716 if(sched_is_scheduled(node)) {
4717 sched_add_after(node, last_keep);
4724 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Public entry point: runs add_missing_keep_walker over the whole graph. */
4727 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
4729 ir_graph *irg = be_get_birg_irg(cg->birg);
4730 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
4733 /* do the transformation */
/* Driver for the transform phase: registers the transformers, builds the
 * heights information and non-address-mode analysis, disables CSE for the
 * duration of the walk (node attributes are set after creation, which CSE
 * would merge incorrectly), runs be_transform_graph, then restores CSE and
 * frees the auxiliary data. */
4734 void ia32_transform_graph(ia32_code_gen_t *cg) {
4736 ir_graph *irg = cg->irg;
4738 register_transformers();
4740 initial_fpcw = NULL;
4742 BE_TIMER_PUSH(t_heights);
4743 heights = heights_new(irg);
4744 BE_TIMER_POP(t_heights);
4745 ia32_calculate_non_address_mode_nodes(cg->birg);
4747 /* the transform phase is not safe for CSE (yet) because several nodes get
4748 * attributes set after their creation */
4749 cse_last = get_opt_cse();
4752 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
4754 set_opt_cse(cse_last);
4756 ia32_free_non_address_mode_nodes();
4757 heights_free(heights);
4761 void ia32_init_transform(void)
4763 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");