2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
36 #include "irgraph_t.h"
41 #include "iredges_t.h"
54 #include "../benode_t.h"
55 #include "../besched.h"
57 #include "../beutil.h"
58 #include "../beirg_t.h"
59 #include "../betranshlp.h"
62 #include "bearch_ia32_t.h"
63 #include "ia32_common_transform.h"
64 #include "ia32_nodes_attr.h"
65 #include "ia32_transform.h"
66 #include "ia32_new_nodes.h"
67 #include "ia32_map_regs.h"
68 #include "ia32_dbg_stat.h"
69 #include "ia32_optimize.h"
70 #include "ia32_util.h"
71 #include "ia32_address_mode.h"
72 #include "ia32_architecture.h"
74 #include "gen_ia32_regalloc_if.h"
/* Bit patterns (as strings) for the IEEE-754 sign and abs masks used to
 * implement float Neg/Abs via bitwise operations, plus the INT64 max value. */
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
/* Names for the primitive types created for the constants above. */
82 #define TP_SFP_SIGN "ia32_sfp_sign"
83 #define TP_DFP_SIGN "ia32_dfp_sign"
84 #define TP_SFP_ABS "ia32_sfp_abs"
85 #define TP_DFP_ABS "ia32_dfp_abs"
86 #define TP_INT_MAX "ia32_int_max"
/* Linker-visible entity names for the constants above. */
88 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
89 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
90 #define ENT_SFP_ABS "IA32_SFP_ABS"
91 #define ENT_DFP_ABS "IA32_DFP_ABS"
92 #define ENT_INT_MAX "IA32_INT_MAX"
/* Shortcuts for the modes of the x87 (vfp) and SSE (xmm) register classes. */
94 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
95 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug module handle (only present in debug builds). */
97 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Transformed node representing the initial x87 FPU control word;
 * lazily initialized (see get_fpcw below). */
99 static ir_node *initial_fpcw = NULL;
101 extern ir_op *get_op_Mulh(void);
/* Constructor-function typedefs: each matches the signature of the generated
 * new_rd_ia32_* node constructors so generic gen_binop/gen_unop helpers can
 * be parameterized with them. */
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
/* Binop that additionally consumes an eflags input (e.g. Adc/Sbb). */
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
/* NOTE(review): the extraction is truncated here — the parameter list of
 * construct_binop_dest_func is visibly incomplete in this copy. */
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
/* x87 float binop: takes the FPU control word as an extra input. */
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
128 static ir_node *create_immediate_or_transform(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
132 dbg_info *dbgi, ir_node *block,
133 ir_node *op, ir_node *orig_node);
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node) {
137 return is_Const(node) && is_Const_null(node);
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node) {
142 return is_Const(node) && is_Const_one(node);
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node) {
147 return is_Const(node) && is_Const_all_one(node);
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_x87_Const(ir_node *node)
155 tarval *tv = get_Const_tarval(node);
156 if (tarval_is_null(tv) || tarval_is_one(tv))
159 /* TODO: match all the other float constants */
164 * returns true if constant can be created with a simple float command
166 static bool is_simple_sse_Const(ir_node *node)
168 tarval *tv = get_Const_tarval(node);
169 ir_mode *mode = get_tarval_mode(tv);
174 if (tarval_is_null(tv) || tarval_is_one(tv))
177 if (mode == mode_D) {
178 unsigned val = get_tarval_sub_bits(tv, 0) |
179 (get_tarval_sub_bits(tv, 1) << 8) |
180 (get_tarval_sub_bits(tv, 2) << 16) |
181 (get_tarval_sub_bits(tv, 3) << 24);
183 /* lower 32bit are zero, really a 32bit constant */
187 /* TODO: match all the other float constants */
/*
 * Transforms a firm Const node into the matching ia32 node.
 *
 * Float constants: with SSE2, 0.0 becomes xZero, 1.0 is built via
 * xAllOnes + shift pair, 32bit values go through movd, doubles with zero
 * low word through movd + psllq, everything else is loaded from a constant
 * pool entity. On x87, 0.0/1.0 use fldz/fld1, the rest loads from a pool
 * entity. Integer constants become ia32_Const immediates.
 *
 * NOTE(review): this copy of the function is a truncated extraction —
 * declarations, closing braces and several branches are missing; the code
 * below is kept byte-identical to the extraction.
 */
192 * Transforms a Const.
194 static ir_node *gen_Const(ir_node *node) {
195 ir_graph *irg = current_ir_graph;
196 ir_node *old_block = get_nodes_block(node);
197 ir_node *block = be_transform_node(old_block);
198 dbg_info *dbgi = get_irn_dbg_info(node);
199 ir_mode *mode = get_irn_mode(node);
201 assert(is_Const(node));
203 if (mode_is_float(mode)) {
205 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
206 ir_node *nomem = new_NoMem();
/* --- SSE2 path --- */
210 if (ia32_cg_config.use_sse2) {
211 tarval *tv = get_Const_tarval(node);
212 if (tarval_is_null(tv)) {
213 load = new_rd_ia32_xZero(dbgi, irg, block);
214 set_ia32_ls_mode(load, mode);
216 } else if (tarval_is_one(tv)) {
/* 1.0 = all-ones shifted left then right to form the exponent pattern;
 * shift amounts differ for float (26) vs double (55) */
217 int cnst = mode == mode_F ? 26 : 55;
218 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
219 ir_node *imm2 = create_Immediate(NULL, 0, 2);
220 ir_node *pslld, *psrld;
222 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
223 set_ia32_ls_mode(load, mode);
224 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
225 set_ia32_ls_mode(pslld, mode);
226 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
227 set_ia32_ls_mode(psrld, mode);
229 } else if (mode == mode_F) {
230 /* we can place any 32bit constant by using a movd gp, sse */
231 unsigned val = get_tarval_sub_bits(tv, 0) |
232 (get_tarval_sub_bits(tv, 1) << 8) |
233 (get_tarval_sub_bits(tv, 2) << 16) |
234 (get_tarval_sub_bits(tv, 3) << 24);
235 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
236 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
237 set_ia32_ls_mode(load, mode);
/* double whose low 32 bits are zero: build upper half, shift left 32 */
240 if (mode == mode_D) {
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
246 ir_node *imm32 = create_Immediate(NULL, 0, 32);
247 ir_node *cnst, *psllq;
249 /* fine, lower 32bit are zero, produce 32bit value */
250 val = get_tarval_sub_bits(tv, 4) |
251 (get_tarval_sub_bits(tv, 5) << 8) |
252 (get_tarval_sub_bits(tv, 6) << 16) |
253 (get_tarval_sub_bits(tv, 7) << 24);
254 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
255 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
256 set_ia32_ls_mode(load, mode);
257 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
258 set_ia32_ls_mode(psllq, mode);
/* general case: load from a constant-pool entity; the load is marked
 * rematerializable since the pool entry is immutable */
263 floatent = create_float_const_entity(node);
265 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
267 set_ia32_op_type(load, ia32_AddrModeS);
268 set_ia32_am_sc(load, floatent);
269 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
270 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path --- */
273 if (is_Const_null(node)) {
274 load = new_rd_ia32_vfldz(dbgi, irg, block);
276 set_ia32_ls_mode(load, mode);
277 } else if (is_Const_one(node)) {
278 load = new_rd_ia32_vfld1(dbgi, irg, block);
280 set_ia32_ls_mode(load, mode);
282 floatent = create_float_const_entity(node);
284 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
285 set_ia32_op_type(load, ia32_AddrModeS);
286 set_ia32_am_sc(load, floatent);
287 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
288 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
289 /* take the mode from the entity */
290 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
294 /* Const Nodes before the initial IncSP are a bad idea, because
295 * they could be spilled and we have no SP ready at that point yet.
296 * So add a dependency to the initial frame pointer calculation to
297 * avoid that situation.
299 if (get_irg_start_block(irg) == block) {
300 add_irn_dep(load, get_irg_frame(irg));
303 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
305 } else { /* non-float mode */
307 tarval *tv = get_Const_tarval(node);
/* normalize to 32bit unsigned so it fits an ia32 immediate */
310 tv = tarval_convert_to(tv, mode_Iu);
312 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
314 panic("couldn't convert constant tarval (%+F)", node);
316 val = get_tarval_long(tv);
318 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
319 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same spilling caveat as above for constants in the start block */
322 if (get_irg_start_block(irg) == block) {
323 add_irn_dep(cnst, get_irg_frame(irg));
/*
 * Transforms a firm SymConst (address of an entity) into an ia32 node:
 * float mode loads through xLoad/vfld, otherwise an ia32_Const immediate
 * referencing the entity. Only symconst_addr_ent is supported.
 *
 * NOTE(review): truncated extraction — declarations, braces and some
 * lines are missing; code kept byte-identical.
 */
331 * Transforms a SymConst.
333 static ir_node *gen_SymConst(ir_node *node) {
334 ir_graph *irg = current_ir_graph;
335 ir_node *old_block = get_nodes_block(node);
336 ir_node *block = be_transform_node(old_block);
337 dbg_info *dbgi = get_irn_dbg_info(node);
338 ir_mode *mode = get_irn_mode(node);
341 if (mode_is_float(mode)) {
342 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
343 ir_node *nomem = new_NoMem();
/* mode_E here: presumably the widest float load is used for symbolic
 * addresses — TODO confirm against the repository */
345 if (ia32_cg_config.use_sse2)
346 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
348 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
354 if(get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
361 /* Const Nodes before the initial IncSP are a bad idea, because
362 * they could be spilled and we have no SP ready at that point yet
364 if (get_irg_start_block(irg) == block) {
365 add_irn_dep(cnst, get_irg_frame(irg));
368 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/*
 * Returns (creating and caching on first use) a global constant entity for
 * one of the well-known FP bit patterns (sign/abs masks, INT_MAX) used to
 * implement float Neg/Abs. Entities are static, constant and local.
 *
 * NOTE(review): truncated extraction — struct fields, declarations and
 * braces are missing; code kept byte-identical.
 */
373 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
374 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
375 static const struct {
377 const char *ent_name;
378 const char *cnst_str;
381 } names [ia32_known_const_max] = {
382 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
383 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
384 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
385 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
386 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
/* one cached entity per known-constant kind */
388 static ir_entity *ent_cache[ia32_known_const_max];
390 const char *tp_name, *ent_name, *cnst_str;
398 ent_name = names[kct].ent_name;
399 if (! ent_cache[kct]) {
400 tp_name = names[kct].tp_name;
401 cnst_str = names[kct].cnst_str;
/* mode selector: 0 = 32bit unsigned, 1 = 64bit unsigned, 2 = float */
403 switch (names[kct].mode) {
404 case 0: mode = mode_Iu; break;
405 case 1: mode = mode_Lu; break;
406 default: mode = mode_F; break;
408 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
409 tp = new_type_primitive(new_id_from_str(tp_name), mode);
410 /* set the specified alignment */
411 set_type_alignment_bytes(tp, names[kct].align);
413 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
415 set_entity_ld_ident(ent, get_entity_ident(ent));
416 set_entity_visibility(ent, visibility_local);
417 set_entity_variability(ent, variability_constant);
418 set_entity_allocation(ent, allocation_static);
420 /* we create a new entity here: It's initialization must resist on the
/* the initializer Const must live in the const-code irg, so switch
 * current_ir_graph temporarily */
422 rem = current_ir_graph;
423 current_ir_graph = get_const_code_irg();
424 cnst = new_Const(mode, tv);
425 current_ir_graph = rem;
427 set_atomic_ent_value(ent, cnst);
429 /* cache the entry */
430 ent_cache[kct] = ent;
433 return ent_cache[kct];
/*
 * Decides whether @p node (a Proj(Load) or a float Const) may be folded
 * into another node as a source address-mode operand. Rejects the case
 * where the other operands depend on the Load's memory, where the Load is
 * in a different block, has extra users, or was already transformed.
 *
 * NOTE(review): truncated extraction — returns, declarations and braces
 * are missing; code kept byte-identical.
 */
437 * return true if the node is a Proj(Load) and could be used in source address
438 * mode for another node. Will return only true if the @p other node is not
439 * dependent on the memory of the Load (for binary operations use the other
440 * input here, for unary operations use NULL).
442 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
443 ir_node *other, ir_node *other2, match_flags_t flags)
448 /* float constants are always available */
449 if (is_Const(node)) {
450 ir_mode *mode = get_irn_mode(node);
451 if (mode_is_float(mode)) {
452 if (ia32_cg_config.use_sse2) {
453 if (is_simple_sse_Const(node))
456 if (is_simple_x87_Const(node))
459 if (get_irn_n_edges(node) > 1)
467 load = get_Proj_pred(node);
468 pn = get_Proj_proj(node);
469 if (!is_Load(load) || pn != pn_Load_res)
471 if (get_nodes_block(load) != block)
473 /* we only use address mode if we're the only user of the load */
474 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
476 /* in some edge cases with address mode we might reach the load normally
477 * and through some AM sequence, if it is already materialized then we
478 * can't create an AM node from it */
479 if (be_is_transformed(node))
482 /* don't do AM if other node inputs depend on the load (via mem-proj) */
483 if (other != NULL && prevents_AM(block, load, other))
486 if (other2 != NULL && prevents_AM(block, load, other2))
/* Result record of match_arguments: the matched address, operand kind,
 * and flags describing how the operands were arranged.
 * NOTE(review): truncated extraction — several fields are missing here. */
492 typedef struct ia32_address_mode_t ia32_address_mode_t;
493 struct ia32_address_mode_t {
498 ia32_op_type_t op_type;
502 unsigned commutative : 1;
/* set when op1/op2 were swapped while matching */
503 unsigned ins_permuted : 1;
506 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
510 /* construct load address */
511 memset(addr, 0, sizeof(addr[0]));
512 ia32_create_address_mode(addr, ptr, /*force=*/0);
514 noreg_gp = ia32_new_NoReg_gp(env_cg);
515 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
516 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
517 addr->mem = be_transform_node(mem);
/*
 * Fills am->addr (and ls_mode/pinned/mem_proj) for a node that is used as
 * a source address-mode operand: either a float Const (load from a pool
 * entity) or a Proj(Load).
 *
 * NOTE(review): truncated extraction — declarations, an early return and
 * braces are missing; code kept byte-identical.
 */
520 static void build_address(ia32_address_mode_t *am, ir_node *node)
522 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
523 ia32_address_t *addr = &am->addr;
/* float constants are loaded from a constant-pool entity */
529 if (is_Const(node)) {
530 ir_entity *entity = create_float_const_entity(node);
531 addr->base = noreg_gp;
532 addr->index = noreg_gp;
533 addr->mem = new_NoMem();
534 addr->symconst_ent = entity;
536 am->ls_mode = get_type_mode(get_entity_type(entity));
537 am->pinned = op_pin_state_floats;
/* otherwise node is a Proj(Load); fold the load's address */
541 load = get_Proj_pred(node);
542 ptr = get_Load_ptr(load);
543 mem = get_Load_mem(load);
544 new_mem = be_transform_node(mem);
545 am->pinned = get_irn_pinned(load);
546 am->ls_mode = get_Load_mode(load);
547 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
550 /* construct load address */
551 ia32_create_address_mode(addr, ptr, /*force=*/0);
553 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
554 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/*
 * Copies the fields of an ia32_address_t onto an ia32 node's attributes.
 *
 * NOTE(review): truncated extraction — braces and presumably the
 * use_frame guard line are missing; code kept byte-identical.
 */
558 static void set_address(ir_node *node, const ia32_address_t *addr)
560 set_ia32_am_scale(node, addr->scale);
561 set_ia32_am_sc(node, addr->symconst_ent);
562 set_ia32_am_offs_int(node, addr->offset);
563 if(addr->symconst_sign)
564 set_ia32_am_sc_sign(node);
566 set_ia32_use_frame(node);
567 set_ia32_frame_ent(node, addr->frame_entity);
/*
 * Apply attributes of a given address mode to a node: address fields,
 * operand type, load/store mode, pinned state and commutativity.
 *
 * NOTE(review): truncated extraction — braces and conditionals around the
 * pinned/commutative handling are missing; code kept byte-identical.
 */
571 * Apply attributes of a given address mode to a node.
573 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
575 set_address(node, &am->addr);
577 set_ia32_op_type(node, am->op_type);
578 set_ia32_ls_mode(node, am->ls_mode);
579 if (am->pinned == op_pin_state_pinned) {
580 /* beware: some nodes are already pinned and did not allow to change the state */
581 if (get_irn_pinned(node) != op_pin_state_pinned)
582 set_irn_pinned(node, op_pin_state_pinned);
585 set_ia32_commutative(node);
/*
 * Check, if a given node is a Down-Conv, i.e. an integer Conv from a mode
 * with more bits to a mode with fewer bits; only true when the node has at
 * most one user.
 *
 * NOTE(review): truncated extraction — the is_Conv check/declarations and
 * braces are missing; code kept byte-identical.
 */
589 * Check, if a given node is a Down-Conv, ie. a integer Conv
590 * from a mode with a mode with more bits to a mode with lesser bits.
591 * Moreover, we return only true if the node has not more than 1 user.
593 * @param node the node
594 * @return non-zero if node is a Down-Conv
596 static int is_downconv(const ir_node *node)
604 /* we only want to skip the conv when we're the only user
605 * (not optimal but for now...)
607 if(get_irn_n_edges(node) > 1)
610 src_mode = get_irn_mode(get_Conv_op(node));
611 dest_mode = get_irn_mode(node);
612 return ia32_mode_needs_gp_reg(src_mode)
613 && ia32_mode_needs_gp_reg(dest_mode)
614 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
617 /* Skip all Down-Conv's on a given node and return the resulting node. */
618 ir_node *ia32_skip_downconv(ir_node *node) {
619 while (is_downconv(node))
620 node = get_Conv_op(node);
/*
 * Widens @p node to a 32bit gp value via an I2I Conv, choosing the signed
 * or unsigned 32bit target mode based on the source mode's signedness.
 *
 * NOTE(review): truncated extraction — declarations, the tgt_mode
 * assignments and braces are missing; code kept byte-identical.
 */
625 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
627 ir_mode *mode = get_irn_mode(node);
632 if(mode_is_signed(mode)) {
637 block = get_nodes_block(node);
638 dbgi = get_irn_dbg_info(node);
640 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/*
 * Matches the operands of a node into ia32 addressing/operand modes:
 * immediates, source address mode (possibly swapping commutative operands)
 * or plain register operands. Results are written into @p am.
 *
 * NOTE(review): truncated extraction throughout — declarations, several
 * branches and closing braces are missing; code kept byte-identical.
 */
644 * matches operands of a node into ia32 addressing/operand modes. This covers
645 * usage of source address mode, immediates, operations with non 32-bit modes,
647 * The resulting data is filled into the @p am struct. block is the block
648 * of the node whose arguments are matched. op1, op2 are the first and second
649 * input that are matched (op1 may be NULL). other_op is another unrelated
650 * input that is not matched! but which is needed sometimes to check if AM
651 * for op1/op2 is legal.
652 * @p flags describes the supported modes of the operation in detail.
654 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
655 ir_node *op1, ir_node *op2, ir_node *other_op,
658 ia32_address_t *addr = &am->addr;
659 ir_mode *mode = get_irn_mode(op2);
660 int mode_bits = get_mode_size_bits(mode);
661 ir_node *noreg_gp, *new_op1, *new_op2;
663 unsigned commutative;
664 int use_am_and_immediates;
667 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
669 commutative = (flags & match_commutative) != 0;
670 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
671 use_am = (flags & match_am) != 0;
672 use_immediate = (flags & match_immediate) != 0;
673 assert(!use_am_and_immediates || use_immediate);
676 assert(!commutative || op1 != NULL);
677 assert(use_am || !(flags & match_8bit_am));
678 assert(use_am || !(flags & match_16bit_am));
/* 8/16 bit operations only keep AM support when explicitly allowed */
680 if (mode_bits == 8) {
681 if (!(flags & match_8bit_am))
683 /* we don't automatically add upconvs yet */
684 assert((flags & match_mode_neutral) || (flags & match_8bit));
685 } else if (mode_bits == 16) {
686 if (!(flags & match_16bit_am))
688 /* we don't automatically add upconvs yet */
689 assert((flags & match_mode_neutral) || (flags & match_16bit));
692 /* we can simply skip downconvs for mode neutral nodes: the upper bits
693 * can be random for these operations */
694 if (flags & match_mode_neutral) {
695 op2 = ia32_skip_downconv(op2);
697 op1 = ia32_skip_downconv(op1);
701 /* match immediates. firm nodes are normalized: constants are always on the
704 if (!(flags & match_try_am) && use_immediate) {
705 new_op2 = try_create_Immediate(op2, 0);
708 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* try source address mode on op2 first ... */
709 if (new_op2 == NULL &&
710 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
711 build_address(am, op2);
712 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
713 if (mode_is_float(mode)) {
714 new_op2 = ia32_new_NoReg_vfp(env_cg);
718 am->op_type = ia32_AddrModeS;
/* ... then on op1 when the operation is commutative */
719 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
721 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
723 build_address(am, op1);
725 if (mode_is_float(mode)) {
726 noreg = ia32_new_NoReg_vfp(env_cg);
731 if (new_op2 != NULL) {
734 new_op1 = be_transform_node(op2);
/* operands were swapped; record the permutation */
736 am->ins_permuted = 1;
738 am->op_type = ia32_AddrModeS;
740 if (flags & match_try_am) {
/* no AM found: fall back to normal register operands */
743 am->op_type = ia32_Normal;
747 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
749 new_op2 = be_transform_node(op2);
750 am->op_type = ia32_Normal;
751 am->ls_mode = get_irn_mode(op2);
752 if (flags & match_mode_neutral)
753 am->ls_mode = mode_Iu;
/* fill unused address slots with NoReg/NoMem defaults */
755 if (addr->base == NULL)
756 addr->base = noreg_gp;
757 if (addr->index == NULL)
758 addr->index = noreg_gp;
759 if (addr->mem == NULL)
760 addr->mem = new_NoMem();
762 am->new_op1 = new_op1;
763 am->new_op2 = new_op2;
764 am->commutative = commutative;
767 static void set_transformed_and_mark(ir_node *const old_node, ir_node *const new_node)
769 mark_irn_visited(old_node);
770 be_set_transformed_node(old_node, new_node);
/*
 * When an address-mode node absorbed a Load, reroute the old Load's memory
 * Proj to the new node: register the new node as the Load's transformed
 * version and, if needed, turn the node into mode_T with a result Proj.
 *
 * NOTE(review): truncated extraction — declarations, the no-op return and
 * closing braces are missing; code kept byte-identical.
 */
773 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
778 if (am->mem_proj == NULL)
781 /* we have to create a mode_T so the old MemProj can attach to us */
782 mode = get_irn_mode(node);
783 load = get_Proj_pred(am->mem_proj);
785 set_transformed_and_mark(load, node);
787 if (mode != mode_T) {
788 set_irn_mode(node, mode_T);
789 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
/*
 * Construct a standard binary operation, set AM and immediate if required.
 * Matches the operands via match_arguments, builds the node with @p func,
 * and fixes up the memory Proj of an absorbed Load.
 *
 * NOTE(review): truncated extraction — declarations, return and closing
 * brace are missing; code kept byte-identical.
 */
796 * Construct a standard binary operation, set AM and immediate if required.
798 * @param node The original node for which the binop is created
799 * @param op1 The first operand
800 * @param op2 The second operand
801 * @param func The node constructor function
802 * @return The constructed ia32 node.
804 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
805 construct_binop_func *func, match_flags_t flags)
808 ir_node *block, *new_block, *new_node;
809 ia32_address_mode_t am;
810 ia32_address_t *addr = &am.addr;
812 block = get_nodes_block(node);
813 match_arguments(&am, block, op1, op2, NULL, flags);
815 dbgi = get_irn_dbg_info(node);
816 new_block = be_transform_node(block);
817 new_node = func(dbgi, current_ir_graph, new_block,
818 addr->base, addr->index, addr->mem,
819 am.new_op1, am.new_op2);
820 set_am_attributes(new_node, &am);
821 /* we can't use source address mode anymore when using immediates */
822 if (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
823 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
824 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
826 new_node = fix_mem_proj(new_node, &am);
/* Canonical input positions for lowered flag-consuming binops (Adc/Sbb);
 * the COMPILETIME_ASSERTs pin them to the generated node layouts.
 * NOTE(review): the enum head (and n_ia32_l_binop_left) is missing from
 * this truncated extraction. */
833 n_ia32_l_binop_right,
834 n_ia32_l_binop_eflags
836 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
837 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
838 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
839 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
840 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
841 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/*
 * Construct a binary operation which also consumes the eflags input
 * (e.g. Adc/Sbb): like gen_binop but transforms and passes the eflags.
 *
 * NOTE(review): truncated extraction — declarations, return and closing
 * brace are missing; code kept byte-identical.
 */
844 * Construct a binary operation which also consumes the eflags.
846 * @param node The node to transform
847 * @param func The node constructor function
848 * @param flags The match flags
849 * @return The constructor ia32 node
851 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
854 ir_node *src_block = get_nodes_block(node);
855 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
856 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
857 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
859 ir_node *block, *new_node, *new_eflags;
860 ia32_address_mode_t am;
861 ia32_address_t *addr = &am.addr;
863 match_arguments(&am, src_block, op1, op2, eflags, flags);
865 dbgi = get_irn_dbg_info(node);
866 block = be_transform_node(src_block);
867 new_eflags = be_transform_node(eflags);
868 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
869 addr->mem, am.new_op1, am.new_op2, new_eflags);
870 set_am_attributes(new_node, &am);
871 /* we can't use source address mode anymore when using immediates */
872 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
873 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
874 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
876 new_node = fix_mem_proj(new_node, &am);
/*
 * Returns the transformed node for the initial x87 FPU control word,
 * computing and caching it in initial_fpcw on first use.
 *
 * NOTE(review): truncated extraction — the early return value and the
 * final return/brace are missing; code kept byte-identical.
 */
881 static ir_node *get_fpcw(void)
884 if (initial_fpcw != NULL)
887 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
888 &ia32_fp_cw_regs[REG_FPCW]);
889 initial_fpcw = be_transform_node(fpcw);
/*
 * Construct a standard x87 float binary operation: like gen_binop but the
 * constructor also receives the FPU control word; address mode is disabled
 * for long double (> 64 bit) operands.
 *
 * NOTE(review): truncated extraction — declarations, the flags-clearing
 * statement, return and brace are missing; code kept byte-identical.
 */
895 * Construct a standard binary operation, set AM and immediate if required.
897 * @param op1 The first operand
898 * @param op2 The second operand
899 * @param func The node constructor function
900 * @return The constructed ia32 node.
902 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
903 construct_binop_float_func *func,
906 ir_mode *mode = get_irn_mode(node);
908 ir_node *block, *new_block, *new_node;
909 ia32_address_mode_t am;
910 ia32_address_t *addr = &am.addr;
912 /* cannot use address mode with long double on x87 */
913 if (get_mode_size_bits(mode) > 64)
916 block = get_nodes_block(node);
917 match_arguments(&am, block, op1, op2, NULL, flags);
919 dbgi = get_irn_dbg_info(node);
920 new_block = be_transform_node(block);
921 new_node = func(dbgi, current_ir_graph, new_block,
922 addr->base, addr->index, addr->mem,
923 am.new_op1, am.new_op2, get_fpcw());
924 set_am_attributes(new_node, &am);
926 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
928 new_node = fix_mem_proj(new_node, &am);
/*
 * Construct a shift/rotate binary operation. The shift count operand may
 * be an immediate; Convs on the count are skipped since only the low 5
 * bits matter on ia32. Handles an optional third dependency operand that
 * lowered shift nodes carry.
 *
 * NOTE(review): truncated extraction — declarations, the Conv-skip body,
 * return and braces are missing; code kept byte-identical.
 */
934 * Construct a shift/rotate binary operation, sets AM and immediate if required.
936 * @param op1 The first operand
937 * @param op2 The second operand
938 * @param func The node constructor function
939 * @return The constructed ia32 node.
941 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
942 construct_shift_func *func,
946 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
948 assert(! mode_is_float(get_irn_mode(node)));
949 assert(flags & match_immediate);
950 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* the shifted value: mode-neutral ops may skip downconvs, otherwise
 * non-32bit values are widened first */
952 if (flags & match_mode_neutral) {
953 op1 = ia32_skip_downconv(op1);
954 new_op1 = be_transform_node(op1);
955 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
956 new_op1 = create_upconv(op1, node);
958 new_op1 = be_transform_node(op1);
961 /* the shift amount can be any mode that is bigger than 5 bits, since all
962 * other bits are ignored anyway */
963 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
964 ir_node *const op = get_Conv_op(op2);
965 if (mode_is_float(get_irn_mode(op)))
968 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
970 new_op2 = create_immediate_or_transform(op2, 0);
972 dbgi = get_irn_dbg_info(node);
973 block = get_nodes_block(node);
974 new_block = be_transform_node(block);
975 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
976 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
978 /* lowered shift instruction may have a dependency operand, handle it here */
979 if (get_irn_arity(node) == 3) {
980 /* we have a dependency */
981 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
982 add_irn_dep(new_node, new_dep);
/*
 * Construct a standard unary operation: optionally skips downconvs for
 * mode-neutral ops, transforms the operand and builds the node via @p func.
 *
 * NOTE(review): truncated extraction — declarations, return and closing
 * brace are missing; code kept byte-identical.
 */
990 * Construct a standard unary operation, set AM and immediate if required.
992 * @param op The operand
993 * @param func The node constructor function
994 * @return The constructed ia32 node.
996 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1000 ir_node *block, *new_block, *new_op, *new_node;
1002 assert(flags == 0 || flags == match_mode_neutral);
1003 if (flags & match_mode_neutral) {
1004 op = ia32_skip_downconv(op);
1007 new_op = be_transform_node(op);
1008 dbgi = get_irn_dbg_info(node);
1009 block = get_nodes_block(node);
1010 new_block = be_transform_node(block);
1011 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1013 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Builds an ia32 Lea node from an ia32_address_t, substituting the gp
 * NoReg for absent base/index and transforming present ones.
 *
 * NOTE(review): truncated extraction — the base NULL check, return and
 * braces are missing; code kept byte-identical.
 */
1018 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1019 ia32_address_t *addr)
1021 ir_node *base, *index, *res;
1025 base = ia32_new_NoReg_gp(env_cg);
1027 base = be_transform_node(base);
1030 index = addr->index;
1031 if (index == NULL) {
1032 index = ia32_new_NoReg_gp(env_cg);
1034 index = be_transform_node(index);
1037 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1038 set_address(res, addr);
1044 * Returns non-zero if a given address mode has a symbolic or
1045 * numerical offset != 0.
1047 static int am_has_immediates(const ia32_address_t *addr)
1049 return addr->offset != 0 || addr->symconst_ent != NULL
1050 || addr->frame_entity || addr->use_frame;
/*
 * Creates an ia32 Add. Floats go through gen_binop (SSE xAdd) or
 * gen_binop_x87_float (vfadd). Integer adds try, in order: folding the
 * whole expression into an immediate Const, an add-with-immediate turned
 * into a Lea, a source-address-mode Add, and finally a plain Lea.
 *
 * NOTE(review): truncated extraction — declarations, returns and braces
 * are missing throughout; code kept byte-identical.
 */
1054 * Creates an ia32 Add.
1056 * @return the created ia32 Add node
1058 static ir_node *gen_Add(ir_node *node) {
1059 ir_mode *mode = get_irn_mode(node);
1060 ir_node *op1 = get_Add_left(node);
1061 ir_node *op2 = get_Add_right(node);
1063 ir_node *block, *new_block, *new_node, *add_immediate_op;
1064 ia32_address_t addr;
1065 ia32_address_mode_t am;
1067 if (mode_is_float(mode)) {
1068 if (ia32_cg_config.use_sse2)
1069 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1070 match_commutative | match_am);
1072 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1073 match_commutative | match_am);
1076 ia32_mark_non_am(node);
1078 op2 = ia32_skip_downconv(op2);
1079 op1 = ia32_skip_downconv(op1);
1083 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1084 * 1. Add with immediate -> Lea
1085 * 2. Add with possible source address mode -> Add
1086 * 3. Otherwise -> Lea
1088 memset(&addr, 0, sizeof(addr));
1089 ia32_create_address_mode(&addr, node, /*force=*/1);
1090 add_immediate_op = NULL;
1092 dbgi = get_irn_dbg_info(node);
1093 block = get_nodes_block(node);
1094 new_block = be_transform_node(block);
/* case 0: the whole Add collapsed into symconst/offset — emit a Const */
1097 if(addr.base == NULL && addr.index == NULL) {
1098 ir_graph *irg = current_ir_graph;
1099 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1100 addr.symconst_sign, addr.offset);
1101 add_irn_dep(new_node, get_irg_frame(irg));
1102 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1105 /* add with immediate? */
1106 if(addr.index == NULL) {
1107 add_immediate_op = addr.base;
1108 } else if(addr.base == NULL && addr.scale == 0) {
1109 add_immediate_op = addr.index;
1112 if(add_immediate_op != NULL) {
1113 if(!am_has_immediates(&addr)) {
1114 #ifdef DEBUG_libfirm
1115 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1118 return be_transform_node(add_immediate_op);
/* case 1: one operand plus immediates — a Lea does it */
1121 new_node = create_lea_from_address(dbgi, new_block, &addr);
1122 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1126 /* test if we can use source address mode */
1127 match_arguments(&am, block, op1, op2, NULL, match_commutative
1128 | match_mode_neutral | match_am | match_immediate | match_try_am);
1130 /* construct an Add with source address mode */
1131 if (am.op_type == ia32_AddrModeS) {
1132 ir_graph *irg = current_ir_graph;
1133 ia32_address_t *am_addr = &am.addr;
1134 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1135 am_addr->index, am_addr->mem, am.new_op1,
1137 set_am_attributes(new_node, &am);
1138 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1140 new_node = fix_mem_proj(new_node, &am);
1145 /* otherwise construct a lea */
1146 new_node = create_lea_from_address(dbgi, new_block, &addr);
1147 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Creates an ia32 Mul: SSE xMul or x87 vfmul for floats, IMul (with full
 * AM/immediate support) for integers.
 *
 * NOTE(review): truncated extraction — the float-path else and closing
 * brace are missing; code kept byte-identical.
 */
1152 * Creates an ia32 Mul.
1154 * @return the created ia32 Mul node
1156 static ir_node *gen_Mul(ir_node *node) {
1157 ir_node *op1 = get_Mul_left(node);
1158 ir_node *op2 = get_Mul_right(node);
1159 ir_mode *mode = get_irn_mode(node);
1161 if (mode_is_float(mode)) {
1162 if (ia32_cg_config.use_sse2)
1163 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1164 match_commutative | match_am);
1166 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1167 match_commutative | match_am);
1169 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1170 match_commutative | match_am | match_mode_neutral |
1171 match_immediate | match_am_and_immediates);
/*
 * Creates an ia32 Mulh: full 32x32->64 multiply (IMul1OP for signed, Mul
 * for unsigned) and projects out the high 32 bits of the result.
 *
 * NOTE(review): truncated extraction — declarations, else line and braces
 * are missing; code kept byte-identical.
 */
1175 * Creates an ia32 Mulh.
1176 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1177 * this result while Mul returns the lower 32 bit.
1179 * @return the created ia32 Mulh node
1181 static ir_node *gen_Mulh(ir_node *node)
1183 ir_node *block = get_nodes_block(node);
1184 ir_node *new_block = be_transform_node(block);
1185 ir_graph *irg = current_ir_graph;
1186 dbg_info *dbgi = get_irn_dbg_info(node);
1187 ir_mode *mode = get_irn_mode(node);
1188 ir_node *op1 = get_Mulh_left(node);
1189 ir_node *op2 = get_Mulh_right(node);
1190 ir_node *proj_res_high;
1192 ia32_address_mode_t am;
1193 ia32_address_t *addr = &am.addr;
1195 assert(!mode_is_float(mode) && "Mulh with float not supported");
1196 assert(get_mode_size_bits(mode) == 32);
1198 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
1200 if (mode_is_signed(mode)) {
1201 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1202 addr->index, addr->mem, am.new_op1,
1205 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1206 addr->index, addr->mem, am.new_op1,
1210 set_am_attributes(new_node, &am);
1211 /* we can't use source address mode anymore when using immediates */
1212 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1213 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1214 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1216 assert(get_irn_mode(new_node) == mode_T);
1218 fix_mem_proj(new_node, &am);
/* both constructors agree on the res_high Proj number */
1220 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1221 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1222 mode_Iu, pn_ia32_IMul1OP_res_high);
1224 return proj_res_high;
/* NOTE(review): several original lines are elided (src_mode computation,
 * the 0xFF branch body, closing braces); code kept byte-identical. */
1230 * Creates an ia32 And.
1232 * @return The created ia32 And node
1234 static ir_node *gen_And(ir_node *node) {
1235 ir_node *op1 = get_And_left(node);
1236 ir_node *op2 = get_And_right(node);
1237 assert(! mode_is_float(get_irn_mode(node)));
1239 /* is it a zero extension? */
1240 if (is_Const(op2)) {
1241 tarval *tv = get_Const_tarval(op2);
1242 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero-extension from 8/16 bit -- use a Conv */
1244 if (v == 0xFF || v == 0xFFFF) {
1245 dbg_info *dbgi = get_irn_dbg_info(node);
1246 ir_node *block = get_nodes_block(node);
1253 assert(v == 0xFFFF);
1256 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1261 return gen_binop(node, op1, op2, new_rd_ia32_And,
1262 match_commutative | match_mode_neutral | match_am
/* NOTE(review): closing brace elided in this extract. */
1269 * Creates an ia32 Or.
1271 * @return The created ia32 Or node
1273 static ir_node *gen_Or(ir_node *node) {
1274 ir_node *op1 = get_Or_left(node);
1275 ir_node *op2 = get_Or_right(node);
1277 assert (! mode_is_float(get_irn_mode(node)));
1278 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1279 | match_mode_neutral | match_am | match_immediate);
/* NOTE(review): closing brace elided in this extract. */
1285 * Creates an ia32 Eor.
1287 * @return The created ia32 Eor node
1289 static ir_node *gen_Eor(ir_node *node) {
1290 ir_node *op1 = get_Eor_left(node);
1291 ir_node *op2 = get_Eor_right(node);
1293 assert(! mode_is_float(get_irn_mode(node)));
/* firm Eor maps to the ia32 Xor instruction */
1294 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1295 | match_mode_neutral | match_am | match_immediate);
/* NOTE(review): some lines elided (match flags of vfsub call, arguments of
 * the warning fprintf, closing braces); code kept byte-identical. */
1300 * Creates an ia32 Sub.
1302 * @return The created ia32 Sub node
1304 static ir_node *gen_Sub(ir_node *node) {
1305 ir_node *op1 = get_Sub_left(node);
1306 ir_node *op2 = get_Sub_right(node);
1307 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE2 xSub or x87 vfsub */
1309 if (mode_is_float(mode)) {
1310 if (ia32_cg_config.use_sse2)
1311 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1313 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
/* Sub with constant should have been normalized to Add earlier -- warn */
1317 if (is_Const(op2)) {
1318 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1322 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1323 | match_am | match_immediate);
/* Builds the memory input for a node consuming an address-mode operand:
 * avoids creating a memory self-loop when the source value's load already
 * provides the memory, otherwise merges the memories with a Sync.
 * NOTE(review): several lines elided (ins declaration, loop internals,
 * the 2-element fallback, closing braces); code kept byte-identical. */
static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1327 ir_node *const src_val,
1328 ir_node *const src_mem,
1329 ir_node *const am_mem)
1331 if (is_NoMem(am_mem)) {
1332 return be_transform_node(src_mem);
1333 } else if (is_Proj(src_val) &&
1335 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1336 /* avoid memory loop */
/* src_mem is a Sync: rebuild it without the pred that would form a loop */
1338 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1339 ir_node *const ptr_pred = get_Proj_pred(src_val);
1340 int const arity = get_Sync_n_preds(src_mem);
1345 NEW_ARR_A(ir_node*, ins, arity + 1);
1347 for (i = arity - 1; i >= 0; --i) {
1348 ir_node *const pred = get_Sync_pred(src_mem, i);
1350 /* avoid memory loop */
1351 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1354 ins[n++] = be_transform_node(pred);
1359 return new_r_Sync(irg, block, n, ins);
/* default: Sync of the transformed source memory and the AM memory */
1363 ins[0] = be_transform_node(src_mem);
1365 return new_r_Sync(irg, block, 2, ins);
/* NOTE(review): lines elided (local declarations op1/op2/mem/mode/new_node,
 * case labels of the opcode switch, constructor tail args, closing braces). */
1370 * Generates an ia32 DivMod with additional infrastructure for the
1371 * register allocator if needed.
1373 static ir_node *create_Div(ir_node *node)
1375 ir_graph *irg = current_ir_graph;
1376 dbg_info *dbgi = get_irn_dbg_info(node);
1377 ir_node *block = get_nodes_block(node);
1378 ir_node *new_block = be_transform_node(block);
1385 ir_node *sign_extension;
1386 ia32_address_mode_t am;
1387 ia32_address_t *addr = &am.addr;
1389 /* the upper bits have random contents for smaller modes */
/* pick operands/memory/result mode depending on Div, Mod or DivMod */
1390 switch (get_irn_opcode(node)) {
1392 op1 = get_Div_left(node);
1393 op2 = get_Div_right(node);
1394 mem = get_Div_mem(node);
1395 mode = get_Div_resmode(node);
1398 op1 = get_Mod_left(node);
1399 op2 = get_Mod_right(node);
1400 mem = get_Mod_mem(node);
1401 mode = get_Mod_resmode(node);
1404 op1 = get_DivMod_left(node);
1405 op2 = get_DivMod_right(node);
1406 mem = get_DivMod_mem(node);
1407 mode = get_DivMod_resmode(node);
1410 panic("invalid divmod node %+F", node);
1413 match_arguments(&am, block, op1, op2, NULL, match_am);
1415 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1416 is the memory of the consumed address. We can have only the second op as address
1417 in Div nodes, so check only op2. */
1418 new_mem = transform_AM_mem(irg, block, op2, mem, addr->mem)
/* signed: sign-extend EAX into EDX via Cltd, then IDiv;
 * unsigned: zero EDX with a Const 0, then Div */
1420 if (mode_is_signed(mode)) {
1421 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1422 add_irn_dep(produceval, get_irg_frame(irg));
1423 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1426 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1427 addr->index, new_mem, am.new_op2,
1428 am.new_op1, sign_extension);
1430 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1431 add_irn_dep(sign_extension, get_irg_frame(irg));
1433 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1434 addr->index, new_mem, am.new_op2,
1435 am.new_op1, sign_extension);
1438 set_irn_pinned(new_node, get_irn_pinned(node));
1440 set_am_attributes(new_node, &am);
1441 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1443 new_node = fix_mem_proj(new_node, &am);
/* Mod is handled by the shared Div/Mod/DivMod transformer.
 * NOTE(review): closing brace elided in this extract. */
1449 static ir_node *gen_Mod(ir_node *node) {
1450 return create_Div(node);
/* Div is handled by the shared Div/Mod/DivMod transformer.
 * NOTE(review): closing brace elided in this extract. */
1453 static ir_node *gen_Div(ir_node *node) {
1454 return create_Div(node);
/* DivMod is handled by the shared Div/Mod/DivMod transformer.
 * NOTE(review): closing brace elided in this extract. */
1457 static ir_node *gen_DivMod(ir_node *node) {
1458 return create_Div(node);
/* NOTE(review): operand-fetch/brace lines elided in this extract. */
1464 * Creates an ia32 floating Div.
1466 * @return The created ia32 xDiv node
1468 static ir_node *gen_Quot(ir_node *node)
1470 ir_node *op1 = get_Quot_left(node);
1471 ir_node *op2 = get_Quot_right(node);
/* SSE2 xDiv when available, otherwise x87 vfdiv */
1473 if (ia32_cg_config.use_sse2) {
1474 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1476 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
/* NOTE(review): closing brace elided in this extract. */
1482 * Creates an ia32 Shl.
1484 * @return The created ia32 Shl node
1486 static ir_node *gen_Shl(ir_node *node) {
1487 ir_node *left = get_Shl_left(node);
1488 ir_node *right = get_Shl_right(node);
1490 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1491 match_mode_neutral | match_immediate);
/* NOTE(review): closing brace elided in this extract. */
1495 * Creates an ia32 Shr.
1497 * @return The created ia32 Shr node
1499 static ir_node *gen_Shr(ir_node *node) {
1500 ir_node *left = get_Shr_left(node);
1501 ir_node *right = get_Shr_right(node);
1503 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
/* NOTE(review): several lines elided (the val==31 guard, op fetch, src_mode
 * selection, closing braces); code kept byte-identical. */
1509 * Creates an ia32 Sar.
1511 * @return The created ia32 Shrs node
1513 static ir_node *gen_Shrs(ir_node *node) {
1514 ir_node *left = get_Shrs_left(node);
1515 ir_node *right = get_Shrs_right(node);
1516 ir_mode *mode = get_irn_mode(node);
/* Shrs by a constant on mode_Is: may be a full sign-extension (Cltd) */
1518 if(is_Const(right) && mode == mode_Is) {
1519 tarval *tv = get_Const_tarval(right);
1520 long val = get_tarval_long(tv);
1522 /* this is a sign extension */
1523 ir_graph *irg = current_ir_graph;
1524 dbg_info *dbgi = get_irn_dbg_info(node);
1525 ir_node *block = be_transform_node(get_nodes_block(node));
1527 ir_node *new_op = be_transform_node(op);
1528 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1529 add_irn_dep(pval, get_irg_frame(irg));
1531 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1535 /* 8 or 16 bit sign extension? */
/* Shl;Shrs by the same 16/24 constant is an 8/16-bit sign-extending Conv */
1536 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1537 ir_node *shl_left = get_Shl_left(left);
1538 ir_node *shl_right = get_Shl_right(left);
1539 if(is_Const(shl_right)) {
1540 tarval *tv1 = get_Const_tarval(right);
1541 tarval *tv2 = get_Const_tarval(shl_right);
1542 if(tv1 == tv2 && tarval_is_long(tv1)) {
1543 long val = get_tarval_long(tv1);
1544 if(val == 16 || val == 24) {
1545 dbg_info *dbgi = get_irn_dbg_info(node);
1546 ir_node *block = get_nodes_block(node);
1556 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: plain Sar */
1565 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
/* NOTE(review): closing brace elided in this extract. */
1571 * Creates an ia32 Rol.
1573 * @param op1 The first operator
1574 * @param op2 The second operator
1575 * @return The created ia32 RotL node
1577 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2) {
1578 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
/* NOTE(review): closing brace elided in this extract. */
1584 * Creates an ia32 Ror.
1585 * NOTE: There is no RotR with immediate because this would always be a RotL
1586 * "imm-mode_size_bits" which can be pre-calculated.
1588 * @param op1 The first operator
1589 * @param op2 The second operator
1590 * @return The created ia32 RotR node
1592 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2) {
1593 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
/* NOTE(review): lines elided (the is_Add guard / `add` binding, one condition
 * of the Minus match, the return, closing braces); code kept byte-identical. */
1599 * Creates an ia32 RotR or RotL (depending on the found pattern).
1601 * @return The created ia32 RotL or RotR node
1603 static ir_node *gen_Rotl(ir_node *node) {
1604 ir_node *rotate = NULL;
1605 ir_node *op1 = get_Rotl_left(node);
1606 ir_node *op2 = get_Rotl_right(node);
1608 /* Firm has only RotL, so we are looking for a right (op2)
1609 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1610 that means we can create a RotR instead of an Add and a RotL */
1614 ir_node *left = get_Add_left(add);
1615 ir_node *right = get_Add_right(add);
1616 if (is_Const(right)) {
1617 tarval *tv = get_Const_tarval(right);
1618 ir_mode *mode = get_irn_mode(node);
1619 long bits = get_mode_size_bits(mode);
/* RotL(x, bits - e) written as RotL(x, -e + bits) becomes RotR(x, e) */
1621 if (is_Minus(left) &&
1622 tarval_is_long(tv) &&
1623 get_tarval_long(tv) == bits &&
1626 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1627 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern did not match: emit a plain RotL */
1632 if (rotate == NULL) {
1633 rotate = gen_Rol(node, op1, op2);
/* NOTE(review): lines elided (declarations of new_node/size/ent, the return,
 * closing braces); code kept byte-identical. */
1642 * Transforms a Minus node.
1644 * @return The created ia32 Minus node
1646 static ir_node *gen_Minus(ir_node *node)
1648 ir_node *op = get_Minus_op(node);
1649 ir_node *block = be_transform_node(get_nodes_block(node));
1650 ir_graph *irg = current_ir_graph;
1651 dbg_info *dbgi = get_irn_dbg_info(node);
1652 ir_mode *mode = get_irn_mode(node);
/* float negate: SSE2 flips the sign bit via xXor with a sign-mask constant
 * loaded through address mode; x87 uses fchs */
1657 if (mode_is_float(mode)) {
1658 ir_node *new_op = be_transform_node(op);
1659 if (ia32_cg_config.use_sse2) {
1660 /* TODO: non-optimal... if we have many xXors, then we should
1661 * rather create a load for the const and use that instead of
1662 * several AM nodes... */
1663 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1664 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1665 ir_node *nomem = new_rd_NoMem(irg);
1667 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1668 nomem, new_op, noreg_xmm);
1670 size = get_mode_size_bits(mode);
1671 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1673 set_ia32_am_sc(new_node, ent);
1674 set_ia32_op_type(new_node, ia32_AddrModeS);
1675 set_ia32_ls_mode(new_node, mode);
1677 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
/* integer negate */
1680 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1683 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* NOTE(review): closing brace elided in this extract. */
1689 * Transforms a Not node.
1691 * @return The created ia32 Not node
1693 static ir_node *gen_Not(ir_node *node) {
1694 ir_node *op = get_Not_op(node);
1696 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1697 assert (! mode_is_float(get_irn_mode(node)));
1699 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
/* NOTE(review): lines elided (declarations of new_op/new_node/size/ent,
 * Cltd operand tail, the return, closing braces); code kept byte-identical. */
1705 * Transforms an Abs node.
1707 * @return The created ia32 Abs node
1709 static ir_node *gen_Abs(ir_node *node)
1711 ir_node *block = get_nodes_block(node);
1712 ir_node *new_block = be_transform_node(block);
1713 ir_node *op = get_Abs_op(node);
1714 ir_graph *irg = current_ir_graph;
1715 dbg_info *dbgi = get_irn_dbg_info(node);
1716 ir_mode *mode = get_irn_mode(node);
1717 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1718 ir_node *nomem = new_NoMem();
/* float abs: SSE2 masks off the sign bit via xAnd with an abs-mask constant;
 * x87 uses fabs */
1724 if (mode_is_float(mode)) {
1725 new_op = be_transform_node(op);
1727 if (ia32_cg_config.use_sse2) {
1728 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1729 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1730 nomem, new_op, noreg_fp);
1732 size = get_mode_size_bits(mode);
1733 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1735 set_ia32_am_sc(new_node, ent);
1737 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1739 set_ia32_op_type(new_node, ia32_AddrModeS);
1740 set_ia32_ls_mode(new_node, mode);
1742 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1743 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* integer abs via the branchless idiom: s = x >> 31 (Cltd), (x ^ s) - s */
1746 ir_node *xor, *pval, *sign_extension;
1748 if (get_mode_size_bits(mode) == 32) {
1749 new_op = be_transform_node(op);
1751 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1754 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1755 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1758 add_irn_dep(pval, get_irg_frame(irg));
1759 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1761 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1762 nomem, new_op, sign_extension);
1763 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1765 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1766 nomem, xor, sign_extension);
1767 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* NOTE(review): closing brace elided in this extract. */
1774 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1776 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n) {
1777 dbg_info *dbgi = get_irn_dbg_info(cmp);
1778 ir_node *block = get_nodes_block(cmp);
1779 ir_node *new_block = be_transform_node(block);
1780 ir_node *op1 = be_transform_node(x);
1781 ir_node *op2 = be_transform_node(n);
1783 return new_rd_ia32_Bt(dbgi, current_ir_graph, new_block, op1, op2);
/* NOTE(review): many lines elided (local declarations flags/dbgi/new_block/
 * new_op/noreg/nomem, is_Cmp/is_And/is_Shl guards, returns, closing braces). */
1787 * Transform a node returning a "flag" result.
1789 * @param node the node to transform
1790 * @param pnc_out the compare mode to use
1792 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1801 /* we have a Cmp as input */
1802 if (is_Proj(node)) {
1803 ir_node *pred = get_Proj_pred(node);
1805 pn_Cmp pnc = get_Proj_proj(node);
/* try to match x & (1 << n) compared (Eq/Lg) against 0 -> BT instruction */
1806 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1807 ir_node *l = get_Cmp_left(pred);
1808 ir_node *r = get_Cmp_right(pred);
1810 ir_node *la = get_And_left(l);
1811 ir_node *ra = get_And_right(l);
1813 ir_node *c = get_Shl_left(la);
1814 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1815 /* (1 << n) & ra) */
1816 ir_node *n = get_Shl_right(la);
1817 flags = gen_bt(pred, ra, n);
1818 /* we must generate a Jc/Jnc jump */
1819 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1822 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* mirrored pattern: the shift is the right And operand */
1827 ir_node *c = get_Shl_left(ra);
1828 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1829 /* la & (1 << n)) */
1830 ir_node *n = get_Shl_right(ra);
1831 flags = gen_bt(pred, la, n);
1832 /* we must generate a Jc/Jnc jump */
1833 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1836 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* no BT pattern: just transform the flag producer itself */
1842 flags = be_transform_node(pred);
1848 /* a mode_b value, we have to compare it against 0 */
1849 dbgi = get_irn_dbg_info(node);
1850 new_block = be_transform_node(get_nodes_block(node));
1851 new_op = be_transform_node(node);
1852 noreg = ia32_new_NoReg_gp(env_cg);
1853 nomem = new_NoMem();
1854 flags = new_rd_ia32_Test(dbgi, current_ir_graph, new_block, noreg, noreg, nomem,
1855 new_op, new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1856 *pnc_out = pn_Cmp_Lg;
/* NOTE(review): lines elided (base/index/res_mode/new_node declarations,
 * NULL-base/index fallbacks, mode arguments of the float loads, the final
 * Proj/return, closing braces); code kept byte-identical. */
1861 * Transforms a Load.
1863 * @return the created ia32 Load node
1865 static ir_node *gen_Load(ir_node *node) {
1866 ir_node *old_block = get_nodes_block(node);
1867 ir_node *block = be_transform_node(old_block);
1868 ir_node *ptr = get_Load_ptr(node);
1869 ir_node *mem = get_Load_mem(node);
1870 ir_node *new_mem = be_transform_node(mem);
1873 ir_graph *irg = current_ir_graph;
1874 dbg_info *dbgi = get_irn_dbg_info(node);
1875 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1876 ir_mode *mode = get_Load_mode(node);
1879 ia32_address_t addr;
1881 /* construct load address */
1882 memset(&addr, 0, sizeof(addr));
1883 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1890 base = be_transform_node(base);
1896 index = be_transform_node(index);
/* float loads: SSE2 xLoad or x87 vfld */
1899 if (mode_is_float(mode)) {
1900 if (ia32_cg_config.use_sse2) {
1901 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1903 res_mode = mode_xmm;
1905 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1907 res_mode = mode_vfp;
1910 assert(mode != mode_b);
1912 /* create a conv node with address mode for smaller modes */
1913 if(get_mode_size_bits(mode) < 32) {
1914 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1915 new_mem, noreg, mode);
1917 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1922 set_irn_pinned(new_node, get_irn_pinned(node));
1923 set_ia32_op_type(new_node, ia32_AddrModeS);
1924 set_ia32_ls_mode(new_node, mode);
1925 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller */
1927 if(get_irn_pinned(node) == op_pin_state_floats) {
1928 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1931 /* make sure we are scheduled behind the initial IncSP/Barrier
1932 * to avoid spills being placed before it
1934 if (block == get_irg_start_block(irg)) {
1935 add_irn_dep(new_node, get_irg_frame(irg));
1938 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Heuristic: decide whether a Load value (node, a Proj of a Load) can be
 * folded into a destination-address-mode operation of a Store at ptr/mem.
 * NOTE(review): lines elided (is_Proj/is_Load guards, loop variable i,
 * return statements, closing braces); code kept byte-identical. */
static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1944 ir_node *ptr, ir_node *other)
1951 /* we only use address mode if we're the only user of the load */
1952 if (get_irn_n_edges(node) > 1)
1955 load = get_Proj_pred(node);
1958 if (get_nodes_block(load) != block)
1961 /* store should have the same pointer as the load */
1962 if (get_Load_ptr(load) != ptr)
1965 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1966 if (other != NULL &&
1967 get_nodes_block(other) == block &&
1968 heights_reachable_in_block(heights, other, load)) {
/* memory may be a Sync: every pred must be the load's mem-proj or
 * independent of the load */
1975 for (i = get_Sync_n_preds(mem) - 1; i >= 0; --i) {
1976 ir_node *const pred = get_Sync_pred(mem, i);
1978 if (is_Proj(pred) && get_Proj_pred(pred) == load)
1981 if (get_nodes_block(pred) == block &&
1982 heights_reachable_in_block(heights, pred, load)) {
1987 /* Store should be attached to the load */
1988 if (!is_Proj(mem) || get_Proj_pred(mem) != load)
/* Creates a destination-address-mode binop (op writes its result directly
 * to memory), trying op1 then -- if commutative -- op2 as the memory operand.
 * NOTE(review): lines elided (declarations dbgi/block/new_mem/new_op/
 * new_node/mem_proj/commutative, failure return, func call tails, closing
 * braces); code kept byte-identical. */
static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1996 ir_node *mem, ir_node *ptr, ir_mode *mode,
1997 construct_binop_dest_func *func,
1998 construct_binop_dest_func *func8bit,
1999 match_flags_t flags)
2001 ir_node *src_block = get_nodes_block(node);
2003 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2004 ir_graph *irg = current_ir_graph;
2011 ia32_address_mode_t am;
2012 ia32_address_t *addr = &am.addr;
2013 memset(&am, 0, sizeof(am));
2015 assert(flags & match_dest_am);
2016 assert(flags & match_immediate); /* there is no destam node without... */
2017 commutative = (flags & match_commutative) != 0;
/* the memory operand is whichever side is a foldable load */
2019 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2020 build_address(&am, op1);
2021 new_op = create_immediate_or_transform(op2, 0);
2022 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2023 build_address(&am, op2);
2024 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with the NoReg/NoMem placeholders */
2029 if(addr->base == NULL)
2030 addr->base = noreg_gp;
2031 if(addr->index == NULL)
2032 addr->index = noreg_gp;
2033 if(addr->mem == NULL)
2034 addr->mem = new_NoMem();
2036 dbgi = get_irn_dbg_info(node);
2037 block = be_transform_node(src_block);
2038 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
/* 8-bit ops need the dedicated 8-bit constructor */
2040 if(get_mode_size_bits(mode) == 8) {
2041 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2044 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem,
2047 set_address(new_node, addr);
2048 set_ia32_op_type(new_node, ia32_AddrModeD);
2049 set_ia32_ls_mode(new_node, mode);
2050 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed load's mem-proj to the new node */
2052 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
2053 mem_proj = be_transform_node(am.mem_proj);
2054 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Creates a destination-address-mode unop (in-place memory operation).
 * NOTE(review): lines elided (declarations block/dbgi/new_mem/new_node/
 * mem_proj, failure return, closing braces); code kept byte-identical. */
static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2060 ir_node *ptr, ir_mode *mode,
2061 construct_unop_dest_func *func)
2063 ir_graph *irg = current_ir_graph;
2064 ir_node *src_block = get_nodes_block(node);
2070 ia32_address_mode_t am;
2071 ia32_address_t *addr = &am.addr;
2072 memset(&am, 0, sizeof(am));
2074 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2077 build_address(&am, op);
2079 dbgi = get_irn_dbg_info(node);
2080 block = be_transform_node(src_block);
2081 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
2082 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem);
2083 set_address(new_node, addr);
2084 set_ia32_op_type(new_node, ia32_AddrModeD);
2085 set_ia32_ls_mode(new_node, mode);
2086 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed load's mem-proj to the new node */
2088 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
2089 mem_proj = be_transform_node(am.mem_proj);
2090 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Tries to turn a Store(Mux(sel, 1, 0)) into an ia32 SetMem (SETcc to
 * memory); returns NULL (presumably -- the failure returns are elided)
 * when the pattern does not match.
 * NOTE(review): lines elided (declarations irg/dbgi/block/new_block/cond/
 * flags/new_mem/new_node/pnc/negated, the negated assignments, failure
 * returns, closing braces); code kept byte-identical. */
static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2096 ir_mode *mode = get_irn_mode(node);
2097 ir_node *mux_true = get_Mux_true(node);
2098 ir_node *mux_false = get_Mux_false(node);
2109 ia32_address_t addr;
/* SETcc writes a single byte, so only 8-bit stores qualify */
2111 if(get_mode_size_bits(mode) != 8)
2114 if(is_Const_1(mux_true) && is_Const_0(mux_false)) {
2116 } else if(is_Const_0(mux_true) && is_Const_1(mux_false)) {
2122 build_address_ptr(&addr, ptr, mem);
2124 irg = current_ir_graph;
2125 dbgi = get_irn_dbg_info(node);
2126 block = get_nodes_block(node);
2127 new_block = be_transform_node(block);
2128 cond = get_Mux_sel(node);
2129 flags = get_flags_node(cond, &pnc);
2130 new_mem = be_transform_node(mem);
2131 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2132 addr.index, addr.mem, flags, pnc, negated);
2133 set_address(new_node, &addr);
2134 set_ia32_op_type(new_node, ia32_AddrModeD);
2135 set_ia32_ls_mode(new_node, mode);
2136 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Dispatches a Store's value node to the matching destination-address-mode
 * construction (AddMem, SubMem, AndMem, ... IncMem/DecMem/NegMem/NotMem,
 * SetMem), so the operation writes directly to memory.
 * NOTE(review): many lines elided (declarations op1/op2/new_node, case
 * labels, some match-flag tails, breaks/returns, closing braces). */
static ir_node *try_create_dest_am(ir_node *node) {
2142 ir_node *val = get_Store_value(node);
2143 ir_node *mem = get_Store_mem(node);
2144 ir_node *ptr = get_Store_ptr(node);
2145 ir_mode *mode = get_irn_mode(val);
2146 unsigned bits = get_mode_size_bits(mode);
2151 /* handle only GP modes for now... */
2152 if(!ia32_mode_needs_gp_reg(mode))
2156 /* store must be the only user of the val node */
2157 if(get_irn_n_edges(val) > 1)
2159 /* skip pointless convs */
2161 ir_node *conv_op = get_Conv_op(val);
2162 ir_mode *pred_mode = get_irn_mode(conv_op);
2163 if (!ia32_mode_needs_gp_reg(pred_mode))
2165 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2173 /* value must be in the same block */
2174 if(get_nodes_block(node) != get_nodes_block(val))
2177 switch (get_irn_opcode(val)) {
/* Add by +/-1 gets the dedicated Inc/Dec memory forms */
2179 op1 = get_Add_left(val);
2180 op2 = get_Add_right(val);
2181 if(is_Const_1(op2)) {
2182 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2183 new_rd_ia32_IncMem);
2185 } else if(is_Const_Minus_1(op2)) {
2186 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2187 new_rd_ia32_DecMem);
2190 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2191 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2192 match_dest_am | match_commutative |
2196 op1 = get_Sub_left(val);
2197 op2 = get_Sub_right(val);
2198 if (is_Const(op2)) {
2199 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2201 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2202 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2203 match_dest_am | match_immediate |
2207 op1 = get_And_left(val);
2208 op2 = get_And_right(val);
2209 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2210 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2211 match_dest_am | match_commutative |
2215 op1 = get_Or_left(val);
2216 op2 = get_Or_right(val);
2217 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2218 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2219 match_dest_am | match_commutative |
2223 op1 = get_Eor_left(val);
2224 op2 = get_Eor_right(val);
2225 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2226 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2227 match_dest_am | match_commutative |
/* shifts/rotates pass the same constructor twice: no 8-bit variant */
2231 op1 = get_Shl_left(val);
2232 op2 = get_Shl_right(val);
2233 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2234 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2235 match_dest_am | match_immediate);
2238 op1 = get_Shr_left(val);
2239 op2 = get_Shr_right(val);
2240 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2241 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2242 match_dest_am | match_immediate);
2245 op1 = get_Shrs_left(val);
2246 op2 = get_Shrs_right(val);
2247 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2248 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2249 match_dest_am | match_immediate);
2252 op1 = get_Rotl_left(val);
2253 op2 = get_Rotl_right(val);
2254 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2255 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2256 match_dest_am | match_immediate);
2258 /* TODO: match ROR patterns... */
2260 new_node = try_create_SetMem(val, ptr, mem);
2263 op1 = get_Minus_op(val);
2264 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2267 /* should be lowered already */
2268 assert(mode != mode_b);
2269 op1 = get_Not_op(val);
2270 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* propagate pinned state from the original Store to the new node */
2276 if(new_node != NULL) {
2277 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2278 get_irn_pinned(node) == op_pin_state_pinned) {
2279 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns whether node is a Conv from a float mode to signed 32-bit int
 * (candidate for a vfist). NOTE(review): lines elided (conv_op/conv_mode
 * declarations, is_Conv guard, return statements, closing brace). */
static int is_float_to_int32_conv(const ir_node *node)
2288 ir_mode *mode = get_irn_mode(node);
2292 if(get_mode_size_bits(mode) != 32 || !ia32_mode_needs_gp_reg(mode))
2294 /* don't report unsigned as conv to 32bit, because we really need to do
2295 * a vfist with 64bit signed in this case */
2296 if(!mode_is_signed(mode))
2301 conv_op = get_Conv_op(node);
2302 conv_mode = get_irn_mode(conv_op);
2304 if(!mode_is_float(conv_mode))
/* NOTE(review): lines elided (declarations ofs/i/ins, the do-loop header,
 * ofs/size updates, closing braces); code kept byte-identical. */
2311 * Transform a Store(floatConst).
2313 * @return the created ia32 Store node
2315 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2317 ir_mode *mode = get_irn_mode(cns);
2318 unsigned size = get_mode_size_bytes(mode);
2319 tarval *tv = get_Const_tarval(cns);
2320 ir_node *block = get_nodes_block(node);
2321 ir_node *new_block = be_transform_node(block);
2322 ir_node *ptr = get_Store_ptr(node);
2323 ir_node *mem = get_Store_mem(node);
2324 ir_graph *irg = current_ir_graph;
2325 dbg_info *dbgi = get_irn_dbg_info(node);
2329 ia32_address_t addr;
/* the float constant is written as one or more 32-bit integer stores */
2331 assert(size % 4 == 0);
2334 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word of the constant's bit pattern */
2338 get_tarval_sub_bits(tv, ofs) |
2339 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2340 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2341 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2342 ir_node *imm = create_Immediate(NULL, 0, val);
2344 ir_node *new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2345 addr.index, addr.mem, imm);
2347 set_irn_pinned(new_node, get_irn_pinned(node));
2348 set_ia32_op_type(new_node, ia32_AddrModeD);
2349 set_ia32_ls_mode(new_node, mode_Iu);
2350 set_address(new_node, &addr);
2351 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2353 ins[i++] = new_node;
2358 } while (size != 0);
/* several partial stores are joined by a Sync */
2360 return i == 1 ? ins[0] : new_rd_Sync(dbgi, irg, new_block, i, ins);
/* NOTE(review): lines elided (new_node declaration, *fist assignments,
 * the return, closing braces); code kept byte-identical. */
2364 * Generate a vfist or vfisttp instruction.
2366 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2367 ir_node *mem, ir_node *val, ir_node **fist)
2371 if (ia32_cg_config.use_fisttp) {
2372 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2373 if other users exists */
2374 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2375 ir_node *vfisttp = new_rd_ia32_vfisttp(dbgi, irg, block, base, index, mem, val);
2376 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2377 be_new_Keep(reg_class, irg, block, 1, &value);
2379 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* no fisttp: classic vfist with an explicit FPU truncate round mode */
2382 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2385 new_node = new_rd_ia32_vfist(dbgi, irg, block, base, index, mem, val, trunc_mode);
/* NOTE(review): lines elided (noreg fallbacks for base/index, some loop
 * bodies of the Conv-skipping, the return, closing braces). */
2391 * Transforms a normal Store.
2393 * @return the created ia32 Store node
2395 static ir_node *gen_normal_Store(ir_node *node)
2397 ir_node *val = get_Store_value(node);
2398 ir_mode *mode = get_irn_mode(val);
2399 ir_node *block = get_nodes_block(node);
2400 ir_node *new_block = be_transform_node(block);
2401 ir_node *ptr = get_Store_ptr(node);
2402 ir_node *mem = get_Store_mem(node);
2403 ir_graph *irg = current_ir_graph;
2404 dbg_info *dbgi = get_irn_dbg_info(node);
2405 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2406 ir_node *new_val, *new_node, *store;
2407 ia32_address_t addr;
2409 /* check for destination address mode */
2410 new_node = try_create_dest_am(node);
2411 if (new_node != NULL)
2414 /* construct store address */
2415 memset(&addr, 0, sizeof(addr));
2416 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2418 if (addr.base == NULL) {
2421 addr.base = be_transform_node(addr.base);
2424 if (addr.index == NULL) {
2427 addr.index = be_transform_node(addr.index);
2429 addr.mem = be_transform_node(mem);
/* float store: xStore (SSE2) or vfst (x87), skipping same-mode Convs */
2431 if (mode_is_float(mode)) {
2432 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2434 while (is_Conv(val) && mode == get_irn_mode(val)) {
2435 ir_node *op = get_Conv_op(val);
2436 if (!mode_is_float(get_irn_mode(op)))
2440 new_val = be_transform_node(val);
2441 if (ia32_cg_config.use_sse2) {
2442 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2443 addr.index, addr.mem, new_val);
2445 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2446 addr.index, addr.mem, new_val, mode);
/* x87 path: Store(Conv(float->int32)) becomes a direct vfist */
2449 } else if (!ia32_cg_config.use_sse2 && is_float_to_int32_conv(val)) {
2450 val = get_Conv_op(val);
2452 /* TODO: is this optimisation still necessary at all (middleend)? */
2453 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2454 while (is_Conv(val)) {
2455 ir_node *op = get_Conv_op(val);
2456 if (!mode_is_float(get_irn_mode(op)))
2458 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2462 new_val = be_transform_node(val);
2463 new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: 8-bit stores need the dedicated Store8Bit node */
2465 new_val = create_immediate_or_transform(val, 0);
2466 assert(mode != mode_b);
2468 if (get_mode_size_bits(mode) == 8) {
2469 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2470 addr.index, addr.mem, new_val);
2472 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2473 addr.index, addr.mem, new_val);
2478 set_irn_pinned(store, get_irn_pinned(node));
2479 set_ia32_op_type(store, ia32_AddrModeD);
2480 set_ia32_ls_mode(store, mode);
2482 set_address(store, &addr);
2483 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2489 * Transforms a Store.
2491 * @return the created ia32 Store node
/* Dispatcher: floating-point constant stores may be lowered specially
 * (gen_float_const_Store) when the constant is not "simple" for the
 * active FP unit; everything else goes through gen_normal_Store. */
2493 static ir_node *gen_Store(ir_node *node)
2495 ir_node *val = get_Store_value(node);
2496 ir_mode *mode = get_irn_mode(val);
2498 if (mode_is_float(mode) && is_Const(val)) {
2501 /* we are storing a floating point constant */
2502 if (ia32_cg_config.use_sse2) {
2503 transform = !is_simple_sse_Const(val);
2505 transform = !is_simple_x87_Const(val);
2508 return gen_float_const_Store(node, val);
2510 return gen_normal_Store(node);
2514 * Transforms a Switch.
2516 * @return the created ia32 SwitchJmp node
/* Builds an ia32 SwitchJmp from a Cond over a non-mode_b selector.
 * Scans the out-Projs to find the case-value range; if the smallest case
 * is non-zero, biases the selector with a Lea carrying a negative offset
 * so the jump table can start at 0. */
2518 static ir_node *create_Switch(ir_node *node)
2520 ir_graph *irg = current_ir_graph;
2521 dbg_info *dbgi = get_irn_dbg_info(node);
2522 ir_node *block = be_transform_node(get_nodes_block(node));
2523 ir_node *sel = get_Cond_selector(node);
2524 ir_node *new_sel = be_transform_node(sel);
2525 int switch_min = INT_MAX;
2526 int switch_max = INT_MIN;
2527 long default_pn = get_Cond_defaultProj(node);
2529 const ir_edge_t *edge;
2531 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2533 /* determine the smallest switch case value */
2534 foreach_out_edge(node, edge) {
2535 ir_node *proj = get_edge_src_irn(edge);
2536 long pn = get_Proj_proj(proj);
2537 if(pn == default_pn)
/* Guard against pathologically large jump tables. */
2546 if((unsigned) (switch_max - switch_min) > 256000) {
2547 panic("Size of switch %+F bigger than 256000", node);
2550 if (switch_min != 0) {
2551 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2553 /* if smallest switch case is not 0 we need an additional sub */
2554 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2555 add_ia32_am_offs_int(new_sel, -switch_min);
2556 set_ia32_op_type(new_sel, ia32_AddrModeS);
2558 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2561 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2562 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2568 * Transform a Cond node.
/* mode_b selectors become a conditional jump (Jcc) fed by the flags of
 * the originating compare; any other selector mode is a switch. */
2570 static ir_node *gen_Cond(ir_node *node) {
2571 ir_node *block = get_nodes_block(node);
2572 ir_node *new_block = be_transform_node(block);
2573 ir_graph *irg = current_ir_graph;
2574 dbg_info *dbgi = get_irn_dbg_info(node);
2575 ir_node *sel = get_Cond_selector(node);
2576 ir_mode *sel_mode = get_irn_mode(sel);
2577 ir_node *flags = NULL;
2581 if (sel_mode != mode_b) {
2582 return create_Switch(node);
2585 /* we get flags from a Cmp */
2586 flags = get_flags_node(sel, &pnc);
2588 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2589 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Duplicates a be_Copy; values that live in GP registers are normalized
 * to mode_Iu (all integer ops are on 32bit registers in this backend). */
2594 static ir_node *gen_be_Copy(ir_node *node)
2596 ir_node *new_node = be_duplicate_node(node);
2597 ir_mode *mode = get_irn_mode(new_node);
2599 if (ia32_mode_needs_gp_reg(mode)) {
2600 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare.  With fucomi support the comparison sets
 * eflags directly (vFucomi); otherwise the fpu status word is produced
 * (vFtstFnstsw for compare-against-0 when ftst is usable, vFucomFnstsw
 * otherwise) and transferred to eflags via Sahf. */
2606 static ir_node *create_Fucom(ir_node *node)
2608 ir_graph *irg = current_ir_graph;
2609 dbg_info *dbgi = get_irn_dbg_info(node);
2610 ir_node *block = get_nodes_block(node);
2611 ir_node *new_block = be_transform_node(block);
2612 ir_node *left = get_Cmp_left(node);
2613 ir_node *new_left = be_transform_node(left);
2614 ir_node *right = get_Cmp_right(node);
2618 if(ia32_cg_config.use_fucomi) {
2619 new_right = be_transform_node(right);
2620 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2622 set_ia32_commutative(new_node);
2623 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2625 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2626 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2629 new_right = be_transform_node(right);
2630 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2634 set_ia32_commutative(new_node);
2636 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Move the fpu status word (in ax) into eflags. */
2638 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2639 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Creates an SSE2 Ucomi compare.  Operands are matched through the
 * common address-mode machinery (commutative, memory operand allowed). */
2645 static ir_node *create_Ucomi(ir_node *node)
2647 ir_graph *irg = current_ir_graph;
2648 dbg_info *dbgi = get_irn_dbg_info(node);
2649 ir_node *src_block = get_nodes_block(node);
2650 ir_node *new_block = be_transform_node(src_block);
2651 ir_node *left = get_Cmp_left(node);
2652 ir_node *right = get_Cmp_right(node);
2654 ia32_address_mode_t am;
2655 ia32_address_t *addr = &am.addr;
2657 match_arguments(&am, src_block, left, right, NULL,
2658 match_commutative | match_am);
2660 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2661 addr->mem, am.new_op1, am.new_op2,
2663 set_am_attributes(new_node, &am);
2665 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2667 new_node = fix_mem_proj(new_node, &am);
2673 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2674 * to fold an and into a test node
/* Test only sets/clears ZF meaningfully for ==/!= checks, so folding
 * And+Cmp into Test is valid only when every consumer tests Eq or Lg. */
2676 static bool can_fold_test_and(ir_node *node)
2678 const ir_edge_t *edge;
2680 /** we can only have eq and lg projs */
2681 foreach_out_edge(node, edge) {
2682 ir_node *proj = get_edge_src_irn(edge);
2683 pn_Cmp pnc = get_Proj_proj(proj);
2684 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2692 * returns true if it is assured, that the upper bits of a node are "clean"
2693 * which means for a 16 or 8 bit value, that the upper bits in the register
2694 * are 0 for unsigned and a copy of the last significant bit for unsigned
/* (The comment above likely means "...sign bit for signed" — garbled in
 * the original.)  Recursive analysis over already-transformed ia32 nodes:
 * Projs delegate to their predecessor; Conv_I2I(8Bit) is clean when its
 * ls-mode is at most as wide and has the same signedness; Shr/And with a
 * suitable immediate guarantee cleared upper bits for unsigned modes;
 * immediates are checked by shifting their value. */
2697 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2699 assert(ia32_mode_needs_gp_reg(mode));
2700 if (get_mode_size_bits(mode) >= 32)
2703 if (is_Proj(transformed_node))
2704 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2706 if (is_ia32_Conv_I2I(transformed_node)
2707 || is_ia32_Conv_I2I8Bit(transformed_node)) {
2708 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2709 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2711 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2717 if (is_ia32_Shr(transformed_node) && !mode_is_signed(mode)) {
2718 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2719 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2720 const ia32_immediate_attr_t *attr
2721 = get_ia32_immediate_attr_const(right);
/* A shift right by >= (32 - bits(mode)) leaves only clean bits. */
2722 if (attr->symconst == 0
2723 && (unsigned) attr->offset >= (32 - get_mode_size_bits(mode))) {
2727 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2730 if (is_ia32_And(transformed_node) && !mode_is_signed(mode)) {
2731 ir_node *right = get_irn_n(transformed_node, n_ia32_And_right);
2732 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2733 const ia32_immediate_attr_t *attr
2734 = get_ia32_immediate_attr_const(right);
/* Mask fits entirely in the low bits(mode) bits => upper bits are 0. */
2735 if (attr->symconst == 0
2736 && (unsigned) attr->offset
2737 <= (0xffffffff >> (32 - get_mode_size_bits(mode)))) {
2744 /* TODO recurse on Or, Xor, ... if appropriate? */
2746 if (is_ia32_Immediate(transformed_node)
2747 || is_ia32_Const(transformed_node)) {
2748 const ia32_immediate_attr_t *attr
2749 = get_ia32_immediate_attr_const(transformed_node);
2750 if (mode_is_signed(mode)) {
/* Signed: clean iff all upper bits equal the sign bit. */
2751 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2752 if (shifted == 0 || shifted == -1)
2755 unsigned long shifted = (unsigned long) attr->offset;
2756 shifted >>= get_mode_size_bits(mode);
2766 * Generate code for a Cmp.
/* Float compares go to Ucomi (SSE2) or Fucom (x87).  Integer compares
 * prefer Test when the pattern is (x & y) ==/!= 0 with a single user;
 * both paths widen the compare to 32bit when the upper bits of both
 * operands are provably clean, since the 32bit opcode is smaller. */
2768 static ir_node *gen_Cmp(ir_node *node)
2770 ir_graph *irg = current_ir_graph;
2771 dbg_info *dbgi = get_irn_dbg_info(node);
2772 ir_node *block = get_nodes_block(node);
2773 ir_node *new_block = be_transform_node(block);
2774 ir_node *left = get_Cmp_left(node);
2775 ir_node *right = get_Cmp_right(node);
2776 ir_mode *cmp_mode = get_irn_mode(left);
2778 ia32_address_mode_t am;
2779 ia32_address_t *addr = &am.addr;
2782 if(mode_is_float(cmp_mode)) {
2783 if (ia32_cg_config.use_sse2) {
2784 return create_Ucomi(node);
2786 return create_Fucom(node);
2790 assert(ia32_mode_needs_gp_reg(cmp_mode));
2792 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2793 cmp_unsigned = !mode_is_signed(cmp_mode);
2794 if (is_Const_0(right) &&
2796 get_irn_n_edges(left) == 1 &&
2797 can_fold_test_and(node)) {
2798 /* Test(and_left, and_right) */
2799 ir_node *and_left = get_And_left(left);
2800 ir_node *and_right = get_And_right(left);
2802 /* matze: code here used mode instead of cmd_mode, I think it is always
2803 * the same as cmp_mode, but I leave this here to see if this is really
2806 assert(get_irn_mode(and_left) == cmp_mode);
2808 match_arguments(&am, block, and_left, and_right, NULL,
2810 match_am | match_8bit_am | match_16bit_am |
2811 match_am_and_immediates | match_immediate |
2812 match_8bit | match_16bit);
2814 /* use 32bit compare mode if possible since the opcode is smaller */
2815 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2816 upper_bits_clean(am.new_op2, cmp_mode)) {
2817 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2820 if (get_mode_size_bits(cmp_mode) == 8) {
2821 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2822 addr->index, addr->mem, am.new_op1,
2823 am.new_op2, am.ins_permuted,
2826 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2827 addr->index, addr->mem, am.new_op1,
2828 am.new_op2, am.ins_permuted,
2832 /* Cmp(left, right) */
2833 match_arguments(&am, block, left, right, NULL,
2834 match_commutative | match_am | match_8bit_am |
2835 match_16bit_am | match_am_and_immediates |
2836 match_immediate | match_8bit | match_16bit);
2837 /* use 32bit compare mode if possible since the opcode is smaller */
2838 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2839 upper_bits_clean(am.new_op2, cmp_mode)) {
2840 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2843 if (get_mode_size_bits(cmp_mode) == 8) {
2844 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2845 addr->index, addr->mem, am.new_op1,
2846 am.new_op2, am.ins_permuted,
2849 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2850 addr->index, addr->mem, am.new_op1,
2851 am.new_op2, am.ins_permuted, cmp_unsigned);
2854 set_am_attributes(new_node, &am);
2855 set_ia32_ls_mode(new_node, cmp_mode);
2857 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2859 new_node = fix_mem_proj(new_node, &am);
/* Creates an ia32 CMov for a Mux over GP values; flags/new_flags are the
 * (old/transformed) condition flags, pnc the compare relation.  Requires
 * cmov support (asserted). */
2864 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2867 ir_graph *irg = current_ir_graph;
2868 dbg_info *dbgi = get_irn_dbg_info(node);
2869 ir_node *block = get_nodes_block(node);
2870 ir_node *new_block = be_transform_node(block);
2871 ir_node *val_true = get_Mux_true(node);
2872 ir_node *val_false = get_Mux_false(node);
2874 match_flags_t match_flags;
2875 ia32_address_mode_t am;
2876 ia32_address_t *addr;
2878 assert(ia32_cg_config.use_cmov);
2879 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2883 match_flags = match_commutative | match_am | match_16bit_am |
2886 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2888 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2889 addr->mem, am.new_op1, am.new_op2, new_flags,
2890 am.ins_permuted, pnc);
2891 set_am_attributes(new_node, &am);
2893 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2895 new_node = fix_mem_proj(new_node, &am);
2901 * Creates a ia32 Setcc instruction.
/* The Set instruction only produces an 8bit result; when the Mux result
 * mode is wider, the value is zero-extended with a Conv_I2I8Bit. */
2903 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2904 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2907 ir_graph *irg = current_ir_graph;
2908 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2909 ir_node *nomem = new_NoMem();
2910 ir_mode *mode = get_irn_mode(orig_node);
2913 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2914 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2916 /* we might need to conv the result up */
2917 if (get_mode_size_bits(mode) > 8) {
2918 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2919 nomem, new_node, mode_Bu);
2920 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2927 * Create instruction for an unsigned Difference or Zero.
/* Lowers Mux(a >=u b, a-b, 0) branch-free: Sub sets carry, Sbb of a
 * ProduceVal with itself yields an all-ones/all-zero mask from the carry,
 * and the final And masks the difference to 0 on borrow. */
2929 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b) {
2930 ir_graph *irg = current_ir_graph;
2931 ir_mode *mode = get_irn_mode(psi);
2932 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
2935 new_node = gen_binop(psi, a, b, new_rd_ia32_Sub,
2936 match_mode_neutral | match_am | match_immediate | match_two_users);
2938 block = get_nodes_block(new_node);
2940 if (is_Proj(new_node)) {
2941 sub = get_Proj_pred(new_node);
2942 assert(is_ia32_Sub(sub));
/* The Sub must be mode_T so we can additionally project its flags. */
2945 set_irn_mode(sub, mode_T);
2946 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2948 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2950 dbgi = get_irn_dbg_info(psi);
2951 noreg = ia32_new_NoReg_gp(env_cg);
2952 tmpreg = new_rd_ia32_ProduceVal(dbgi, irg, block);
2953 nomem = new_NoMem();
2954 sbb = new_rd_ia32_Sbb(dbgi, irg, block, noreg, noreg, nomem, tmpreg, tmpreg, eflags);
2956 new_node = new_rd_ia32_And(dbgi, irg, block, noreg, noreg, nomem, new_node, sbb);
2957 set_ia32_commutative(new_node);
2962 * Transforms a Mux node into CMov.
2964 * @return The transformed node.
/* Float Mux: with SSE2, min/max patterns Mux(a<=b,a,b) etc. map to
 * xMin/xMax; other float Muxes are unsupported (panic).  Integer Mux:
 * recognize the unsigned "difference or zero" pattern (create_Doz),
 * 0/1-constant results (Setcc via create_set_32bit), else CMov. */
2966 static ir_node *gen_Mux(ir_node *node)
2968 dbg_info *dbgi = get_irn_dbg_info(node);
2969 ir_node *block = get_nodes_block(node);
2970 ir_node *new_block = be_transform_node(block);
2971 ir_node *mux_true = get_Mux_true(node);
2972 ir_node *mux_false = get_Mux_false(node);
2973 ir_node *cond = get_Mux_sel(node);
2974 ir_mode *mode = get_irn_mode(node);
2977 assert(get_irn_mode(cond) == mode_b);
2979 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
2980 if (mode_is_float(mode)) {
2981 ir_node *cmp = get_Proj_pred(cond);
2982 ir_node *cmp_left = get_Cmp_left(cmp);
2983 ir_node *cmp_right = get_Cmp_right(cmp);
2984 pn_Cmp pnc = get_Proj_proj(cond);
2986 if (ia32_cg_config.use_sse2) {
2987 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
2988 if (cmp_left == mux_true && cmp_right == mux_false) {
2989 /* Mux(a <= b, a, b) => MIN */
2990 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2991 match_commutative | match_am | match_two_users);
2992 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2993 /* Mux(a <= b, b, a) => MAX */
2994 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2995 match_commutative | match_am | match_two_users);
2997 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
2998 if (cmp_left == mux_true && cmp_right == mux_false) {
2999 /* Mux(a >= b, a, b) => MAX */
3000 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
3001 match_commutative | match_am | match_two_users);
3002 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3003 /* Mux(a >= b, b, a) => MIN */
3004 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
3005 match_commutative | match_am | match_two_users);
3009 panic("cannot transform floating point Mux");
3015 assert(ia32_mode_needs_gp_reg(mode));
3017 if (is_Proj(cond)) {
3018 ir_node *cmp = get_Proj_pred(cond);
3020 ir_node *cmp_left = get_Cmp_left(cmp);
3021 ir_node *cmp_right = get_Cmp_right(cmp);
3022 pn_Cmp pnc = get_Proj_proj(cond);
3024 /* check for unsigned Doz first */
3025 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3026 is_Const_0(mux_false) && is_Sub(mux_true) &&
3027 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3028 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3029 return create_Doz(node, cmp_left, cmp_right);
3030 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3031 is_Const_0(mux_true) && is_Sub(mux_false) &&
3032 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3033 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3034 return create_Doz(node, cmp_left, cmp_right);
3039 flags = get_flags_node(cond, &pnc);
3041 if (is_Const(mux_true) && is_Const(mux_false)) {
3042 /* both are const, good */
3043 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3044 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3045 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3046 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3048 /* Not that simple. */
3053 new_node = create_CMov(node, cond, flags, pnc);
3061 * Create a conversion from x87 state register to general purpose.
/* Implements float->int via a fist store to the frame followed by an
 * integer Load of the result.  fist can only store signed integers, so
 * unsigned 32bit values are stored as 64bit (mode_Ls) and only the low
 * 32 bits are loaded back. */
3063 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
3064 ir_node *block = be_transform_node(get_nodes_block(node));
3065 ir_node *op = get_Conv_op(node);
3066 ir_node *new_op = be_transform_node(op);
3067 ia32_code_gen_t *cg = env_cg;
3068 ir_graph *irg = current_ir_graph;
3069 dbg_info *dbgi = get_irn_dbg_info(node);
3070 ir_node *noreg = ia32_new_NoReg_gp(cg);
3071 ir_mode *mode = get_irn_mode(node);
3072 ir_node *fist, *load, *mem;
3074 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
3075 set_irn_pinned(fist, op_pin_state_floats);
3076 set_ia32_use_frame(fist);
3077 set_ia32_op_type(fist, ia32_AddrModeD);
3079 assert(get_mode_size_bits(mode) <= 32);
3080 /* exception we can only store signed 32 bit integers, so for unsigned
3081 we store a 64bit (signed) integer and load the lower bits */
3082 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3083 set_ia32_ls_mode(fist, mode_Ls);
3085 set_ia32_ls_mode(fist, mode_Is);
3087 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
3090 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, mem);
3092 set_irn_pinned(load, op_pin_state_floats);
3093 set_ia32_use_frame(load);
3094 set_ia32_op_type(load, ia32_AddrModeS);
3095 set_ia32_ls_mode(load, mode_Is);
/* Request a stack entity wide enough for what the fist wrote. */
3096 if(get_ia32_ls_mode(fist) == mode_Ls) {
3097 ia32_attr_t *attr = get_ia32_attr(load);
3098 attr->data.need_64bit_stackent = 1;
3100 ia32_attr_t *attr = get_ia32_attr(load);
3101 attr->data.need_32bit_stackent = 1;
3103 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
3105 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3109 * Creates a x87 strict Conv by placing a Store and a Load
/* x87 registers always hold 80bit extended precision; a strict Conv to a
 * narrower float mode is realized by spilling (vfst) with the target mode
 * and reloading (vfld), which forces the rounding. */
3111 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3113 ir_node *block = get_nodes_block(node);
3114 ir_graph *irg = current_ir_graph;
3115 dbg_info *dbgi = get_irn_dbg_info(node);
3116 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3117 ir_node *nomem = new_NoMem();
3118 ir_node *frame = get_irg_frame(irg);
3119 ir_node *store, *load;
3122 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
3124 set_ia32_use_frame(store);
3125 set_ia32_op_type(store, ia32_AddrModeD);
3126 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
3128 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3130 set_ia32_use_frame(load);
3131 set_ia32_op_type(load, ia32_AddrModeS);
3132 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3134 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3141 * Create a conversion from general purpose to x87 register
/* int->float on x87 via fild.  Signed 32bit sources may use a
 * memory-source fild directly (source address mode); otherwise the value
 * is first converted to 32bit, stored to the frame and fild'ed.  Unsigned
 * 32bit needs a 64bit slot with the upper 4 bytes zeroed so fild reads a
 * non-negative 64bit value. */
3142 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3143 ir_node *src_block = get_nodes_block(node);
3144 ir_node *block = be_transform_node(src_block);
3145 ir_graph *irg = current_ir_graph;
3146 dbg_info *dbgi = get_irn_dbg_info(node);
3147 ir_node *op = get_Conv_op(node);
3148 ir_node *new_op = NULL;
3151 ir_mode *store_mode;
3157 /* fild can use source AM if the operand is a signed 32bit integer */
3158 if (src_mode == mode_Is) {
3159 ia32_address_mode_t am;
3161 match_arguments(&am, src_block, NULL, op, NULL,
3162 match_am | match_try_am);
3163 if (am.op_type == ia32_AddrModeS) {
3164 ia32_address_t *addr = &am.addr;
3166 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3167 addr->index, addr->mem);
3168 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3171 set_am_attributes(fild, &am);
3172 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3174 fix_mem_proj(fild, &am);
3179 if(new_op == NULL) {
3180 new_op = be_transform_node(op);
3183 noreg = ia32_new_NoReg_gp(env_cg);
3184 nomem = new_NoMem();
3185 mode = get_irn_mode(op);
3187 /* first convert to 32 bit signed if necessary */
3188 src_bits = get_mode_size_bits(src_mode);
3189 if (src_bits == 8) {
3190 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3192 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3194 } else if (src_bits < 32) {
3195 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3197 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3201 assert(get_mode_size_bits(mode) == 32);
3204 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3207 set_ia32_use_frame(store);
3208 set_ia32_op_type(store, ia32_AddrModeD);
3209 set_ia32_ls_mode(store, mode_Iu);
3211 /* exception for 32bit unsigned, do a 64bit spill+load */
3212 if(!mode_is_signed(mode)) {
3215 ir_node *zero_const = create_Immediate(NULL, 0, 0);
/* Zero the upper half of the 64bit slot (offset +4). */
3217 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3218 get_irg_frame(irg), noreg, nomem,
3221 set_ia32_use_frame(zero_store);
3222 set_ia32_op_type(zero_store, ia32_AddrModeD);
3223 add_ia32_am_offs_int(zero_store, 4);
3224 set_ia32_ls_mode(zero_store, mode_Iu);
/* Both stores must complete before the fild reads the slot. */
3229 store = new_rd_Sync(dbgi, irg, block, 2, in);
3230 store_mode = mode_Ls;
3232 store_mode = mode_Is;
3236 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3238 set_ia32_use_frame(fild);
3239 set_ia32_op_type(fild, ia32_AddrModeS);
3240 set_ia32_ls_mode(fild, store_mode);
3242 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3248 * Create a conversion from one integer mode into another one
/* Emits a Conv_I2I(8Bit) with ls-mode set to the smaller of the two
 * modes.  If the operand's upper bits are already clean the conversion
 * is skipped entirely (the matched operand is returned unchanged —
 * return statement not visible in this chunk). */
3250 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3251 dbg_info *dbgi, ir_node *block, ir_node *op,
3254 ir_graph *irg = current_ir_graph;
3255 int src_bits = get_mode_size_bits(src_mode);
3256 int tgt_bits = get_mode_size_bits(tgt_mode);
3257 ir_node *new_block = be_transform_node(block);
3259 ir_mode *smaller_mode;
3261 ia32_address_mode_t am;
3262 ia32_address_t *addr = &am.addr;
3265 if (src_bits < tgt_bits) {
3266 smaller_mode = src_mode;
3267 smaller_bits = src_bits;
3269 smaller_mode = tgt_mode;
3270 smaller_bits = tgt_bits;
3273 #ifdef DEBUG_libfirm
3275 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3280 match_arguments(&am, block, NULL, op, NULL,
3281 match_8bit | match_16bit |
3282 match_am | match_8bit_am | match_16bit_am);
3284 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3285 /* unnecessary conv. in theory it shouldn't have been AM */
3286 assert(is_ia32_NoReg_GP(addr->base));
3287 assert(is_ia32_NoReg_GP(addr->index));
3288 assert(is_NoMem(addr->mem));
3289 assert(am.addr.offset == 0);
3290 assert(am.addr.symconst_ent == NULL);
3294 if (smaller_bits == 8) {
3295 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3296 addr->index, addr->mem, am.new_op2,
3299 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3300 addr->index, addr->mem, am.new_op2,
3303 set_am_attributes(new_node, &am);
3304 /* match_arguments assume that out-mode = in-mode, this isn't true here
3306 set_ia32_ls_mode(new_node, smaller_mode);
3307 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3308 new_node = fix_mem_proj(new_node, &am);
3313 * Transforms a Conv node.
3315 * @return The created ia32 Conv node
/* Central Conv dispatcher: handles bool sources (no-op), same-mode
 * (strict) conversions, float<->float, float->int (Conv_FP2I or x87
 * fist path), int->float (Conv_I2FP or x87 fild path, with an extra
 * strict rounding step when the int has more significant bits than the
 * float mantissa), and int->int (create_I2I_Conv). */
3317 static ir_node *gen_Conv(ir_node *node) {
3318 ir_node *block = get_nodes_block(node);
3319 ir_node *new_block = be_transform_node(block);
3320 ir_node *op = get_Conv_op(node);
3321 ir_node *new_op = NULL;
3322 ir_graph *irg = current_ir_graph;
3323 dbg_info *dbgi = get_irn_dbg_info(node);
3324 ir_mode *src_mode = get_irn_mode(op);
3325 ir_mode *tgt_mode = get_irn_mode(node);
3326 int src_bits = get_mode_size_bits(src_mode);
3327 int tgt_bits = get_mode_size_bits(tgt_mode);
3328 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3329 ir_node *nomem = new_rd_NoMem(irg);
3330 ir_node *res = NULL;
3332 if (src_mode == mode_b) {
3333 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3334 /* nothing to do, we already model bools as 0/1 ints */
3335 return be_transform_node(op);
3338 if (src_mode == tgt_mode) {
3339 if (get_Conv_strict(node)) {
3340 if (ia32_cg_config.use_sse2) {
3341 /* when we are in SSE mode, we can kill all strict no-op conversion */
3342 return be_transform_node(op);
3345 /* this should be optimized already, but who knows... */
3346 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3347 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3348 return be_transform_node(op);
3352 if (mode_is_float(src_mode)) {
3353 new_op = be_transform_node(op);
3354 /* we convert from float ... */
3355 if (mode_is_float(tgt_mode)) {
/* Non-strict E->D narrowing is a no-op on x87 (both live as 80bit). */
3356 if(src_mode == mode_E && tgt_mode == mode_D
3357 && !get_Conv_strict(node)) {
3358 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3363 if (ia32_cg_config.use_sse2) {
3364 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3365 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3367 set_ia32_ls_mode(res, tgt_mode);
3369 if(get_Conv_strict(node)) {
3370 res = gen_x87_strict_conv(tgt_mode, new_op);
3371 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3374 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3379 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3380 if (ia32_cg_config.use_sse2) {
3381 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3383 set_ia32_ls_mode(res, src_mode);
3385 return gen_x87_fp_to_gp(node);
3389 /* we convert from int ... */
3390 if (mode_is_float(tgt_mode)) {
3392 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3393 if (ia32_cg_config.use_sse2) {
3394 new_op = be_transform_node(op);
3395 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3397 set_ia32_ls_mode(res, tgt_mode);
3399 res = gen_x87_gp_to_fp(node, src_mode);
3400 if(get_Conv_strict(node)) {
3401 /* The strict-Conv is only necessary, if the int mode has more bits
3402 * than the float mantissa */
3403 size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3404 size_t float_mantissa;
3405 /* FIXME There is no way to get the mantissa size of a mode */
3406 switch (get_mode_size_bits(tgt_mode)) {
3407 case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
3408 case 64: float_mantissa = 52 + 1; break;
3410 case 96: float_mantissa = 64; break;
3411 default: float_mantissa = 0; break;
3413 if (float_mantissa < int_mantissa) {
3414 res = gen_x87_strict_conv(tgt_mode, res);
3415 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3420 } else if(tgt_mode == mode_b) {
3421 /* mode_b lowering already took care that we only have 0/1 values */
3422 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3423 src_mode, tgt_mode));
3424 return be_transform_node(op);
3427 if (src_bits == tgt_bits) {
3428 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3429 src_mode, tgt_mode));
3430 return be_transform_node(op);
3433 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to encode the node as an ia32 Immediate (subject to the given
 * constraint type); falls back to the normal transformation. */
3441 static ir_node *create_immediate_or_transform(ir_node *node,
3442 char immediate_constraint_type)
3444 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3445 if (new_node == NULL) {
3446 new_node = be_transform_node(node);
3452 * Transforms a FrameAddr into an ia32 Add.
/* (Actually emitted as a Lea with a frame entity — comment above is the
 * original's.) */
3454 static ir_node *gen_be_FrameAddr(ir_node *node) {
3455 ir_node *block = be_transform_node(get_nodes_block(node));
3456 ir_node *op = be_get_FrameAddr_frame(node);
3457 ir_node *new_op = be_transform_node(op);
3458 ir_graph *irg = current_ir_graph;
3459 dbg_info *dbgi = get_irn_dbg_info(node);
3460 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3463 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3464 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3465 set_ia32_use_frame(new_node);
3467 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3473 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Only rewrites Returns of a single primitive float value under SSE2:
 * the xmm result is spilled to the frame (xStoreSimple), reloaded into
 * the x87 stack (vfld), and a new Barrier is built with the fld result
 * and its memory Proj substituted for the original value/memory inputs.
 * All other Returns are duplicated unchanged. */
3475 static ir_node *gen_be_Return(ir_node *node) {
3476 ir_graph *irg = current_ir_graph;
3477 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3478 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3479 ir_entity *ent = get_irg_entity(irg);
3480 ir_type *tp = get_entity_type(ent);
3485 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3486 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3489 int pn_ret_val, pn_ret_mem, arity, i;
3491 assert(ret_val != NULL);
3492 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3493 return be_duplicate_node(node);
3496 res_type = get_method_res_type(tp, 0);
3498 if (! is_Primitive_type(res_type)) {
3499 return be_duplicate_node(node);
3502 mode = get_type_mode(res_type);
3503 if (! mode_is_float(mode)) {
3504 return be_duplicate_node(node);
3507 assert(get_method_n_ress(tp) == 1);
3509 pn_ret_val = get_Proj_proj(ret_val);
3510 pn_ret_mem = get_Proj_proj(ret_mem);
3512 /* get the Barrier */
3513 barrier = get_Proj_pred(ret_val);
3515 /* get result input of the Barrier */
3516 ret_val = get_irn_n(barrier, pn_ret_val);
3517 new_ret_val = be_transform_node(ret_val);
3519 /* get memory input of the Barrier */
3520 ret_mem = get_irn_n(barrier, pn_ret_mem);
3521 new_ret_mem = be_transform_node(ret_mem);
3523 frame = get_irg_frame(irg);
3525 dbgi = get_irn_dbg_info(barrier);
3526 block = be_transform_node(get_nodes_block(barrier));
3528 noreg = ia32_new_NoReg_gp(env_cg);
3530 /* store xmm0 onto stack */
3531 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3532 new_ret_mem, new_ret_val);
3533 set_ia32_ls_mode(sse_store, mode);
3534 set_ia32_op_type(sse_store, ia32_AddrModeD);
3535 set_ia32_use_frame(sse_store);
3537 /* load into x87 register */
3538 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3539 set_ia32_op_type(fld, ia32_AddrModeS);
3540 set_ia32_use_frame(fld);
3542 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3543 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3545 /* create a new barrier */
3546 arity = get_irn_arity(barrier);
3547 in = alloca(arity * sizeof(in[0]));
3548 for (i = 0; i < arity; ++i) {
3551 if (i == pn_ret_val) {
3553 } else if (i == pn_ret_mem) {
3556 ir_node *in = get_irn_n(barrier, i);
3557 new_in = be_transform_node(in);
3562 new_barrier = new_ir_node(dbgi, irg, block,
3563 get_irn_op(barrier), get_irn_mode(barrier),
3565 copy_node_attr(barrier, new_barrier);
3566 be_duplicate_deps(barrier, new_barrier);
3567 set_transformed_and_mark(barrier, new_barrier);
3569 /* transform normally */
3570 return be_duplicate_node(node);
3574 * Transform a be_AddSP into an ia32_SubSP.
/* Note the inversion: the ia32 stack grows downwards, so enlarging the
 * stack frame (AddSP) is a SubSP instruction. */
3576 static ir_node *gen_be_AddSP(ir_node *node)
3578 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3579 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3581 return gen_binop(node, sp, sz, new_rd_ia32_SubSP,
3582 match_am | match_immediate);
3586 * Transform a be_SubSP into an ia32_AddSP
/* Mirror of gen_be_AddSP: shrinking the frame adds to the stack pointer. */
3588 static ir_node *gen_be_SubSP(ir_node *node)
3590 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3591 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3593 return gen_binop(node, sp, sz, new_rd_ia32_AddSP,
3594 match_am | match_immediate);
3598 * Change some phi modes
/**
 * Transform a Phi: pick the backend mode for its register class, then
 * recreate the Phi with its OLD (untransformed) predecessors — Phis may sit
 * on loops, so arguments cannot be transformed eagerly and are fixed up in a
 * later pass (be_enqueue_preds queues the predecessors).
 * NOTE(review): elided listing — the assignments of the replacement mode and
 * the final "return phi;" are not visible here.
 */
3600 static ir_node *gen_Phi(ir_node *node) {
3601 ir_node *block = be_transform_node(get_nodes_block(node));
3602 ir_graph *irg = current_ir_graph;
3603 dbg_info *dbgi = get_irn_dbg_info(node);
3604 ir_mode *mode = get_irn_mode(node);
3607 if(ia32_mode_needs_gp_reg(mode)) {
3608 /* we shouldn't have any 64bit stuff around anymore */
3609 assert(get_mode_size_bits(mode) <= 32);
3610 /* all integer operations are on 32bit registers now */
3612 } else if(mode_is_float(mode)) {
3613 if (ia32_cg_config.use_sse2) {
3620 /* phi nodes allow loops, so we use the old arguments for now
3621 * and fix this later */
3622 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3623 get_irn_in(node) + 1)/* +1 skips the block input stored at index 0 */;
3624 copy_node_attr(node, phi);
3625 be_duplicate_deps(node, phi);
3627 be_set_transformed_node(node, phi);
3628 be_enqueue_preds(node);
/**
 * Transform an IJmp (indirect jump): the pointer-valued target is matched
 * into an ia32 address mode / immediate where possible, and an ia32_IJmp
 * is built with those operands.
 * NOTE(review): elided listing — the last constructor argument (line 3654)
 * and the final return are not visible here.
 */
3636 static ir_node *gen_IJmp(ir_node *node)
3638 ir_node *block = get_nodes_block(node);
3639 ir_node *new_block = be_transform_node(block);
3640 dbg_info *dbgi = get_irn_dbg_info(node);
3641 ir_node *op = get_IJmp_target(node);
3643 ia32_address_mode_t am;
3644 ia32_address_t *addr = &am.addr;
3646 assert(get_irn_mode(op) == mode_P);
3648 match_arguments(&am, block, NULL, op, NULL,
3649 match_am | match_8bit_am | match_16bit_am |
3650 match_immediate | match_8bit | match_16bit);
3652 new_node = new_rd_ia32_IJmp(dbgi, current_ir_graph, new_block,
3653 addr->base, addr->index, addr->mem,
3655 set_am_attributes(new_node, &am);
3656 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3658 new_node = fix_mem_proj(new_node, &am);
3664 * Transform a Bound node.
/**
 * Transform a Bound node. Only the lower-bound == 0 case (typical for Java
 * array checks) is supported: index and upper bound are compared via an
 * ia32 Sub whose flags feed an unsigned-less-than Jcc — an unsigned compare
 * against the upper bound simultaneously rejects negative indices.
 * NOTE(review): elided listing — e.g. "sub = res;" in the non-Proj branch
 * and the final return are not visible here.
 */
3666 static ir_node *gen_Bound(ir_node *node)
3669 ir_node *lower = get_Bound_lower(node);
3670 dbg_info *dbgi = get_irn_dbg_info(node);
3672 if (is_Const_0(lower)) {
3673 /* typical case for Java */
3674 ir_node *sub, *res, *flags, *block;
3675 ir_graph *irg = current_ir_graph;
3677 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3678 new_rd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3680 block = get_nodes_block(res);
3681 if (! is_Proj(res)) {
3683 set_irn_mode(sub, mode_T);
3684 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3686 sub = get_Proj_pred(res);
/* the Sub's flags output drives the conditional jump */
3688 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3689 new_node = new_rd_ia32_Jcc(dbgi, irg, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3690 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3692 panic("generic Bound not supported in ia32 Backend");
3698 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3700 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3701 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3703 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
3704 match_immediate | match_mode_neutral);
/**
 * Transform an ia32_l_ShrDep into a real ia32 Shr, built from the node's
 * val/count inputs.
 * NOTE(review): elided listing — the match-flags argument of the
 * gen_shift_binop call (line 3712) is not visible here.
 */
3707 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3709 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3710 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3711 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/**
 * Transform an ia32_l_SarDep into a real ia32 Sar (arithmetic shift right),
 * built from the node's val/count inputs.
 * NOTE(review): elided listing — the match-flags argument of the
 * gen_shift_binop call (line 3720) is not visible here.
 */
3715 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3717 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3718 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3719 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/**
 * Transform an ia32_l_Add into a real ia32 Add. The Add is forced into
 * mode_T so its flags output (needed by a following Adc of the 64bit
 * lowering) is available.
 * NOTE(review): elided listing — the final return is not visible here.
 */
3723 static ir_node *gen_ia32_l_Add(ir_node *node) {
3724 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3725 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3726 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
3727 match_commutative | match_am | match_immediate |
3728 match_mode_neutral);
/* gen_binop may hand back a result Proj — step to the Add itself */
3730 if(is_Proj(lowered)) {
3731 lowered = get_Proj_pred(lowered);
3733 assert(is_ia32_Add(lowered));
3734 set_irn_mode(lowered, mode_T);
3740 static ir_node *gen_ia32_l_Adc(ir_node *node)
3742 return gen_binop_flags(node, new_rd_ia32_Adc,
3743 match_commutative | match_am | match_immediate |
3744 match_mode_neutral);
3748 * Transforms a l_MulS into a "real" MulS node.
3750 * @return the created ia32 Mul node
3752 static ir_node *gen_ia32_l_Mul(ir_node *node) {
3753 ir_node *left = get_binop_left(node);
3754 ir_node *right = get_binop_right(node);
3756 return gen_binop(node, left, right, new_rd_ia32_Mul,
3757 match_commutative | match_am | match_mode_neutral);
3761 * Transforms a l_IMulS into a "real" IMul1OPS node.
3763 * @return the created ia32 IMul1OP node
3765 static ir_node *gen_ia32_l_IMul(ir_node *node) {
3766 ir_node *left = get_binop_left(node);
3767 ir_node *right = get_binop_right(node);
3769 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
3770 match_commutative | match_am | match_mode_neutral);
/**
 * Transform an ia32_l_Sub into a real ia32 Sub. The Sub is forced into
 * mode_T so its flags output (needed by a following Sbb of the 64bit
 * lowering) is available.
 * NOTE(review): elided listing — the final return is not visible here.
 */
3773 static ir_node *gen_ia32_l_Sub(ir_node *node) {
3774 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3775 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3776 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
3777 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a result Proj — step to the Sub itself */
3779 if(is_Proj(lowered)) {
3780 lowered = get_Proj_pred(lowered);
3782 assert(is_ia32_Sub(lowered));
3783 set_irn_mode(lowered, mode_T);
3789 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
3790 return gen_binop_flags(node, new_rd_ia32_Sbb,
3791 match_am | match_immediate | match_mode_neutral);
3795 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3796 * op1 - target to be shifted
3797 * op2 - contains bits to be shifted into target
3799 * Only op3 can be an immediate.
/**
 * Common builder for the lowered 64bit double-shifts: creates an ia32 ShlD
 * or ShrD from the high word, low word, and shift count of an l_ShlD/l_ShrD.
 * Only the count may become an immediate.
 * NOTE(review): elided listing — the declarations of new_count/new_node, the
 * count argument of the ShlD/ShrD constructors, and the final return are not
 * visible here.
 */
3801 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3802 ir_node *low, ir_node *count)
3804 ir_node *block = get_nodes_block(node);
3805 ir_node *new_block = be_transform_node(block);
3806 ir_graph *irg = current_ir_graph;
3807 dbg_info *dbgi = get_irn_dbg_info(node);
3808 ir_node *new_high = be_transform_node(high);
3809 ir_node *new_low = be_transform_node(low);
3813 /* the shift amount can be any mode that is bigger than 5 bits, since all
3814 * other bits are ignored anyway */
3815 while (is_Conv(count) &&
3816 get_irn_n_edges(count) == 1 &&
3817 mode_is_int(get_irn_mode(count))) {
3818 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3819 count = get_Conv_op(count);
3821 new_count = create_immediate_or_transform(count, 0);
3823 if (is_ia32_l_ShlD(node)) {
3824 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
3827 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
3830 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3835 static ir_node *gen_ia32_l_ShlD(ir_node *node)
3837 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
3838 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
3839 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
3840 return gen_lowered_64bit_shifts(node, high, low, count);
3843 static ir_node *gen_ia32_l_ShrD(ir_node *node)
3845 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
3846 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
3847 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
3848 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform an ia32_l_LLtoFloat (signed 64bit int -> float): stores the two
 * 32bit halves to a 64bit frame slot (low at offset 0, high at offset 4,
 * little-endian), syncs both stores, and fild-loads the slot as a 64bit
 * integer into an x87 register. Unsigned sources are not supported.
 * NOTE(review): elided listing — local declarations, the value operand of
 * each Store, and the in[] initialisation for the Sync are not visible here.
 */
3851 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
3852 ir_node *src_block = get_nodes_block(node);
3853 ir_node *block = be_transform_node(src_block);
3854 ir_graph *irg = current_ir_graph;
3855 dbg_info *dbgi = get_irn_dbg_info(node);
3856 ir_node *frame = get_irg_frame(irg);
3857 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3858 ir_node *nomem = new_NoMem();
3859 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
3860 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
3861 ir_node *new_val_low = be_transform_node(val_low);
3862 ir_node *new_val_high = be_transform_node(val_high);
3867 ir_node *store_high;
3869 if(!mode_is_signed(get_irn_mode(val_high))) {
3870 panic("unsigned long long -> float not supported yet (%+F)", node);
3874 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3876 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3878 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
3879 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
3881 set_ia32_use_frame(store_low);
3882 set_ia32_use_frame(store_high);
3883 set_ia32_op_type(store_low, ia32_AddrModeD);
3884 set_ia32_op_type(store_high, ia32_AddrModeD);
/* low half unsigned, high half carries the sign */
3885 set_ia32_ls_mode(store_low, mode_Iu);
3886 set_ia32_ls_mode(store_high, mode_Is);
3887 add_ia32_am_offs_int(store_high, 4);
3891 sync = new_rd_Sync(dbgi, irg, block, 2, in);
3894 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
3896 set_ia32_use_frame(fild);
3897 set_ia32_op_type(fild, ia32_AddrModeS);
3898 set_ia32_ls_mode(fild, mode_Ls);
3900 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3902 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transform an ia32_l_FloattoLL (float -> signed 64bit int): fist-stores
 * the x87 value as a 64bit integer into a frame slot; the two 32bit halves
 * are read back later by gen_Proj_l_FloattoLL.
 * NOTE(review): elided listing — the final "return mem;" is not visible
 * here.
 */
3905 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
3906 ir_node *src_block = get_nodes_block(node);
3907 ir_node *block = be_transform_node(src_block);
3908 ir_graph *irg = current_ir_graph;
3909 dbg_info *dbgi = get_irn_dbg_info(node);
3910 ir_node *frame = get_irg_frame(irg);
3911 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3912 ir_node *nomem = new_NoMem();
3913 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
3914 ir_node *new_val = be_transform_node(val);
3915 ir_node *fist, *mem;
3917 mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
3918 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
3919 set_ia32_use_frame(fist);
3920 set_ia32_op_type(fist, ia32_AddrModeD);
3921 set_ia32_ls_mode(fist, mode_Ls);
3927 * the BAD transformer.
3929 static ir_node *bad_transform(ir_node *node) {
3930 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: loads one 32bit half of the 64bit
 * value the fist stored into the frame slot — offset 4 for the high half,
 * offset 0 for the low half (little-endian).
 * NOTE(review): elided listing — local declarations and the final
 * "return proj;" are not visible here.
 */
3934 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
3935 ir_graph *irg = current_ir_graph;
3936 ir_node *block = be_transform_node(get_nodes_block(node));
3937 ir_node *pred = get_Proj_pred(node);
3938 ir_node *new_pred = be_transform_node(pred);
3939 ir_node *frame = get_irg_frame(irg);
3940 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3941 dbg_info *dbgi = get_irn_dbg_info(node);
3942 long pn = get_Proj_proj(node);
3947 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
3948 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3949 set_ia32_use_frame(load);
3950 set_ia32_op_type(load, ia32_AddrModeS);
3951 set_ia32_ls_mode(load, mode_Iu);
3952 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
3953 * 32 bit from it with this particular load */
3954 attr = get_ia32_attr(load);
3955 attr->data.need_64bit_stackent = 1;
3957 if (pn == pn_ia32_l_FloattoLL_res_high) {
3958 add_ia32_am_offs_int(load, 4);
3960 assert(pn == pn_ia32_l_FloattoLL_res_low);
3963 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3969 * Transform the Projs of an AddSP.
/**
 * Transform the Projs of a be_AddSP. The AddSP was lowered to an ia32_SubSP
 * (stack grows downwards), so be_AddSP proj numbers are renumbered to the
 * SubSP's: sp -> stack (pinned to %esp), res -> addr, M -> M.
 * NOTE(review): elided listing — "return res;" of the sp branch is not
 * visible here.
 */
3971 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
3972 ir_node *block = be_transform_node(get_nodes_block(node));
3973 ir_node *pred = get_Proj_pred(node);
3974 ir_node *new_pred = be_transform_node(pred);
3975 ir_graph *irg = current_ir_graph;
3976 dbg_info *dbgi = get_irn_dbg_info(node);
3977 long proj = get_Proj_proj(node);
3979 if (proj == pn_be_AddSP_sp) {
3980 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3981 pn_ia32_SubSP_stack);
3982 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
3984 } else if(proj == pn_be_AddSP_res) {
3985 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3986 pn_ia32_SubSP_addr);
3987 } else if (proj == pn_be_AddSP_M) {
3988 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
3991 panic("No idea how to transform proj->AddSP");
3995 * Transform the Projs of a SubSP.
/**
 * Transform the Projs of a be_SubSP. The SubSP was lowered to an ia32_AddSP,
 * so proj numbers are renumbered to the AddSP's: sp -> stack (pinned to
 * %esp), M -> M.
 * NOTE(review): elided listing — "return res;" of the sp branch is not
 * visible here.
 */
3997 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
3998 ir_node *block = be_transform_node(get_nodes_block(node));
3999 ir_node *pred = get_Proj_pred(node);
4000 ir_node *new_pred = be_transform_node(pred);
4001 ir_graph *irg = current_ir_graph;
4002 dbg_info *dbgi = get_irn_dbg_info(node);
4003 long proj = get_Proj_proj(node);
4005 if (proj == pn_be_SubSP_sp) {
4006 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4007 pn_ia32_AddSP_stack);
4008 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4010 } else if (proj == pn_be_SubSP_M) {
4011 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4014 panic("No idea how to transform proj->SubSP");
4018 * Transform and renumber the Projs from a Load.
/**
 * Transform and renumber the Projs of a Load, dispatching on what the Load
 * was transformed into (ia32 Load / Conv / xLoad / vfld) and renumbering
 * the proj accordingly. Multi-user ProjMs are kept untransformed so the
 * Load can still be folded into a source address mode.
 * NOTE(review): elided listing — several switch/case lines are not visible
 * here.
 */
4020 static ir_node *gen_Proj_Load(ir_node *node) {
4022 ir_node *block = be_transform_node(get_nodes_block(node));
4023 ir_node *pred = get_Proj_pred(node);
4024 ir_graph *irg = current_ir_graph;
4025 dbg_info *dbgi = get_irn_dbg_info(node);
4026 long proj = get_Proj_proj(node);
4028 /* loads might be part of source address mode matches, so we don't
4029 * transform the ProjMs yet (with the exception of loads whose result is
4032 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4035 /* this is needed, because sometimes we have loops that are only
4036 reachable through the ProjM */
4037 be_enqueue_preds(node);
4038 /* do it in 2 steps, to silence firm verifier */
4039 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4040 set_Proj_proj(res, pn_ia32_mem);
4044 /* renumber the proj */
4045 new_pred = be_transform_node(pred);
4046 if (is_ia32_Load(new_pred)) {
4049 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4051 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4052 case pn_Load_X_regular:
4053 return new_rd_Jmp(dbgi, irg, block);
4054 case pn_Load_X_except:
4055 /* This Load might raise an exception. Mark it. */
4056 set_ia32_exc_label(new_pred, 1);
4057 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4061 } else if (is_ia32_Conv_I2I(new_pred) ||
4062 is_ia32_Conv_I2I8Bit(new_pred)) {
/* a Load folded into a Conv: reuse the Conv's outputs */
4063 set_irn_mode(new_pred, mode_T);
4064 if (proj == pn_Load_res) {
4065 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4066 } else if (proj == pn_Load_M) {
4067 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4069 } else if (is_ia32_xLoad(new_pred)) {
4072 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4074 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4075 case pn_Load_X_regular:
4076 return new_rd_Jmp(dbgi, irg, block);
4077 case pn_Load_X_except:
4078 /* This Load might raise an exception. Mark it. */
4079 set_ia32_exc_label(new_pred, 1);
4080 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4084 } else if (is_ia32_vfld(new_pred)) {
4087 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4089 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4090 case pn_Load_X_regular:
4091 return new_rd_Jmp(dbgi, irg, block);
4092 case pn_Load_X_except:
4093 /* This Load might raise an exception. Mark it. */
4094 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): vfld branch uses pn_ia32_xLoad_X_exc — looks like a
 * copy-paste from the xLoad branch; should probably be
 * pn_ia32_vfld_X_exc. Confirm the proj numbering before changing. */
4095 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4100 /* can happen for ProJMs when source address mode happened for the
4103 /* however it should not be the result proj, as that would mean the
4104 load had multiple users and should not have been used for
4106 if (proj != pn_Load_M) {
4107 panic("internal error: transformed node not a Load");
/* NOTE(review): magic proj number 1 — presumably the mem output of the
 * consuming node; a named pn_* constant would be clearer. */
4109 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4112 panic("No idea how to transform proj");
4116 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transform and renumber the Projs of a Div/Mod/DivMod: all three map onto
 * the same ia32 Div/IDiv node, which provides both the div and the mod
 * result (EAX/EDX), so only the proj number differs per opcode.
 * NOTE(review): elided listing — the outer case labels (iro_Div/iro_Mod/
 * iro_DivMod), inner switch lines and breaks are not visible here.
 */
4118 static ir_node *gen_Proj_DivMod(ir_node *node) {
4119 ir_node *block = be_transform_node(get_nodes_block(node));
4120 ir_node *pred = get_Proj_pred(node);
4121 ir_node *new_pred = be_transform_node(pred);
4122 ir_graph *irg = current_ir_graph;
4123 dbg_info *dbgi = get_irn_dbg_info(node);
4124 long proj = get_Proj_proj(node);
4126 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4128 switch (get_irn_opcode(pred)) {
4132 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4134 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4135 case pn_Div_X_regular:
4136 return new_rd_Jmp(dbgi, irg, block);
4137 case pn_Div_X_except:
4138 set_ia32_exc_label(new_pred, 1);
4139 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4147 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4149 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4150 case pn_Mod_X_except:
4151 set_ia32_exc_label(new_pred, 1);
4152 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4160 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4161 case pn_DivMod_res_div:
4162 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4163 case pn_DivMod_res_mod:
4164 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4165 case pn_DivMod_X_regular:
4166 return new_rd_Jmp(dbgi, irg, block);
4167 case pn_DivMod_X_except:
4168 set_ia32_exc_label(new_pred, 1);
4169 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4178 panic("No idea how to transform proj->DivMod");
4182 * Transform and renumber the Projs from a CopyB.
/**
 * Transform and renumber the Projs of a CopyB: only the memory proj is
 * handled, renumbered to the mem output of the ia32 CopyB/CopyB_i
 * (fixed-size) variant the CopyB was transformed into.
 * NOTE(review): elided listing — the switch header, break and default lines
 * are not visible here.
 */
4184 static ir_node *gen_Proj_CopyB(ir_node *node) {
4185 ir_node *block = be_transform_node(get_nodes_block(node));
4186 ir_node *pred = get_Proj_pred(node);
4187 ir_node *new_pred = be_transform_node(pred);
4188 ir_graph *irg = current_ir_graph;
4189 dbg_info *dbgi = get_irn_dbg_info(node);
4190 long proj = get_Proj_proj(node);
4193 case pn_CopyB_M_regular:
4194 if (is_ia32_CopyB_i(new_pred)) {
4195 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4196 } else if (is_ia32_CopyB(new_pred)) {
4197 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4204 panic("No idea how to transform proj->CopyB");
4208 * Transform and renumber the Projs from a Quot.
/**
 * Transform and renumber the Projs of a Quot (float division): the Quot was
 * transformed into either an SSE xDiv or an x87 vfdiv; the mem and result
 * projs are renumbered to the matching outputs.
 * NOTE(review): elided listing — the switch header, the pn_Quot_M /
 * pn_Quot_res case labels and break lines are not visible here.
 */
4210 static ir_node *gen_Proj_Quot(ir_node *node) {
4211 ir_node *block = be_transform_node(get_nodes_block(node));
4212 ir_node *pred = get_Proj_pred(node);
4213 ir_node *new_pred = be_transform_node(pred);
4214 ir_graph *irg = current_ir_graph;
4215 dbg_info *dbgi = get_irn_dbg_info(node);
4216 long proj = get_Proj_proj(node);
4220 if (is_ia32_xDiv(new_pred)) {
4221 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4222 } else if (is_ia32_vfdiv(new_pred)) {
4223 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4227 if (is_ia32_xDiv(new_pred)) {
4228 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4229 } else if (is_ia32_vfdiv(new_pred)) {
4230 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4233 case pn_Quot_X_regular:
4234 case pn_Quot_X_except:
4239 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call: duplicate the node, mark it as clobbering the flags,
 * and enable the x87 simulator when the first result is a float (the ia32
 * calling convention returns floats in st(0)).
 * NOTE(review): elided listing — "return res;" is not visible here.
 */
4242 static ir_node *gen_be_Call(ir_node *node) {
4243 ir_node *res = be_duplicate_node(node);
/* calls clobber EFLAGS */
4246 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4248 /* Run the x87 simulator if the call returns a float value */
4249 call_tp = be_Call_get_type(node);
4250 if (get_method_n_ress(call_tp) > 0) {
4251 ir_type *const res_type = get_method_res_type(call_tp, 0);
4252 ir_mode *const res_mode = get_type_mode(res_type);
4254 if (res_mode != NULL && mode_is_float(res_mode)) {
4255 env_cg->do_x87_sim = 1;
/**
 * Transform a be_IncSP: duplicate the node and mark it as clobbering the
 * flags (the stack-pointer adjustment is emitted as add/sub).
 * NOTE(review): elided listing — "return res;" is not visible here.
 */
4262 static ir_node *gen_be_IncSP(ir_node *node) {
4263 ir_node *res = be_duplicate_node(node);
4264 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4270 * Transform the Projs from a be_Call.
/**
 * Transform the Projs of a be_Call. With SSE2, a float result arrives in
 * st(0) and must be moved to an xmm register via an fstp-to-frame followed
 * by an xLoad; both the memory proj and the result proj must then be
 * re-routed through that fstp/xLoad pair.
 * NOTE(review): elided listing — several argument lines of the vfst/xLoad
 * constructors, proj numbers and intermediate returns are not visible here.
 */
4272 static ir_node *gen_Proj_be_Call(ir_node *node) {
4273 ir_node *block = be_transform_node(get_nodes_block(node));
4274 ir_node *call = get_Proj_pred(node);
4275 ir_node *new_call = be_transform_node(call);
4276 ir_graph *irg = current_ir_graph;
4277 dbg_info *dbgi = get_irn_dbg_info(node);
4278 ir_type *method_type = be_Call_get_type(call);
4279 int n_res = get_method_n_ress(method_type);
4280 long proj = get_Proj_proj(node);
4281 ir_mode *mode = get_irn_mode(node);
4283 const arch_register_class_t *cls;
4285 /* The following is kinda tricky: If we're using SSE, then we have to
4286 * move the result value of the call in floating point registers to an
4287 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4288 * after the call, we have to make sure to correctly make the
4289 * MemProj and the result Proj use these 2 nodes
4291 if (proj == pn_be_Call_M_regular) {
4292 // get new node for result, are we doing the sse load/store hack?
4293 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4294 ir_node *call_res_new;
4295 ir_node *call_res_pred = NULL;
4297 if (call_res != NULL) {
4298 call_res_new = be_transform_node(call_res);
4299 call_res_pred = get_Proj_pred(call_res_new);
/* no sse hack happened: take the mem output of the call itself */
4302 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4303 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4304 pn_be_Call_M_regular);
4306 assert(is_ia32_xLoad(call_res_pred));
4307 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4311 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4312 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4314 ir_node *frame = get_irg_frame(irg);
4315 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4317 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4320 /* in case there is no memory output: create one to serialize the copy
4322 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4323 pn_be_Call_M_regular);
4324 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4325 pn_be_Call_first_res);
4327 /* store st(0) onto stack */
4328 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4330 set_ia32_op_type(fstp, ia32_AddrModeD);
4331 set_ia32_use_frame(fstp);
4333 /* load into SSE register */
4334 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4336 set_ia32_op_type(sse_load, ia32_AddrModeS);
4337 set_ia32_use_frame(sse_load);
4339 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4345 /* transform call modes */
4346 if (mode_is_data(mode)) {
4347 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4351 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4355 * Transform the Projs from a Cmp.
/**
 * Transform the Projs of a Cmp: must never be reached — mode_b values are
 * expected to have been lowered before this phase, so hitting this is a
 * front-end/lowering bug.
 * NOTE(review): elided listing — the second panic argument ("node") is not
 * visible here.
 */
4357 static ir_node *gen_Proj_Cmp(ir_node *node)
4359 /* this probably means not all mode_b nodes were lowered... */
4360 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4365 * Transform the Projs from a Bound.
/**
 * Transform the Projs of a Bound: the Bound became a Jcc (see gen_Bound),
 * so X_regular maps to the Jcc's true exit, X_except to its false exit;
 * the mem and index projs simply forward the Bound's untouched inputs.
 * NOTE(review): elided listing — the pn_Bound_M / pn_Bound_index case
 * labels are not visible here.
 */
4367 static ir_node *gen_Proj_Bound(ir_node *node)
4369 ir_node *new_node, *block;
4370 ir_node *pred = get_Proj_pred(node);
4372 switch (get_Proj_proj(node)) {
4374 return be_transform_node(get_Bound_mem(pred));
4375 case pn_Bound_X_regular:
4376 new_node = be_transform_node(pred);
4377 block = get_nodes_block(new_node);
4378 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
4379 case pn_Bound_X_except:
4380 new_node = be_transform_node(pred);
4381 block = get_nodes_block(new_node);
4382 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
4384 return be_transform_node(get_Bound_index(pred));
4386 panic("unsupported Proj from Bound");
/**
 * Transform a Proj of an ASM node: non-memory projs are duplicated as-is;
 * the memory proj is renumbered to the slot after the ASM's register
 * results (get_ia32_n_res + 1).
 * NOTE(review): elided listing — local declarations are not visible here.
 */
4390 static ir_node *gen_Proj_ASM(ir_node *node)
4396 if (get_irn_mode(node) != mode_M)
4397 return be_duplicate_node(node);
4399 pred = get_Proj_pred(node);
4400 new_pred = be_transform_node(pred);
4401 block = get_nodes_block(new_pred);
4402 return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
4403 get_ia32_n_res(new_pred) + 1);
4407 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes: dispatches to the
 * per-predecessor-opcode Proj transformers, with special handling for
 * Start (initial-exec ProjX becomes a Jmp, TLS proj handled separately)
 * and for gp-mode projs of untransformed predecessors.
 * NOTE(review): elided listing — the outer case labels (iro_Load, iro_ASM,
 * iro_Div, …) and several returns are not visible here.
 */
4409 static ir_node *gen_Proj(ir_node *node) {
4410 ir_node *pred = get_Proj_pred(node);
4413 switch (get_irn_opcode(pred)) {
4415 proj = get_Proj_proj(node);
/* a Store has only a memory result; forward to the transformed Store */
4416 if (proj == pn_Store_M) {
4417 return be_transform_node(pred);
4419 panic("No idea how to transform proj->Store");
4422 return gen_Proj_Load(node);
4424 return gen_Proj_ASM(node);
4428 return gen_Proj_DivMod(node);
4430 return gen_Proj_CopyB(node);
4432 return gen_Proj_Quot(node);
4434 return gen_Proj_be_SubSP(node);
4436 return gen_Proj_be_AddSP(node);
4438 return gen_Proj_be_Call(node);
4440 return gen_Proj_Cmp(node);
4442 return gen_Proj_Bound(node);
4444 proj = get_Proj_proj(node);
4445 if (proj == pn_Start_X_initial_exec) {
4446 ir_node *block = get_nodes_block(pred);
4447 dbg_info *dbgi = get_irn_dbg_info(node);
4450 /* we exchange the ProjX with a jump */
4451 block = be_transform_node(block);
4452 jump = new_rd_Jmp(dbgi, current_ir_graph, block);
4455 if (node == be_get_old_anchor(anchor_tls)) {
4456 return gen_Proj_tls(node);
4461 if (is_ia32_l_FloattoLL(pred)) {
4462 return gen_Proj_l_FloattoLL(node);
4464 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4468 ir_mode *mode = get_irn_mode(node);
4469 if (ia32_mode_needs_gp_reg(mode)) {
4470 ir_node *new_pred = be_transform_node(pred);
4471 ir_node *block = be_transform_node(get_nodes_block(node));
4472 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
4473 mode_Iu, get_Proj_proj(node));
4474 #ifdef DEBUG_libfirm
/* keep the old node number for debugging */
4475 new_proj->node_nr = node->node_nr;
4481 return be_duplicate_node(node);
4485 * Enters all transform functions into the generic pointer
/**
 * Enters all transform functions into the opcodes' generic function
 * pointers: clears all pointers, then registers gen_* transformers via
 * GEN() and bad_transform via BAD() for opcodes that must not occur.
 * NOTE(review): elided listing — most GEN()/BAD() registration lines are
 * not visible here.
 */
4487 static void register_transformers(void)
4491 /* first clear the generic function pointer for all ops */
4492 clear_irp_opcodes_generic_func();
4494 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4495 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4533 /* transform ops from intrinsic lowering */
4545 GEN(ia32_l_LLtoFloat);
4546 GEN(ia32_l_FloattoLL);
4552 /* we should never see these nodes */
4567 /* handle generic backend nodes */
4576 op_Mulh = get_op_Mulh();
4585 * Pre-transform all unknown and noreg nodes.
4587 static void ia32_pretransform_node(void *arch_cg) {
4588 ia32_code_gen_t *cg = arch_cg;
4590 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4591 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4592 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4593 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4594 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4595 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
4600 * Walker, checks if all ia32 nodes producing more than one result have their
4601 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/**
 * Walker: checks that every ia32 node producing multiple results has Projs
 * for all register outputs; for any missing output it creates a Proj and
 * attaches it to a be_Keep so the register is not considered dead.
 * found_projs is a bitset of seen proj numbers (hence the width assert).
 * NOTE(review): elided listing — early-return lines, local declarations
 * (block, in[], last_keep) and several closing braces are not visible here.
 */
4603 static void add_missing_keep_walker(ir_node *node, void *data)
4606 unsigned found_projs = 0;
4607 const ir_edge_t *edge;
4608 ir_mode *mode = get_irn_mode(node);
4613 if(!is_ia32_irn(node))
4616 n_outs = get_ia32_n_res(node);
4619 if(is_ia32_SwitchJmp(node))
4622 assert(n_outs < (int) sizeof(unsigned) * 8);
/* collect the proj numbers that already exist */
4623 foreach_out_edge(node, edge) {
4624 ir_node *proj = get_edge_src_irn(edge);
4625 int pn = get_Proj_proj(proj);
4627 if (get_irn_mode(proj) == mode_M)
4630 assert(pn < n_outs);
4631 found_projs |= 1 << pn;
4635 /* are keeps missing? */
4637 for(i = 0; i < n_outs; ++i) {
4640 const arch_register_req_t *req;
4641 const arch_register_class_t *cls;
4643 if(found_projs & (1 << i)) {
4647 req = get_ia32_out_req(node, i);
/* flags outputs need no keep */
4652 if(cls == &ia32_reg_classes[CLASS_ia32_flags]) {
4656 block = get_nodes_block(node);
4657 in[0] = new_r_Proj(current_ir_graph, block, node,
4658 arch_register_class_mode(cls), i);
/* reuse one Keep per node where possible */
4659 if(last_keep != NULL) {
4660 be_Keep_add_node(last_keep, cls, in[0]);
4662 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
4663 if(sched_is_scheduled(node)) {
4664 sched_add_after(node, last_keep);
4671 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
4674 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
4676 ir_graph *irg = be_get_birg_irg(cg->birg);
4677 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
4680 /* do the transformation */
/**
 * Do the transformation: registers all transformers, computes node heights
 * and address-mode exclusions, disables CSE for the duration of the
 * transform (attributes are set after node creation), runs the generic
 * backend transform driver, then releases the auxiliary data.
 * NOTE(review): elided listing — the cse_last declaration and the
 * set_opt_cse(0) call before be_transform_graph are not visible here.
 */
4681 void ia32_transform_graph(ia32_code_gen_t *cg) {
4683 ir_graph *irg = cg->irg;
4685 register_transformers();
4687 initial_fpcw = NULL;
4689 BE_TIMER_PUSH(t_heights);
4690 heights = heights_new(irg);
4691 BE_TIMER_POP(t_heights);
4692 ia32_calculate_non_address_mode_nodes(cg->birg);
4694 /* the transform phase is not safe for CSE (yet) because several nodes get
4695 * attributes set after their creation */
4696 cse_last = get_opt_cse();
4699 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
4701 set_opt_cse(cse_last);
4703 ia32_free_non_address_mode_nodes();
4704 heights_free(heights);
/**
 * Module initialisation: registers the debug channel used by the ia32
 * transform phase.
 */
4708 void ia32_init_transform(void)
4710 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");