2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
36 #include "irgraph_t.h"
41 #include "iredges_t.h"
54 #include "../benode_t.h"
55 #include "../besched.h"
57 #include "../beutil.h"
58 #include "../beirg_t.h"
59 #include "../betranshlp.h"
62 #include "bearch_ia32_t.h"
63 #include "ia32_common_transform.h"
64 #include "ia32_nodes_attr.h"
65 #include "ia32_transform.h"
66 #include "ia32_new_nodes.h"
67 #include "ia32_map_regs.h"
68 #include "ia32_dbg_stat.h"
69 #include "ia32_optimize.h"
70 #include "ia32_util.h"
71 #include "ia32_address_mode.h"
72 #include "ia32_architecture.h"
74 #include "gen_ia32_regalloc_if.h"
/* NOTE(review): this excerpt is a numbered listing with gaps — each line keeps
 * its original line number and some lines are missing. Comments added below
 * describe only what is visible here. */
/* Hex bit patterns for float/double sign and abs masks, plus the largest
 * signed 64bit integer, used to materialize known FP constants. */
76 #define SFP_SIGN "0x80000000"
77 #define DFP_SIGN "0x8000000000000000"
78 #define SFP_ABS "0x7FFFFFFF"
79 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
80 #define DFP_INTMAX "9223372036854775807"
/* Names of the primitive firm types created for those constants. */
82 #define TP_SFP_SIGN "ia32_sfp_sign"
83 #define TP_DFP_SIGN "ia32_dfp_sign"
84 #define TP_SFP_ABS "ia32_sfp_abs"
85 #define TP_DFP_ABS "ia32_dfp_abs"
86 #define TP_INT_MAX "ia32_int_max"
/* Linker-level entity names for the same constants. */
88 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
89 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
90 #define ENT_SFP_ABS "IA32_SFP_ABS"
91 #define ENT_DFP_ABS "IA32_DFP_ABS"
92 #define ENT_INT_MAX "IA32_INT_MAX"
/* Shorthands for the register-class modes of the x87 (vfp) and SSE (xmm)
 * register files. */
94 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
95 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
/* Debug-logging module handle (compiled in only for debug builds). */
97 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
/* Cached transformed node for the initial x87 control word; see get_fpcw(). */
99 static ir_node *initial_fpcw = NULL;
101 extern ir_op *get_op_Mulh(void);
/* Signatures of the node-constructor callbacks taken by the generic
 * gen_binop / gen_binop_flags / gen_shift_binop / gen_unop /
 * gen_binop_x87_float helpers defined later in this file. */
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
/* Forward declarations of helpers defined further down this file. */
128 static ir_node *create_immediate_or_transform(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
132 dbg_info *dbgi, ir_node *block,
133 ir_node *op, ir_node *orig_node);
/* Predicate: node is a Const whose tarval is the null value.
 * (The closing brace, original line ~138, is not part of this excerpt.) */
135 /** Return non-zero is a node represents the 0 constant. */
136 static bool is_Const_0(ir_node *node) {
137 return is_Const(node) && is_Const_null(node);
/* Predicate: node is a Const whose tarval is one. */
140 /** Return non-zero is a node represents the 1 constant. */
141 static bool is_Const_1(ir_node *node) {
142 return is_Const(node) && is_Const_one(node);
/* Predicate: node is a Const whose tarval is all-ones (i.e. -1). */
145 /** Return non-zero is a node represents the -1 constant. */
146 static bool is_Const_Minus_1(ir_node *node) {
147 return is_Const(node) && is_Const_all_one(node);
/* True when the float constant can be produced by a single x87 instruction:
 * only 0.0 (fldz) and 1.0 (fld1) are matched here. The return statements and
 * closing brace fall in gaps of this excerpt. */
151 * returns true if constant can be created with a simple float command
153 static bool is_simple_x87_Const(ir_node *node)
155 tarval *tv = get_Const_tarval(node);
156 if (tarval_is_null(tv) || tarval_is_one(tv))
159 /* TODO: match all the other float constants */
/* True when the float constant can be produced cheaply with SSE: 0.0, 1.0,
 * any mode_F value (visible below only as the gathering of the 4 low bytes),
 * and — per the comment at original line 183 — a mode_D value whose low
 * 32 bits are zero. Returns are in gaps of this excerpt. */
164 * returns true if constant can be created with a simple float command
166 static bool is_simple_sse_Const(ir_node *node)
168 tarval *tv = get_Const_tarval(node);
169 ir_mode *mode = get_tarval_mode(tv);
174 if (tarval_is_null(tv) || tarval_is_one(tv))
/* Assemble the low 32 bits of the double from its 4 low bytes. */
177 if (mode == mode_D) {
178 unsigned val = get_tarval_sub_bits(tv, 0) |
179 (get_tarval_sub_bits(tv, 1) << 8) |
180 (get_tarval_sub_bits(tv, 2) << 16) |
181 (get_tarval_sub_bits(tv, 3) << 24);
183 /* lower 32bit are zero, really a 32bit constant */
187 /* TODO: match all the other float constants */
/* Transform a firm Const into ia32 code.
 *
 * Float constants (SSE2 path): 0.0 -> xZero; 1.0 -> xAllOnes shifted left
 * then right (pslld/psrld) to build the exponent pattern; any mode_F value
 * -> movd from a gp Const; mode_D with zero low half -> movd + psllq by 32;
 * otherwise an xLoad from a constant-pool entity. Float constants (x87
 * path): fldz / fld1 or a vfld from a constant-pool entity. Integer
 * constants become a plain ia32_Const after converting the tarval to
 * mode_Iu. Several else-branches, declarations and returns fall in gaps of
 * this numbered excerpt. */
192 * Transforms a Const.
194 static ir_node *gen_Const(ir_node *node) {
195 ir_graph *irg = current_ir_graph;
196 ir_node *old_block = get_nodes_block(node);
197 ir_node *block = be_transform_node(old_block);
198 dbg_info *dbgi = get_irn_dbg_info(node);
199 ir_mode *mode = get_irn_mode(node);
201 assert(is_Const(node));
203 if (mode_is_float(mode)) {
205 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
206 ir_node *nomem = new_NoMem();
/* --- SSE2 path --- */
210 if (ia32_cg_config.use_sse2) {
211 tarval *tv = get_Const_tarval(node);
212 if (tarval_is_null(tv)) {
213 load = new_rd_ia32_xZero(dbgi, irg, block);
214 set_ia32_ls_mode(load, mode);
216 } else if (tarval_is_one(tv)) {
/* 1.0: start from all-ones, shift left by 26 (float) or 55 (double),
 * then right by 2, leaving the bit pattern of 1.0 in the register. */
217 int cnst = mode == mode_F ? 26 : 55;
218 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
219 ir_node *imm2 = create_Immediate(NULL, 0, 2);
220 ir_node *pslld, *psrld;
222 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
223 set_ia32_ls_mode(load, mode);
224 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
225 set_ia32_ls_mode(pslld, mode);
226 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
227 set_ia32_ls_mode(psrld, mode);
229 } else if (mode == mode_F) {
230 /* we can place any 32bit constant by using a movd gp, sse */
231 unsigned val = get_tarval_sub_bits(tv, 0) |
232 (get_tarval_sub_bits(tv, 1) << 8) |
233 (get_tarval_sub_bits(tv, 2) << 16) |
234 (get_tarval_sub_bits(tv, 3) << 24);
235 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
236 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
237 set_ia32_ls_mode(load, mode);
/* Double whose low 32 bits are zero (checked in a gap): build the
 * high half in a gp register, movd it over, shift left by 32. */
240 if (mode == mode_D) {
241 unsigned val = get_tarval_sub_bits(tv, 0) |
242 (get_tarval_sub_bits(tv, 1) << 8) |
243 (get_tarval_sub_bits(tv, 2) << 16) |
244 (get_tarval_sub_bits(tv, 3) << 24);
246 ir_node *imm32 = create_Immediate(NULL, 0, 32);
247 ir_node *cnst, *psllq;
249 /* fine, lower 32bit are zero, produce 32bit value */
250 val = get_tarval_sub_bits(tv, 4) |
251 (get_tarval_sub_bits(tv, 5) << 8) |
252 (get_tarval_sub_bits(tv, 6) << 16) |
253 (get_tarval_sub_bits(tv, 7) << 24);
254 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
255 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
256 set_ia32_ls_mode(load, mode);
257 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
258 set_ia32_ls_mode(psllq, mode);
/* Fallback: rematerializable xLoad from a constant-pool entity. */
263 floatent = create_float_const_entity(node);
265 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
267 set_ia32_op_type(load, ia32_AddrModeS);
268 set_ia32_am_sc(load, floatent);
269 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
270 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* --- x87 path (no SSE2) --- */
273 if (is_Const_null(node)) {
274 load = new_rd_ia32_vfldz(dbgi, irg, block);
276 set_ia32_ls_mode(load, mode);
277 } else if (is_Const_one(node)) {
278 load = new_rd_ia32_vfld1(dbgi, irg, block);
280 set_ia32_ls_mode(load, mode);
282 floatent = create_float_const_entity(node);
284 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
285 set_ia32_op_type(load, ia32_AddrModeS);
286 set_ia32_am_sc(load, floatent);
287 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
288 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
289 /* take the mode from the entity */
290 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
294 /* Const Nodes before the initial IncSP are a bad idea, because
295 * they could be spilled and we have no SP ready at that point yet.
296 * So add a dependency to the initial frame pointer calculation to
297 * avoid that situation.
299 if (get_irg_start_block(irg) == block) {
300 add_irn_dep(load, get_irg_frame(irg));
303 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
/* --- integer path: convert tarval to mode_Iu and emit ia32_Const --- */
305 } else { /* non-float mode */
307 tarval *tv = get_Const_tarval(node);
310 tv = tarval_convert_to(tv, mode_Iu);
312 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
314 panic("couldn't convert constant tarval (%+F)", node);
316 val = get_tarval_long(tv);
318 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
319 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
322 if (get_irg_start_block(irg) == block) {
323 add_irn_dep(cnst, get_irg_frame(irg));
/* Transform a SymConst. Float-mode SymConsts become an xLoad (SSE2) or vfld
 * (x87) in mode_E addressed by the entity; everything else must be a
 * symconst_addr_ent and becomes an ia32_Const carrying the entity. Like
 * gen_Const, nodes in the start block get a dependency on the frame node so
 * they cannot be scheduled before the initial IncSP. */
331 * Transforms a SymConst.
333 static ir_node *gen_SymConst(ir_node *node) {
334 ir_graph *irg = current_ir_graph;
335 ir_node *old_block = get_nodes_block(node);
336 ir_node *block = be_transform_node(old_block);
337 dbg_info *dbgi = get_irn_dbg_info(node);
338 ir_mode *mode = get_irn_mode(node);
341 if (mode_is_float(mode)) {
342 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
343 ir_node *nomem = new_NoMem();
345 if (ia32_cg_config.use_sse2)
346 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E)
;
348 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
349 set_ia32_am_sc(cnst, get_SymConst_entity(node));
350 set_ia32_use_frame(cnst);
/* Non-float: only address-of-entity SymConsts are supported. */
354 if(get_SymConst_kind(node) != symconst_addr_ent) {
355 panic("backend only support symconst_addr_ent (at %+F)", node);
357 entity = get_SymConst_entity(node);
358 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
361 /* Const Nodes before the initial IncSP are a bad idea, because
362 * they could be spilled and we have no SP ready at that point yet
364 if (get_irg_start_block(irg) == block) {
365 add_irn_dep(cnst, get_irg_frame(irg));
368 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* Lazily create (and cache) the global, constant, statically-allocated
 * entity holding a well-known FP constant (sign/abs masks, INT_MAX) used by
 * FP Neg/Abs lowering. The mode field of the table selects mode_Iu (0),
 * mode_Lu (1) or mode_F (default); align is the required byte alignment.
 * The initializer Const is built inside the const-code irg, restoring
 * current_ir_graph afterwards. */
373 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
374 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
375 static const struct {
377 const char *ent_name;
378 const char *cnst_str;
381 } names [ia32_known_const_max] = {
382 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
383 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
384 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
385 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
386 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
388 static ir_entity *ent_cache[ia32_known_const_max];
390 const char *tp_name, *ent_name, *cnst_str;
398 ent_name = names[kct].ent_name;
/* Build the entity only on first request; afterwards serve from cache. */
399 if (! ent_cache[kct]) {
400 tp_name = names[kct].tp_name;
401 cnst_str = names[kct].cnst_str;
403 switch (names[kct].mode) {
404 case 0: mode = mode_Iu; break;
405 case 1: mode = mode_Lu; break;
406 default: mode = mode_F; break;
408 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
409 tp = new_type_primitive(new_id_from_str(tp_name), mode);
410 /* set the specified alignment */
411 set_type_alignment_bytes(tp, names[kct].align);
413 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
415 set_entity_ld_ident(ent, get_entity_ident(ent));
416 set_entity_visibility(ent, visibility_local);
417 set_entity_variability(ent, variability_constant);
418 set_entity_allocation(ent, allocation_static);
420 /* we create a new entity here: It's initialization must resist on the
422 rem = current_ir_graph;
423 current_ir_graph = get_const_code_irg();
424 cnst = new_Const(mode, tv);
425 current_ir_graph = rem;
427 set_atomic_ent_value(ent, cnst);
429 /* cache the entry */
430 ent_cache[kct] = ent;
433 return ent_cache[kct];
/* Decide whether @p node (a Proj(Load) or a simple float Const) may be folded
 * into another node as a source address-mode operand. Rejects: loads in a
 * different block, loads with more than the allowed number of users
 * (2 with match_two_users, else 1), loads already materialized by the
 * transformation, and loads whose memory the other operand(s) depend on. */
437 * return true if the node is a Proj(Load) and could be used in source address
438 * mode for another node. Will return only true if the @p other node is not
439 * dependent on the memory of the Load (for binary operations use the other
440 * input here, for unary operations use NULL).
442 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
443 ir_node *other, ir_node *other2, match_flags_t flags)
448 /* float constants are always available */
449 if (is_Const(node)) {
450 ir_mode *mode = get_irn_mode(node);
451 if (mode_is_float(mode)) {
452 if (ia32_cg_config.use_sse2) {
453 if (is_simple_sse_Const(node))
456 if (is_simple_x87_Const(node))
459 if (get_irn_n_edges(node) > 1)
467 load = get_Proj_pred(node);
468 pn = get_Proj_proj(node);
469 if (!is_Load(load) || pn != pn_Load_res)
471 if (get_nodes_block(load) != block)
473 /* we only use address mode if we're the only user of the load */
474 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
476 /* in some edge cases with address mode we might reach the load normally
477 * and through some AM sequence, if it is already materialized then we
478 * can't create an AM node from it */
479 if (be_is_transformed(node))
482 /* don't do AM if other node inputs depend on the load (via mem-proj) */
483 if (other != NULL && prevents_AM(block, load, other))
486 if (other2 != NULL && prevents_AM(block, load, other2))
/* Aggregated result of match_arguments(): the matched address (addr field,
 * not visible in this excerpt), operand kind (ia32_Normal vs ia32_AddrModeS),
 * and bit-flags for commutativity and swapped operands. Several members
 * (addr, new_op1/new_op2, ls_mode, pinned, mem_proj) fall in gaps here but
 * are used throughout the file. */
492 typedef struct ia32_address_mode_t ia32_address_mode_t;
493 struct ia32_address_mode_t {
498 ia32_op_type_t op_type;
502 unsigned commutative : 1;
503 unsigned ins_permuted : 1;
/* Fill @p addr from pointer @p ptr and memory @p mem: run the address-mode
 * matcher, then transform base/index/mem, substituting NoReg for absent
 * base/index. */
506 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
510 /* construct load address */
511 memset(addr, 0, sizeof(addr[0]));
512 ia32_create_address_mode(addr, ptr, /*force=*/0);
514 noreg_gp = ia32_new_NoReg_gp(env_cg);
515 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
516 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
517 addr->mem = be_transform_node(mem);
/* Fill @p am for a node that was accepted as a source-AM operand. Float
 * Consts address their constant-pool entity directly (unpinned, NoMem);
 * otherwise node is a Proj(Load) — take pinned state, load mode and memory
 * Proj from the Load and match its pointer into an address. */
520 static void build_address(ia32_address_mode_t *am, ir_node *node)
522 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
523 ia32_address_t *addr = &am->addr;
529 if (is_Const(node)) {
530 ir_entity *entity = create_float_const_entity(node);
531 addr->base = noreg_gp;
532 addr->index = noreg_gp;
533 addr->mem = new_NoMem();
534 addr->symconst_ent = entity;
536 am->ls_mode = get_type_mode(get_entity_type(entity));
537 am->pinned = op_pin_state_floats;
/* Proj(Load) case: reuse the Load's address, mode and memory. */
541 load = get_Proj_pred(node);
542 ptr = get_Load_ptr(load);
543 mem = get_Load_mem(load);
544 new_mem = be_transform_node(mem);
545 am->pinned = get_irn_pinned(load);
546 am->ls_mode = get_Load_mode(load);
547 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
550 /* construct load address */
551 ia32_create_address_mode(addr, ptr, /*force=*/0);
553 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
554 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/* Copy the matched address components (scale, symconst, offset, sign,
 * frame entity) onto an ia32 node's attributes. */
558 static void set_address(ir_node *node, const ia32_address_t *addr)
560 set_ia32_am_scale(node, addr->scale);
561 set_ia32_am_sc(node, addr->symconst_ent);
562 set_ia32_am_offs_int(node, addr->offset);
563 if(addr->symconst_sign)
564 set_ia32_am_sc_sign(node);
566 set_ia32_use_frame(node);
567 set_ia32_frame_ent(node, addr->frame_entity);
/* Apply a matched address mode to @p node: address, op type, load/store
 * mode; pin the node if the folded load was pinned; mark commutative. */
571 * Apply attributes of a given address mode to a node.
573 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
575 set_address(node, &am->addr);
577 set_ia32_op_type(node, am->op_type);
578 set_ia32_ls_mode(node, am->ls_mode);
579 if (am->pinned == op_pin_state_pinned) {
580 /* beware: some nodes are already pinned and did not allow to change the state */
581 if (get_irn_pinned(node) != op_pin_state_pinned)
582 set_irn_pinned(node, op_pin_state_pinned);
585 set_ia32_commutative(node);
/* True iff node is a single-user integer Conv to a strictly smaller
 * gp-register mode. */
589 * Check, if a given node is a Down-Conv, ie. a integer Conv
590 * from a mode with a mode with more bits to a mode with lesser bits.
591 * Moreover, we return only true if the node has not more than 1 user.
593 * @param node the node
594 * @return non-zero if node is a Down-Conv
596 static int is_downconv(const ir_node *node)
604 /* we only want to skip the conv when we're the only user
605 * (not optimal but for now...)
607 if(get_irn_n_edges(node) > 1)
610 src_mode = get_irn_mode(get_Conv_op(node));
611 dest_mode = get_irn_mode(node);
612 return ia32_mode_needs_gp_reg(src_mode)
613 && ia32_mode_needs_gp_reg(dest_mode)
614 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
/* Walk past any chain of down-Convs and return the widest source node.
 * (The return statement is in a gap of this excerpt.) */
617 /* Skip all Down-Conv's on a given node and return the resulting node. */
618 ir_node *ia32_skip_downconv(ir_node *node) {
619 while (is_downconv(node))
620 node = get_Conv_op(node);
/* Widen @p node to a 32bit mode via create_I2I_Conv; the target mode
 * (selected by signedness — assignments fall in gaps here) keeps the value's
 * sign behavior. @p orig_node is used for debug attribution. */
625 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
627 ir_mode *mode = get_irn_mode(node);
632 if(mode_is_signed(mode)) {
637 block = get_nodes_block(node);
638 dbgi = get_irn_dbg_info(node);
640 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/* Core operand matcher: decides, per the @p flags, whether op2 (or op1 for
 * commutative ops) becomes an immediate, a folded source-address-mode load,
 * or a plain transformed register operand, and fills @p am accordingly.
 * Order of preference visible below: immediate for op2, then AM for op2,
 * then AM for op1 (swapping operands, ins_permuted=1), then normal mode.
 * 8/16-bit operands are only AM-matched when the corresponding flag allows
 * it; down-Convs are skipped for mode-neutral operations. Several branches
 * and assignments fall in gaps of this numbered excerpt. */
644 * matches operands of a node into ia32 addressing/operand modes. This covers
645 * usage of source address mode, immediates, operations with non 32-bit modes,
647 * The resulting data is filled into the @p am struct. block is the block
648 * of the node whose arguments are matched. op1, op2 are the first and second
649 * input that are matched (op1 may be NULL). other_op is another unrelated
650 * input that is not matched! but which is needed sometimes to check if AM
651 * for op1/op2 is legal.
652 * @p flags describes the supported modes of the operation in detail.
654 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
655 ir_node *op1, ir_node *op2, ir_node *other_op,
658 ia32_address_t *addr = &am->addr;
659 ir_mode *mode = get_irn_mode(op2);
660 int mode_bits = get_mode_size_bits(mode);
661 ir_node *noreg_gp, *new_op1, *new_op2;
663 unsigned commutative;
664 int use_am_and_immediates;
667 memset(am, 0, sizeof(am[0]));
/* Decode the match flags into local booleans. */
669 commutative = (flags & match_commutative) != 0;
670 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
671 use_am = (flags & match_am) != 0;
672 use_immediate = (flags & match_immediate) != 0;
673 assert(!use_am_and_immediates || use_immediate);
676 assert(!commutative || op1 != NULL);
677 assert(use_am || !(flags & match_8bit_am));
678 assert(use_am || !(flags & match_16bit_am));
/* Sub-32bit operands: AM only with the explicit 8/16bit-AM flags. */
680 if (mode_bits == 8) {
681 if (!(flags & match_8bit_am))
683 /* we don't automatically add upconvs yet */
684 assert((flags & match_mode_neutral) || (flags & match_8bit));
685 } else if (mode_bits == 16) {
686 if (!(flags & match_16bit_am))
688 /* we don't automatically add upconvs yet */
689 assert((flags & match_mode_neutral) || (flags & match_16bit));
692 /* we can simply skip downconvs for mode neutral nodes: the upper bits
693 * can be random for these operations */
694 if (flags & match_mode_neutral) {
695 op2 = ia32_skip_downconv(op2);
697 op1 = ia32_skip_downconv(op1);
701 /* match immediates. firm nodes are normalized: constants are always on the
704 if (!(flags & match_try_am) && use_immediate) {
705 new_op2 = try_create_Immediate(op2, 0);
708 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* Try source AM for op2 first... */
709 if (new_op2 == NULL &&
710 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
711 build_address(am, op2);
712 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
713 if (mode_is_float(mode)) {
714 new_op2 = ia32_new_NoReg_vfp(env_cg);
718 am->op_type = ia32_AddrModeS;
/* ...then, for commutative ops, AM for op1 with operands swapped. */
719 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
721 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
723 build_address(am, op1);
725 if (mode_is_float(mode)) {
726 noreg = ia32_new_NoReg_vfp(env_cg);
731 if (new_op2 != NULL) {
734 new_op1 = be_transform_node(op2);
736 am->ins_permuted = 1;
738 am->op_type = ia32_AddrModeS;
/* Neither immediate nor AM: plain register operands. */
740 am->op_type = ia32_Normal;
742 if (flags & match_try_am) {
748 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
750 new_op2 = be_transform_node(op2);
752 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* Backfill any address parts left unset with NoReg / NoMem. */
754 if (addr->base == NULL)
755 addr->base = noreg_gp;
756 if (addr->index == NULL)
757 addr->index = noreg_gp;
758 if (addr->mem == NULL)
759 addr->mem = new_NoMem();
761 am->new_op1 = new_op1;
762 am->new_op2 = new_op2;
763 am->commutative = commutative;
/* Register new_node as the transformation result of old_node and mark the
 * old node visited so it is not transformed again. */
766 static void set_transformed_and_mark(ir_node *const old_node, ir_node *const new_node)
768 mark_irn_visited(old_node);
769 be_set_transformed_node(old_node, new_node);
/* After folding a Load into @p node via source AM, reroute the Load's old
 * memory Proj to the new node: record node as the Load's transformation,
 * and if node was not mode_T yet, make it mode_T and return a fresh result
 * Proj in the original mode. No-op when no mem Proj was captured. */
772 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
777 if (am->mem_proj == NULL)
780 /* we have to create a mode_T so the old MemProj can attach to us */
781 mode = get_irn_mode(node);
782 load = get_Proj_pred(am->mem_proj);
784 set_transformed_and_mark(load, node);
786 if (mode != mode_T) {
787 set_irn_mode(node, mode_T);
788 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
/* Generic binop builder: match operands, call the ia32 constructor @p func
 * with the matched address and operands, apply AM attributes, and fix up the
 * memory Proj of a folded load. Immediate operands disable source AM. */
795 * Construct a standard binary operation, set AM and immediate if required.
797 * @param node The original node for which the binop is created
798 * @param op1 The first operand
799 * @param op2 The second operand
800 * @param func The node constructor function
801 * @return The constructed ia32 node.
803 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
804 construct_binop_func *func, match_flags_t flags)
807 ir_node *block, *new_block, *new_node;
808 ia32_address_mode_t am;
809 ia32_address_t *addr = &am.addr;
811 block = get_nodes_block(node);
812 match_arguments(&am, block, op1, op2, NULL, flags);
814 dbgi = get_irn_dbg_info(node);
815 new_block = be_transform_node(block);
816 new_node = func(dbgi, current_ir_graph, new_block,
817 addr->base, addr->index, addr->mem,
818 am.new_op1, am.new_op2);
819 set_am_attributes(new_node, &am);
820 /* we can't use source address mode anymore when using immediates */
821 if (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
822 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
823 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
825 new_node = fix_mem_proj(new_node, &am);
/* Canonical input positions for lowered flag-consuming binops; the
 * compile-time asserts pin them to the generated l_Adc/l_Sbb input layouts
 * so gen_binop_flags can use one set of indices for both. */
832 n_ia32_l_binop_right,
833 n_ia32_l_binop_eflags
835 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
836 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
837 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
838 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
839 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
840 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/* Like gen_binop, but for lowered nodes that additionally consume eflags
 * (Adc/Sbb): inputs are read via the canonical n_ia32_l_binop_* indices and
 * the transformed eflags value is passed to the constructor. */
843 * Construct a binary operation which also consumes the eflags.
845 * @param node The node to transform
846 * @param func The node constructor function
847 * @param flags The match flags
848 * @return The constructor ia32 node
850 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
853 ir_node *src_block = get_nodes_block(node);
854 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
855 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
856 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
858 ir_node *block, *new_node, *new_eflags;
859 ia32_address_mode_t am;
860 ia32_address_t *addr = &am.addr;
862 match_arguments(&am, src_block, op1, op2, eflags, flags);
864 dbgi = get_irn_dbg_info(node);
865 block = be_transform_node(src_block);
866 new_eflags = be_transform_node(eflags);
867 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
868 addr->mem, am.new_op1, am.new_op2, new_eflags);
869 set_am_attributes(new_node, &am);
870 /* we can't use source address mode anymore when using immediates */
871 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
872 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
873 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
875 new_node = fix_mem_proj(new_node, &am);
/* Return the transformed node of the initial x87 control word, fetching the
 * ABI ignore-node on first use and caching it in initial_fpcw. */
880 static ir_node *get_fpcw(void)
883 if (initial_fpcw != NULL)
886 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
887 &ia32_fp_cw_regs[REG_FPCW]);
888 initial_fpcw = be_transform_node(fpcw);
/* gen_binop variant for x87 float binops: additionally wires the fp control
 * word into the constructor; address mode is disabled for operands wider
 * than 64 bits (long double cannot be AM-loaded). */
894 * Construct a standard binary operation, set AM and immediate if required.
896 * @param op1 The first operand
897 * @param op2 The second operand
898 * @param func The node constructor function
899 * @return The constructed ia32 node.
901 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
902 construct_binop_float_func *func,
905 ir_mode *mode = get_irn_mode(node);
907 ir_node *block, *new_block, *new_node;
908 ia32_address_mode_t am;
909 ia32_address_t *addr = &am.addr;
911 /* cannot use address mode with long double on x87 */
912 if (get_mode_size_bits(mode) > 64)
915 block = get_nodes_block(node);
916 match_arguments(&am, block, op1, op2, NULL, flags);
918 dbgi = get_irn_dbg_info(node);
919 new_block = be_transform_node(block);
920 new_node = func(dbgi, current_ir_graph, new_block,
921 addr->base, addr->index, addr->mem,
922 am.new_op1, am.new_op2, get_fpcw());
923 set_am_attributes(new_node, &am);
925 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
927 new_node = fix_mem_proj(new_node, &am);
/* Build a shift/rotate: the value operand is widened (or downconv-skipped
 * when mode-neutral); Convs on the shift amount are skipped because only the
 * low 5 bits matter on ia32; the amount may become an immediate. A third
 * input on the lowered node is treated as an extra dependency edge. */
933 * Construct a shift/rotate binary operation, sets AM and immediate if required.
935 * @param op1 The first operand
936 * @param op2 The second operand
937 * @param func The node constructor function
938 * @return The constructed ia32 node.
940 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
941 construct_shift_func *func,
945 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
947 assert(! mode_is_float(get_irn_mode(node)));
948 assert(flags & match_immediate);
949 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
951 if (flags & match_mode_neutral) {
952 op1 = ia32_skip_downconv(op1);
953 new_op1 = be_transform_node(op1);
954 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
955 new_op1 = create_upconv(op1, node);
957 new_op1 = be_transform_node(op1);
960 /* the shift amount can be any mode that is bigger than 5 bits, since all
961 * other bits are ignored anyway */
962 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
963 ir_node *const op = get_Conv_op(op2);
964 if (mode_is_float(get_irn_mode(op)))
967 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
969 new_op2 = create_immediate_or_transform(op2, 0);
971 dbgi = get_irn_dbg_info(node);
972 block = get_nodes_block(node);
973 new_block = be_transform_node(block);
974 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
975 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
977 /* lowered shift instruction may have a dependency operand, handle it here */
978 if (get_irn_arity(node) == 3) {
979 /* we have a dependency */
980 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
981 add_irn_dep(new_node, new_dep);
/* Generic unop builder: optionally skip down-Convs (mode-neutral only),
 * transform the operand and call the ia32 constructor. */
989 * Construct a standard unary operation, set AM and immediate if required.
991 * @param op The operand
992 * @param func The node constructor function
993 * @return The constructed ia32 node.
995 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
999 ir_node *block, *new_block, *new_op, *new_node;
1001 assert(flags == 0 || flags == match_mode_neutral);
1002 if (flags & match_mode_neutral) {
1003 op = ia32_skip_downconv(op);
1006 new_op = be_transform_node(op);
1007 dbgi = get_irn_dbg_info(node);
1008 block = get_nodes_block(node);
1009 new_block = be_transform_node(block);
1010 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1012 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Build an ia32 Lea from a matched (untransformed) address: transform base
 * and index, substituting NoReg where absent, then attach the address
 * attributes. */
1017 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1018 ia32_address_t *addr)
1020 ir_node *base, *index, *res;
1024 base = ia32_new_NoReg_gp(env_cg);
1026 base = be_transform_node(base);
1029 index = addr->index;
1030 if (index == NULL) {
1031 index = ia32_new_NoReg_gp(env_cg);
1033 index = be_transform_node(index);
1036 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1037 set_address(res, addr);
/* True iff the address mode carries any immediate component: a non-zero
 * offset, a symconst entity, or frame usage. */
1043 * Returns non-zero if a given address mode has a symbolic or
1044 * numerical offset != 0.
1046 static int am_has_immediates(const ia32_address_t *addr)
1048 return addr->offset != 0 || addr->symconst_ent != NULL
1049 || addr->frame_entity || addr->use_frame;
/* Transform an Add. Floats go through gen_binop (SSE xAdd) or
 * gen_binop_x87_float (vfadd). Integer Adds follow the 4-way strategy in the
 * comment at original lines 1082-1085: fold to a Const when the whole tree
 * is immediate, use a Lea for add-with-immediate, use an ia32 Add when
 * source address mode matched, otherwise a Lea. An Add x,0 degenerate case
 * is warned about and returns the transformed operand directly. */
1053 * Creates an ia32 Add.
1055 * @return the created ia32 Add node
1057 static ir_node *gen_Add(ir_node *node) {
1058 ir_mode *mode = get_irn_mode(node);
1059 ir_node *op1 = get_Add_left(node);
1060 ir_node *op2 = get_Add_right(node);
1062 ir_node *block, *new_block, *new_node, *add_immediate_op;
1063 ia32_address_t addr;
1064 ia32_address_mode_t am;
1066 if (mode_is_float(mode)) {
1067 if (ia32_cg_config.use_sse2)
1068 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1069 match_commutative | match_am);
1071 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1072 match_commutative | match_am);
1075 ia32_mark_non_am(node);
1077 op2 = ia32_skip_downconv(op2);
1078 op1 = ia32_skip_downconv(op1);
1082 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1083 * 1. Add with immediate -> Lea
1084 * 2. Add with possible source address mode -> Add
1085 * 3. Otherwise -> Lea
1087 memset(&addr, 0, sizeof(addr));
1088 ia32_create_address_mode(&addr, node, /*force=*/1);
1089 add_immediate_op = NULL;
1091 dbgi = get_irn_dbg_info(node);
1092 block = get_nodes_block(node);
1093 new_block = be_transform_node(block);
/* Case 0: whole tree folded into the address -> plain Const. */
1096 if(addr.base == NULL && addr.index == NULL) {
1097 ir_graph *irg = current_ir_graph;
1098 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1099 addr.symconst_sign, addr.offset);
1100 add_irn_dep(new_node, get_irg_frame(irg));
1101 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1104 /* add with immediate? */
1105 if(addr.index == NULL) {
1106 add_immediate_op = addr.base;
1107 } else if(addr.base == NULL && addr.scale == 0) {
1108 add_immediate_op = addr.index;
1111 if(add_immediate_op != NULL) {
1112 if(!am_has_immediates(&addr)) {
1113 #ifdef DEBUG_libfirm
1114 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1117 return be_transform_node(add_immediate_op);
/* Case 1: one register + immediates -> Lea. */
1120 new_node = create_lea_from_address(dbgi, new_block, &addr);
1121 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1125 /* test if we can use source address mode */
1126 match_arguments(&am, block, op1, op2, NULL, match_commutative
1127 | match_mode_neutral | match_am | match_immediate | match_try_am);
1129 /* construct an Add with source address mode */
1130 if (am.op_type == ia32_AddrModeS) {
1131 ir_graph *irg = current_ir_graph;
1132 ia32_address_t *am_addr = &am.addr;
1133 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1134 am_addr->index, am_addr->mem, am.new_op1,
1136 set_am_attributes(new_node, &am);
1137 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1139 new_node = fix_mem_proj(new_node, &am);
1144 /* otherwise construct a lea */
1145 new_node = create_lea_from_address(dbgi, new_block, &addr);
1146 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transform a Mul: SSE xMul or x87 vfmul for floats, otherwise IMul with the
 * full set of match flags (commutative, AM, mode-neutral, immediates). */
1151 * Creates an ia32 Mul.
1153 * @return the created ia32 Mul node
1155 static ir_node *gen_Mul(ir_node *node) {
1156 ir_node *op1 = get_Mul_left(node);
1157 ir_node *op2 = get_Mul_right(node);
1158 ir_mode *mode = get_irn_mode(node);
1160 if (mode_is_float(mode)) {
1161 if (ia32_cg_config.use_sse2)
1162 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1163 match_commutative | match_am);
1165 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1166 match_commutative | match_am);
1168 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1169 match_commutative | match_am | match_mode_neutral |
1170 match_immediate | match_am_and_immediates);
1174 * Creates an ia32 Mulh.
1175 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1176 * this result while Mul returns the lower 32 bit.
1178 * @return the created ia32 Mulh node
/* Transforms a firm Mulh (upper 32 bits of a 32x32->64 multiply) into
 * ia32 IMul1OP (signed) or Mul (unsigned) and returns the Proj selecting
 * the high result word. Only 32-bit integer modes are supported. */
1180 static ir_node *gen_Mulh(ir_node *node)
1182 ir_node *block = get_nodes_block(node);
1183 ir_node *new_block = be_transform_node(block);
1184 ir_graph *irg = current_ir_graph;
1185 dbg_info *dbgi = get_irn_dbg_info(node);
1186 ir_mode *mode = get_irn_mode(node);
1187 ir_node *op1 = get_Mulh_left(node);
1188 ir_node *op2 = get_Mulh_right(node);
1189 ir_node *proj_res_high;
1191 ia32_address_mode_t am;
1192 ia32_address_t *addr = &am.addr;
1194 assert(!mode_is_float(mode) && "Mulh with float not supported");
1195 assert(get_mode_size_bits(mode) == 32);
1197 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
1199 if (mode_is_signed(mode)) {
1200 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1201 addr->index, addr->mem, am.new_op1,
1204 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1205 addr->index, addr->mem, am.new_op1,
1209 set_am_attributes(new_node, &am);
1210 /* we can't use source address mode anymore when using immediates */
1211 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1212 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1213 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1215 assert(get_irn_mode(new_node) == mode_T);
1217 fix_mem_proj(new_node, &am);
/* IMul1OP and Mul must use the same Proj number for the high word so
 * one Proj construction covers both cases. */
1219 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
/* NOTE(review): the Proj is created on the pre-transform `block`, not
 * `new_block` — verify this is intentional and not a stale-block bug. */
1220 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1221 mode_Iu, pn_ia32_IMul1OP_res_high);
1223 return proj_res_high;
1229 * Creates an ia32 And.
1231 * @return The created ia32 And node
/* Transforms a firm And. An And with constant 0xFF/0xFFFF is recognised
 * as a zero extension and turned into an I2I Conv instead of a real And;
 * everything else becomes an ia32 And. Float modes are not allowed. */
1233 static ir_node *gen_And(ir_node *node) {
1234 ir_node *op1 = get_And_left(node);
1235 ir_node *op2 = get_And_right(node);
1236 assert(! mode_is_float(get_irn_mode(node)));
1238 /* is it a zero extension? */
1239 if (is_Const(op2)) {
1240 tarval *tv = get_Const_tarval(op2);
1241 long v = get_tarval_long(tv);
1243 if (v == 0xFF || v == 0xFFFF) {
1244 dbg_info *dbgi = get_irn_dbg_info(node);
1245 ir_node *block = get_nodes_block(node);
/* only the 16-bit case can reach this assert (8-bit handled above
 * in elided lines — TODO confirm against full source) */
1252 assert(v == 0xFFFF);
1255 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1260 return gen_binop(node, op1, op2, new_rd_ia32_And,
1261 match_commutative | match_mode_neutral | match_am
1268 * Creates an ia32 Or.
1270 * @return The created ia32 Or node
/* Transforms a firm Or into an ia32 Or (integer modes only). */
1272 static ir_node *gen_Or(ir_node *node) {
1273 ir_node *op1 = get_Or_left(node);
1274 ir_node *op2 = get_Or_right(node);
1276 assert (! mode_is_float(get_irn_mode(node)));
1277 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1278 | match_mode_neutral | match_am | match_immediate);
1284 * Creates an ia32 Eor.
1286 * @return The created ia32 Eor node
/* Transforms a firm Eor (xor) into an ia32 Xor (integer modes only). */
1288 static ir_node *gen_Eor(ir_node *node) {
1289 ir_node *op1 = get_Eor_left(node);
1290 ir_node *op2 = get_Eor_right(node);
1292 assert(! mode_is_float(get_irn_mode(node)));
1293 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1294 | match_mode_neutral | match_am | match_immediate);
1299 * Creates an ia32 Sub.
1301 * @return The created ia32 Sub node
/* Transforms a firm Sub: SSE2 xSub or x87 vfsub for floats, ia32 Sub for
 * integers. A Sub with a constant right operand should have been
 * normalised to an Add earlier; warn if one slips through. */
1303 static ir_node *gen_Sub(ir_node *node) {
1304 ir_node *op1 = get_Sub_left(node);
1305 ir_node *op2 = get_Sub_right(node);
1306 ir_mode *mode = get_irn_mode(node);
1308 if (mode_is_float(mode)) {
1309 if (ia32_cg_config.use_sse2)
1310 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1312 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
1316 if (is_Const(op2)) {
1317 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1321 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1322 | match_am | match_immediate);
/* Computes the memory input for a node that consumed a Load through
 * address mode: combines the node's original memory (src_mem) with the
 * address-mode memory (am_mem) while breaking memory cycles — any Proj
 * of the consumed Load itself must not feed back into the new node.
 * Returns a single memory node or a Sync of the remaining inputs. */
1325 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1326 ir_node *const src_val,
1327 ir_node *const src_mem,
1328 ir_node *const am_mem)
1330 if (is_NoMem(am_mem)) {
1331 return be_transform_node(src_mem);
1332 } else if (is_Proj(src_val) &&
1334 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1335 /* avoid memory loop */
1337 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
/* src_mem is a Sync: rebuild it without the Proj of the consumed
 * Load, then append am_mem */
1338 ir_node *const ptr_pred = get_Proj_pred(src_val);
1339 int const arity = get_Sync_n_preds(src_mem);
1344 NEW_ARR_A(ir_node*, ins, arity + 1);
1346 for (i = arity - 1; i >= 0; --i) {
1347 ir_node *const pred = get_Sync_pred(src_mem, i);
1349 /* avoid memory loop */
1350 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1353 ins[n++] = be_transform_node(pred);
1358 return new_r_Sync(irg, block, n, ins);
/* general case: Sync of the transformed source memory and am_mem */
1362 ins[0] = be_transform_node(src_mem);
1364 return new_r_Sync(irg, block, 2, ins);
1369 * Generates an ia32 DivMod with additional infrastructure for the
1370 * register allocator if needed.
/* Common transformation for Div, Mod and DivMod: creates an ia32 IDiv
 * (signed, with a Cltd sign extension of op1) or Div (unsigned, with a
 * zero constant as high word). Source address mode is only attempted for
 * op2; see the comment about the memory Sync below. */
1372 static ir_node *create_Div(ir_node *node)
1374 ir_graph *irg = current_ir_graph;
1375 dbg_info *dbgi = get_irn_dbg_info(node);
1376 ir_node *block = get_nodes_block(node);
1377 ir_node *new_block = be_transform_node(block);
1384 ir_node *sign_extension;
1385 ia32_address_mode_t am;
1386 ia32_address_t *addr = &am.addr;
1388 /* the upper bits have random contents for smaller modes */
/* fetch operands/memory/result mode depending on the concrete opcode */
1389 switch (get_irn_opcode(node)) {
1391 op1 = get_Div_left(node);
1392 op2 = get_Div_right(node);
1393 mem = get_Div_mem(node);
1394 mode = get_Div_resmode(node);
1397 op1 = get_Mod_left(node);
1398 op2 = get_Mod_right(node);
1399 mem = get_Mod_mem(node);
1400 mode = get_Mod_resmode(node);
1403 op1 = get_DivMod_left(node);
1404 op2 = get_DivMod_right(node);
1405 mem = get_DivMod_mem(node);
1406 mode = get_DivMod_resmode(node);
1409 panic("invalid divmod node %+F", node);
1412 match_arguments(&am, block, op1, op2, NULL, match_am);
1414 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1415 is the memory of the consumed address. We can have only the second op as address
1416 in Div nodes, so check only op2. */
1417 new_mem = transform_AM_mem(irg, block, op2, mem, addr->mem);
1419 if (mode_is_signed(mode)) {
/* signed division needs edx:eax; Cltd sign-extends eax into edx.
 * ProduceVal + frame dep keep the scheduler from hoisting it. */
1420 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1421 add_irn_dep(produceval, get_irg_frame(irg));
1422 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1425 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1426 addr->index, new_mem, am.new_op2,
1427 am.new_op1, sign_extension);
/* unsigned: high word is simply zero */
1429 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1430 add_irn_dep(sign_extension, get_irg_frame(irg));
1432 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1433 addr->index, new_mem, am.new_op2,
1434 am.new_op1, sign_extension);
1437 set_irn_pinned(new_node, get_irn_pinned(node));
1439 set_am_attributes(new_node, &am);
1440 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1442 new_node = fix_mem_proj(new_node, &am);
/* Transforms a Mod — shares create_Div; callers select the remainder Proj. */
1448 static ir_node *gen_Mod(ir_node *node) {
1449 return create_Div(node);
/* Transforms a Div — shares create_Div; callers select the quotient Proj. */
1452 static ir_node *gen_Div(ir_node *node) {
1453 return create_Div(node);
/* Transforms a DivMod — shares create_Div; both result Projs are used. */
1456 static ir_node *gen_DivMod(ir_node *node) {
1457 return create_Div(node);
1463 * Creates an ia32 floating Div.
1465 * @return The created ia32 xDiv node
/* Transforms a floating-point Quot into SSE2 xDiv or x87 vfdiv. */
1467 static ir_node *gen_Quot(ir_node *node)
1469 ir_node *op1 = get_Quot_left(node);
1470 ir_node *op2 = get_Quot_right(node);
1472 if (ia32_cg_config.use_sse2) {
1473 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1475 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1481 * Creates an ia32 Shl.
1483 * @return The created ia32 Shl node
/* Transforms a Shl into an ia32 Shl (shift amount may be an immediate). */
1485 static ir_node *gen_Shl(ir_node *node) {
1486 ir_node *left = get_Shl_left(node);
1487 ir_node *right = get_Shl_right(node);
1489 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1490 match_mode_neutral | match_immediate);
1494 * Creates an ia32 Shr.
1496 * @return The created ia32 Shr node
/* Transforms a Shr into an ia32 Shr. Not mode-neutral: the upper bits of
 * smaller modes matter for a logical right shift. */
1498 static ir_node *gen_Shr(ir_node *node) {
1499 ir_node *left = get_Shr_left(node);
1500 ir_node *right = get_Shr_right(node);
1502 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1508 * Creates an ia32 Sar.
1510 * @return The created ia32 Shrs node
/* Transforms an arithmetic right shift (Shrs) into an ia32 Sar. Two
 * special patterns are recognised first:
 *  - Shrs(x, 31) on mode_Is is a full sign extension -> Cltd;
 *  - Shrs(Shl(x, C), C) with C == 16 or 24 is an 8/16-bit sign
 *    extension -> I2I Conv. */
1512 static ir_node *gen_Shrs(ir_node *node) {
1513 ir_node *left = get_Shrs_left(node);
1514 ir_node *right = get_Shrs_right(node);
1515 ir_mode *mode = get_irn_mode(node);
1517 if(is_Const(right) && mode == mode_Is) {
1518 tarval *tv = get_Const_tarval(right);
1519 long val = get_tarval_long(tv);
1521 /* this is a sign extension */
1522 ir_graph *irg = current_ir_graph;
1523 dbg_info *dbgi = get_irn_dbg_info(node);
1524 ir_node *block = be_transform_node(get_nodes_block(node));
1526 ir_node *new_op = be_transform_node(op);
1527 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1528 add_irn_dep(pval, get_irg_frame(irg));
1530 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1534 /* 8 or 16 bit sign extension? */
1535 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1536 ir_node *shl_left = get_Shl_left(left);
1537 ir_node *shl_right = get_Shl_right(left);
1538 if(is_Const(shl_right)) {
1539 tarval *tv1 = get_Const_tarval(right);
1540 tarval *tv2 = get_Const_tarval(shl_right);
/* both shift amounts must be the same constant */
1541 if(tv1 == tv2 && tarval_is_long(tv1)) {
1542 long val = get_tarval_long(tv1);
/* 24 -> sign-extend 8 bits, 16 -> sign-extend 16 bits */
1543 if(val == 16 || val == 24) {
1544 dbg_info *dbgi = get_irn_dbg_info(node);
1545 ir_node *block = get_nodes_block(node);
1555 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* no special pattern matched: plain arithmetic shift */
1564 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1570 * Creates an ia32 Rol.
1572 * @param op1 The first operator
1573 * @param op2 The second operator
1574 * @return The created ia32 RotL node
/* Creates an ia32 Rol from the given operands (helper for gen_Rotl). */
1576 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2) {
1577 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1583 * Creates an ia32 Ror.
1584 * NOTE: There is no RotR with immediate because this would always be a RotL
1585 * "imm-mode_size_bits" which can be pre-calculated.
1587 * @param op1 The first operator
1588 * @param op2 The second operator
1589 * @return The created ia32 RotR node
/* Creates an ia32 Ror from the given operands (helper for gen_Rotl). */
1591 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2) {
1592 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1598 * Creates an ia32 RotR or RotL (depending on the found pattern).
1600 * @return The created ia32 RotL or RotR node
/* Transforms a Rotl. Firm only has left rotation, so a rotate amount of
 * the shape (bits - e), i.e. Add(Minus(e), bits-const), is recognised
 * and turned into a RotR by e; otherwise a plain RotL is emitted. */
1602 static ir_node *gen_Rotl(ir_node *node) {
1603 ir_node *rotate = NULL;
1604 ir_node *op1 = get_Rotl_left(node);
1605 ir_node *op2 = get_Rotl_right(node);
1607 /* Firm has only RotL, so we are looking for a right (op2)
1608 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1609 that means we can create a RotR instead of an Add and a RotL */
1613 ir_node *left = get_Add_left(add);
1614 ir_node *right = get_Add_right(add);
1615 if (is_Const(right)) {
1616 tarval *tv = get_Const_tarval(right);
1617 ir_mode *mode = get_irn_mode(node);
1618 long bits = get_mode_size_bits(mode);
1620 if (is_Minus(left) &&
1621 tarval_is_long(tv) &&
1622 get_tarval_long(tv) == bits &&
1625 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1626 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* pattern not matched: emit the straightforward RotL */
1631 if (rotate == NULL) {
1632 rotate = gen_Rol(node, op1, op2);
1641 * Transforms a Minus node.
1643 * @return The created ia32 Minus node
/* Transforms a Minus. Floats: SSE2 xors the sign bit via a constant from
 * memory (address mode), x87 uses vfchs. Integers: ia32 Neg. */
1645 static ir_node *gen_Minus(ir_node *node)
1647 ir_node *op = get_Minus_op(node);
1648 ir_node *block = be_transform_node(get_nodes_block(node));
1649 ir_graph *irg = current_ir_graph;
1650 dbg_info *dbgi = get_irn_dbg_info(node);
1651 ir_mode *mode = get_irn_mode(node);
1656 if (mode_is_float(mode)) {
1657 ir_node *new_op = be_transform_node(op);
1658 if (ia32_cg_config.use_sse2) {
1659 /* TODO: non-optimal... if we have many xXors, then we should
1660 * rather create a load for the const and use that instead of
1661 * several AM nodes... */
1662 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1663 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1664 ir_node *nomem = new_rd_NoMem(irg);
1666 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1667 nomem, new_op, noreg_xmm);
/* pick the 32- or 64-bit sign-mask constant entity */
1669 size = get_mode_size_bits(mode);
1670 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1672 set_ia32_am_sc(new_node, ent);
1673 set_ia32_op_type(new_node, ia32_AddrModeS);
1674 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction */
1676 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
1679 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1682 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1688 * Transforms a Not node.
1690 * @return The created ia32 Not node
/* Transforms a Not into an ia32 Not. mode_b Nots must have been lowered
 * earlier; floats are not allowed. */
1692 static ir_node *gen_Not(ir_node *node) {
1693 ir_node *op = get_Not_op(node);
1695 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1696 assert (! mode_is_float(get_irn_mode(node)));
1698 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1704 * Transforms an Abs node.
1706 * @return The created ia32 Abs node
/* Transforms an Abs. Floats: SSE2 ands away the sign bit with a constant
 * from memory, x87 uses vfabs. Integers: the classic branch-free sequence
 *   sign = x >> 31 (Cltd);  abs = (x ^ sign) - sign. */
1708 static ir_node *gen_Abs(ir_node *node)
1710 ir_node *block = get_nodes_block(node);
1711 ir_node *new_block = be_transform_node(block);
1712 ir_node *op = get_Abs_op(node);
1713 ir_graph *irg = current_ir_graph;
1714 dbg_info *dbgi = get_irn_dbg_info(node);
1715 ir_mode *mode = get_irn_mode(node);
1716 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1717 ir_node *nomem = new_NoMem();
1723 if (mode_is_float(mode)) {
1724 new_op = be_transform_node(op);
1726 if (ia32_cg_config.use_sse2) {
1727 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1728 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1729 nomem, new_op, noreg_fp);
/* pick the 32- or 64-bit abs-mask constant entity */
1731 size = get_mode_size_bits(mode);
1732 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1734 set_ia32_am_sc(new_node, ent);
1736 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1738 set_ia32_op_type(new_node, ia32_AddrModeS);
1739 set_ia32_ls_mode(new_node, mode);
1741 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1742 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1745 ir_node *xor, *pval, *sign_extension;
/* smaller modes are first widened to 32 bit via a sign-extending Conv */
1747 if (get_mode_size_bits(mode) == 32) {
1748 new_op = be_transform_node(op);
1750 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1753 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1754 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1757 add_irn_dep(pval, get_irg_frame(irg));
1758 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1760 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1761 nomem, new_op, sign_extension);
1762 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1764 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1765 nomem, xor, sign_extension);
1766 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1773 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
/* Creates a bt (bit test) instruction for the pattern x & (1 << n),
 * placed in the (transformed) block of the original Cmp. */
1775 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n) {
1776 dbg_info *dbgi = get_irn_dbg_info(cmp);
1777 ir_node *block = get_nodes_block(cmp);
1778 ir_node *new_block = be_transform_node(block);
1779 ir_node *op1 = be_transform_node(x);
1780 ir_node *op2 = be_transform_node(n);
1782 return new_rd_ia32_Bt(dbgi, current_ir_graph, new_block, op1, op2);
1786 * Transform a node returning a "flag" result.
1788 * @param node the node to transform
1789 * @param pnc_out the compare mode to use
/* Transforms a node producing a "flag" result (a Cmp-Proj or a mode_b
 * value) into an ia32 flags-producing node and reports the compare
 * relation through *pnc_out. Cmp against a (1 << n) & x pattern is
 * strength-reduced to a Bt, which sets the carry flag, hence the pnc is
 * rewritten to an unsigned Lt/Ge (Jc/Jnc). A bare mode_b value is
 * compared against 0 with Test. */
1791 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1800 /* we have a Cmp as input */
1801 if (is_Proj(node)) {
1802 ir_node *pred = get_Proj_pred(node);
1804 pn_Cmp pnc = get_Proj_proj(node);
/* try to use the bt instruction for ==/!= bit tests */
1805 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1806 ir_node *l = get_Cmp_left(pred);
1807 ir_node *r = get_Cmp_right(pred);
/* case 1: the shift is on the left side of the And */
1809 ir_node *la = get_And_left(l);
1810 ir_node *ra = get_And_right(l);
1812 ir_node *c = get_Shl_left(la);
1813 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1814 /* (1 << n) & ra) */
1815 ir_node *n = get_Shl_right(la);
1816 flags = gen_bt(pred, ra, n);
1817 /* we must generate a Jc/Jnc jump */
1818 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1821 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* case 2: the shift is on the right side of the And */
1826 ir_node *c = get_Shl_left(ra);
1827 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1828 /* la & (1 << n)) */
1829 ir_node *n = get_Shl_right(ra);
1830 flags = gen_bt(pred, la, n);
1831 /* we must generate a Jc/Jnc jump */
1832 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1835 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* plain Cmp: just transform the predecessor */
1841 flags = be_transform_node(pred);
1847 /* a mode_b value, we have to compare it against 0 */
1848 dbgi = get_irn_dbg_info(node);
1849 new_block = be_transform_node(get_nodes_block(node));
1850 new_op = be_transform_node(node);
1851 noreg = ia32_new_NoReg_gp(env_cg);
1852 nomem = new_NoMem();
1853 flags = new_rd_ia32_Test(dbgi, current_ir_graph, new_block, noreg, noreg, nomem,
1854 new_op, new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1855 *pnc_out = pn_Cmp_Lg;
1860 * Transforms a Load.
1862 * @return the created ia32 Load node
/* Transforms a Load into xLoad (SSE2 float), vfld (x87 float),
 * Conv_I2I with address mode (integer < 32 bit, upper bits undefined
 * otherwise) or a plain ia32 Load. Builds a full ia32 address mode from
 * the pointer first. */
1864 static ir_node *gen_Load(ir_node *node) {
1865 ir_node *old_block = get_nodes_block(node);
1866 ir_node *block = be_transform_node(old_block);
1867 ir_node *ptr = get_Load_ptr(node);
1868 ir_node *mem = get_Load_mem(node);
1869 ir_node *new_mem = be_transform_node(mem);
1872 ir_graph *irg = current_ir_graph;
1873 dbg_info *dbgi = get_irn_dbg_info(node);
1874 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1875 ir_mode *mode = get_Load_mode(node);
1878 ia32_address_t addr;
1880 /* construct load address */
1881 memset(&addr, 0, sizeof(addr));
1882 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1889 base = be_transform_node(base);
1895 index = be_transform_node(index);
1898 if (mode_is_float(mode)) {
1899 if (ia32_cg_config.use_sse2) {
1900 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1902 res_mode = mode_xmm;
1904 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1906 res_mode = mode_vfp;
1909 assert(mode != mode_b);
1911 /* create a conv node with address mode for smaller modes */
1912 if(get_mode_size_bits(mode) < 32) {
1913 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1914 new_mem, noreg, mode);
1916 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1921 set_irn_pinned(new_node, get_irn_pinned(node));
1922 set_ia32_op_type(new_node, ia32_AddrModeS);
1923 set_ia32_ls_mode(new_node, mode);
1924 set_address(new_node, &addr);
/* floating-pinned loads may be rematerialized instead of spilled */
1926 if(get_irn_pinned(node) == op_pin_state_floats) {
1927 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1930 /* make sure we are scheduled behind the initial IncSP/Barrier
1931 * to avoid spills being placed before it
1933 if (block == get_irg_start_block(irg)) {
1934 add_irn_dep(new_node, get_irg_frame(irg));
1937 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Checks whether a value (a Proj of a Load) may be folded into a
 * destination-address-mode operation in the given block: the Load must
 * be the value's only user, live in the same block, use the same pointer
 * as the Store, and no other input (or Sync predecessor of the Store
 * memory) may depend on the Load. Returns non-zero if dest-AM is safe. */
1942 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1943 ir_node *ptr, ir_node *other)
1950 /* we only use address mode if we're the only user of the load */
1951 if (get_irn_n_edges(node) > 1)
1954 load = get_Proj_pred(node);
1957 if (get_nodes_block(load) != block)
1960 /* store should have the same pointer as the load */
1961 if (get_Load_ptr(load) != ptr)
1964 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1965 if (other != NULL &&
1966 get_nodes_block(other) == block &&
1967 heights_reachable_in_block(heights, other, load)) {
/* if the Store memory is a Sync, every predecessor except the Load's
 * own mem-Proj must be independent of the Load */
1974 for (i = get_Sync_n_preds(mem) - 1; i >= 0; --i) {
1975 ir_node *const pred = get_Sync_pred(mem, i);
1977 if (is_Proj(pred) && get_Proj_pred(pred) == load)
1980 if (get_nodes_block(pred) == block &&
1981 heights_reachable_in_block(heights, pred, load)) {
1986 /* Store should be attached to the load */
1987 if (!is_Proj(mem) || get_Proj_pred(mem) != load)
/* Tries to build a destination-address-mode binary operation for a
 * Store(binop(Load(ptr), op)) pattern: the memory operand becomes both
 * source and destination. Uses func8bit for 8-bit modes, func otherwise.
 * Returns the new node, or (in elided code) NULL when no dest-AM form is
 * possible. Requires match_dest_am and match_immediate in flags. */
1994 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1995 ir_node *mem, ir_node *ptr, ir_mode *mode,
1996 construct_binop_dest_func *func,
1997 construct_binop_dest_func *func8bit,
1998 match_flags_t flags)
2000 ir_node *src_block = get_nodes_block(node);
2002 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2003 ir_graph *irg = current_ir_graph;
2010 ia32_address_mode_t am;
2011 ia32_address_t *addr = &am.addr;
2012 memset(&am, 0, sizeof(am));
2014 assert(flags & match_dest_am);
2015 assert(flags & match_immediate); /* there is no destam node without... */
2016 commutative = (flags & match_commutative) != 0;
/* try op1 as the memory operand first, then op2 if commutative */
2018 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2019 build_address(&am, op1);
2020 new_op = create_immediate_or_transform(op2, 0);
2021 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2022 build_address(&am, op2);
2023 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address-mode slots with NoReg/NoMem placeholders */
2028 if(addr->base == NULL)
2029 addr->base = noreg_gp;
2030 if(addr->index == NULL)
2031 addr->index = noreg_gp;
2032 if(addr->mem == NULL)
2033 addr->mem = new_NoMem();
2035 dbgi = get_irn_dbg_info(node);
2036 block = be_transform_node(src_block);
2037 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
2039 if(get_mode_size_bits(mode) == 8) {
2040 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2043 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem,
2046 set_address(new_node, addr);
2047 set_ia32_op_type(new_node, ia32_AddrModeD);
2048 set_ia32_ls_mode(new_node, mode);
2049 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed Load's mem-Proj to the new dest-AM node.
 * NOTE(review): the two marking lines below look like alternative
 * versions of the same step — confirm against the full source. */
2051 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
2052 mem_proj = be_transform_node(am.mem_proj);
2053 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Tries to build a destination-address-mode unary operation for a
 * Store(unop(Load(ptr))) pattern (e.g. IncMem, NotMem). Returns NULL
 * when dest-AM cannot be used. */
2058 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2059 ir_node *ptr, ir_mode *mode,
2060 construct_unop_dest_func *func)
2062 ir_graph *irg = current_ir_graph;
2063 ir_node *src_block = get_nodes_block(node);
2069 ia32_address_mode_t am;
2070 ia32_address_t *addr = &am.addr;
2071 memset(&am, 0, sizeof(am));
2073 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2076 build_address(&am, op);
2078 dbgi = get_irn_dbg_info(node);
2079 block = be_transform_node(src_block);
2080 new_mem = transform_AM_mem(irg, block, am.am_node, mem, addr->mem);
2081 new_node = func(dbgi, irg, block, addr->base, addr->index, new_mem);
2082 set_address(new_node, addr);
2083 set_ia32_op_type(new_node, ia32_AddrModeD);
2084 set_ia32_ls_mode(new_node, mode);
2085 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* redirect the consumed Load's mem-Proj to the new dest-AM node.
 * NOTE(review): the two marking lines below look like alternative
 * versions of the same step — confirm against the full source. */
2087 set_transformed_and_mark(get_Proj_pred(am.mem_proj), new_node);
2088 mem_proj = be_transform_node(am.mem_proj);
2089 set_transformed_and_mark(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Tries to turn a Store(Mux(sel, 1, 0)) (or the negated 0/1 variant) of
 * an 8-bit value into an ia32 SetMem writing the condition result
 * straight to memory. Returns the SetMem node, or (in elided code) NULL
 * if the pattern does not apply. */
2094 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2095 ir_mode *mode = get_irn_mode(node);
2096 ir_node *mux_true = get_Mux_true(node);
2097 ir_node *mux_false = get_Mux_false(node);
2108 ia32_address_t addr;
/* SetMem only writes a single byte */
2110 if(get_mode_size_bits(mode) != 8)
2113 if(is_Const_1(mux_true) && is_Const_0(mux_false)) {
2115 } else if(is_Const_0(mux_true) && is_Const_1(mux_false)) {
2121 build_address_ptr(&addr, ptr, mem);
2123 irg = current_ir_graph;
2124 dbgi = get_irn_dbg_info(node);
2125 block = get_nodes_block(node);
2126 new_block = be_transform_node(block);
2127 cond = get_Mux_sel(node);
2128 flags = get_flags_node(cond, &pnc);
2129 new_mem = be_transform_node(mem);
2130 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2131 addr.index, addr.mem, flags, pnc, negated);
2132 set_address(new_node, &addr);
2133 set_ia32_op_type(new_node, ia32_AddrModeD);
2134 set_ia32_ls_mode(new_node, mode);
2135 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Tries to transform a Store into a destination-address-mode operation
 * (AddMem, SubMem, AndMem, OrMem, XorMem, shift-Mem, SetMem, NegMem,
 * NotMem, Inc/DecMem) by folding the stored computation into the memory
 * write. Dispatches on the opcode of the stored value. Returns the new
 * node or (in elided code) NULL if no dest-AM form applies. */
2140 static ir_node *try_create_dest_am(ir_node *node) {
2141 ir_node *val = get_Store_value(node);
2142 ir_node *mem = get_Store_mem(node);
2143 ir_node *ptr = get_Store_ptr(node);
2144 ir_mode *mode = get_irn_mode(val);
2145 unsigned bits = get_mode_size_bits(mode);
2150 /* handle only GP modes for now... */
2151 if(!ia32_mode_needs_gp_reg(mode))
2155 /* store must be the only user of the val node */
2156 if(get_irn_n_edges(val) > 1)
2158 /* skip pointless convs */
2160 ir_node *conv_op = get_Conv_op(val);
2161 ir_mode *pred_mode = get_irn_mode(conv_op);
2162 if (!ia32_mode_needs_gp_reg(pred_mode))
2164 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2172 /* value must be in the same block */
2173 if(get_nodes_block(node) != get_nodes_block(val))
2176 switch (get_irn_opcode(val)) {
/* Add(x, 1)/Add(x, -1) become Inc/DecMem; general Add -> AddMem */
2178 op1 = get_Add_left(val);
2179 op2 = get_Add_right(val);
2180 if(is_Const_1(op2)) {
2181 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2182 new_rd_ia32_IncMem);
2184 } else if(is_Const_Minus_1(op2)) {
2185 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2186 new_rd_ia32_DecMem);
2189 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2190 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2191 match_dest_am | match_commutative |
2195 op1 = get_Sub_left(val);
2196 op2 = get_Sub_right(val);
2197 if (is_Const(op2)) {
2198 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2200 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2201 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2202 match_dest_am | match_immediate |
2206 op1 = get_And_left(val);
2207 op2 = get_And_right(val);
2208 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2209 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2210 match_dest_am | match_commutative |
2214 op1 = get_Or_left(val);
2215 op2 = get_Or_right(val);
2216 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2217 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2218 match_dest_am | match_commutative |
2222 op1 = get_Eor_left(val);
2223 op2 = get_Eor_right(val);
2224 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2225 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2226 match_dest_am | match_commutative |
2230 op1 = get_Shl_left(val);
2231 op2 = get_Shl_right(val);
2232 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2233 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2234 match_dest_am | match_immediate);
2237 op1 = get_Shr_left(val);
2238 op2 = get_Shr_right(val);
2239 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2240 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2241 match_dest_am | match_immediate);
2244 op1 = get_Shrs_left(val);
2245 op2 = get_Shrs_right(val);
2246 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2247 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2248 match_dest_am | match_immediate);
2251 op1 = get_Rotl_left(val);
2252 op2 = get_Rotl_right(val);
2253 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2254 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2255 match_dest_am | match_immediate);
2257 /* TODO: match ROR patterns... */
2259 new_node = try_create_SetMem(val, ptr, mem);
2262 op1 = get_Minus_op(val);
2263 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2266 /* should be lowered already */
2267 assert(mode != mode_b);
2268 op1 = get_Not_op(val);
2269 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* a pinned Store must result in a pinned dest-AM node */
2275 if(new_node != NULL) {
2276 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2277 get_irn_pinned(node) == op_pin_state_pinned) {
2278 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns non-zero iff node is a Conv from a float mode to a signed
 * 32-bit GP mode. Unsigned targets are excluded because they need a
 * 64-bit signed vfist instead. */
2287 static int is_float_to_int32_conv(const ir_node *node)
2289 ir_mode *mode = get_irn_mode(node);
2291 if(get_mode_size_bits(mode) != 32 || !ia32_mode_needs_gp_reg(mode))
2293 /* don't report unsigned as conv to 32bit, because we really need to do
2294 * a vfist with 64bit signed in this case */
2295 if(!mode_is_signed(mode))
2300 conv_op = get_Conv_op(node);
2301 conv_mode = get_irn_mode(conv_op);
2303 if(!mode_is_float(conv_mode))
2310 * Transform a Store(floatConst).
2312 * @return the created ia32 Store node
/* Transforms a Store of a float constant into one or more 32-bit integer
 * Stores of the constant's raw bit pattern (little-endian, 4 bytes per
 * store). Returns the single Store or a Sync of all partial stores. */
2314 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2316 ir_mode *mode = get_irn_mode(cns);
2317 unsigned size = get_mode_size_bytes(mode);
2318 tarval *tv = get_Const_tarval(cns);
2319 ir_node *block = get_nodes_block(node);
2320 ir_node *new_block = be_transform_node(block);
2321 ir_node *ptr = get_Store_ptr(node);
2322 ir_node *mem = get_Store_mem(node);
2323 ir_graph *irg = current_ir_graph;
2324 dbg_info *dbgi = get_irn_dbg_info(node);
2328 ia32_address_t addr;
/* the constant is split into whole 32-bit words */
2330 assert(size % 4 == 0);
2333 build_address_ptr(&addr, ptr, mem);
/* assemble one little-endian 32-bit word of the constant */
2337 get_tarval_sub_bits(tv, ofs) |
2338 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2339 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2340 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2341 ir_node *imm = create_Immediate(NULL, 0, val);
2343 ir_node *new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2344 addr.index, addr.mem, imm);
2346 set_irn_pinned(new_node, get_irn_pinned(node));
2347 set_ia32_op_type(new_node, ia32_AddrModeD);
2348 set_ia32_ls_mode(new_node, mode_Iu);
2349 set_address(new_node, &addr);
2350 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2352 ins[i++] = new_node;
2357 } while (size != 0);
2359 return i == 1 ? ins[0] : new_rd_Sync(dbgi, irg, new_block, i, ins);
2363 * Generate a vfist or vfisttp instruction.
/* Generates a vfist or (if available) vfisttp float->int store.
 * fisttp always pops the x87 top-of-stack, so its result value is kept
 * alive via a Keep in case other users of the value exist; vfist instead
 * needs the FPU truncate rounding mode. The store node is returned via
 * *fist (assignment elided in this view — TODO confirm), the memory
 * result is the function's return value. */
2365 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2366 ir_node *mem, ir_node *val, ir_node **fist)
2370 if (ia32_cg_config.use_fisttp) {
2371 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2372 if other users exists */
2373 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2374 ir_node *vfisttp = new_rd_ia32_vfisttp(dbgi, irg, block, base, index, mem, val);
2375 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2376 be_new_Keep(reg_class, irg, block, 1, &value);
2378 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
2381 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2384 new_node = new_rd_ia32_vfist(dbgi, irg, block, base, index, mem, val, trunc_mode);
2390 * Transforms a normal Store.
2392 * @return the created ia32 Store node
/* Transforms a normal Store: first tries destination address mode, then
 * emits xStore (SSE2 float), vfst (x87 float), vfist(tp) for a skipped
 * float->int32 Conv, or Store/Store8Bit for integers. Redundant Convs in
 * front of the store are skipped where legal. */
2394 static ir_node *gen_normal_Store(ir_node *node)
2396 ir_node *val = get_Store_value(node);
2397 ir_mode *mode = get_irn_mode(val);
2398 ir_node *block = get_nodes_block(node);
2399 ir_node *new_block = be_transform_node(block);
2400 ir_node *ptr = get_Store_ptr(node);
2401 ir_node *mem = get_Store_mem(node);
2402 ir_graph *irg = current_ir_graph;
2403 dbg_info *dbgi = get_irn_dbg_info(node);
2404 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2405 ir_node *new_val, *new_node, *store;
2406 ia32_address_t addr;
2408 /* check for destination address mode */
2409 new_node = try_create_dest_am(node);
2410 if (new_node != NULL)
2413 /* construct store address */
2414 memset(&addr, 0, sizeof(addr));
2415 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2417 if (addr.base == NULL) {
2420 addr.base = be_transform_node(addr.base);
2423 if (addr.index == NULL) {
2426 addr.index = be_transform_node(addr.index);
2428 addr.mem = be_transform_node(mem);
2430 if (mode_is_float(mode)) {
2431 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2433 while (is_Conv(val) && mode == get_irn_mode(val)) {
2434 ir_node *op = get_Conv_op(val);
2435 if (!mode_is_float(get_irn_mode(op)))
2439 new_val = be_transform_node(val);
2440 if (ia32_cg_config.use_sse2) {
2441 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2442 addr.index, addr.mem, new_val);
2444 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2445 addr.index, addr.mem, new_val, mode);
/* x87 only: a float->int32 Conv feeding the store becomes a vfist */
2448 } else if (!ia32_cg_config.use_sse2 && is_float_to_int32_conv(val)) {
2449 val = get_Conv_op(val);
2451 /* TODO: is this optimisation still necessary at all (middleend)? */
2452 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2453 while (is_Conv(val)) {
2454 ir_node *op = get_Conv_op(val);
2455 if (!mode_is_float(get_irn_mode(op)))
2457 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2461 new_val = be_transform_node(val);
2462 new_node = gen_vfist(dbgi, irg, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* integer store: 8-bit stores need the 8-bit register variant */
2464 new_val = create_immediate_or_transform(val, 0);
2465 assert(mode != mode_b);
2467 if (get_mode_size_bits(mode) == 8) {
2468 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2469 addr.index, addr.mem, new_val);
2471 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2472 addr.index, addr.mem, new_val);
2477 set_irn_pinned(store, get_irn_pinned(node));
2478 set_ia32_op_type(store, ia32_AddrModeD);
2479 set_ia32_ls_mode(store, mode);
2481 set_address(store, &addr);
2482 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2488  * Transforms a Store.
2490  * @return the created ia32 Store node
/* Dispatcher: a Store of a float Const that is not "simple" (cheap to
 * materialize in SSE/x87) is lowered via gen_float_const_Store, which can
 * store the constant's bit pattern with integer stores; everything else
 * takes the normal store path. */
2492 static ir_node *gen_Store(ir_node *node)
2494 ir_node *val = get_Store_value(node);
2495 ir_mode *mode = get_irn_mode(val);
2497 if (mode_is_float(mode) && is_Const(val)) {
2500 /* we are storing a floating point constant */
2501 if (ia32_cg_config.use_sse2) {
2502 transform = !is_simple_sse_Const(val);
2504 transform = !is_simple_x87_Const(val);
2507 return gen_float_const_Store(node, val);
2509 return gen_normal_Store(node);
2513  * Transforms a Switch.
2515  * @return the created ia32 SwitchJmp node
/* Builds a table jump: scans all case Projs for the minimum/maximum case
 * value, rejects over-large tables, biases the selector to start at 0 if
 * needed (via a Lea with negative offset), then emits a SwitchJmp. */
2517 static ir_node *create_Switch(ir_node *node)
2519 ir_graph *irg = current_ir_graph;
2520 dbg_info *dbgi = get_irn_dbg_info(node);
2521 ir_node *block = be_transform_node(get_nodes_block(node));
2522 ir_node *sel = get_Cond_selector(node);
2523 ir_node *new_sel = be_transform_node(sel);
2524 int switch_min = INT_MAX;
2525 int switch_max = INT_MIN;
2526 long default_pn = get_Cond_defaultProj(node);
2528 const ir_edge_t *edge;
/* selector must already be 32bit at this point */
2530 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2532 /* determine the smallest switch case value */
2533 foreach_out_edge(node, edge) {
2534 ir_node *proj = get_edge_src_irn(edge);
2535 long pn = get_Proj_proj(proj);
2536 if(pn == default_pn)
/* refuse to build gigantic jump tables */
2545 if((unsigned) (switch_max - switch_min) > 256000) {
2546 panic("Size of switch %+F bigger than 256000", node);
2549 if (switch_min != 0) {
2550 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2552 /* if smallest switch case is not 0 we need an additional sub */
2553 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2554 add_ia32_am_offs_int(new_sel, -switch_min);
2555 set_ia32_op_type(new_sel, ia32_AddrModeS);
2557 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2560 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2561 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2567  * Transform a Cond node.
/* Non-mode_b selectors are switches; otherwise the selector's flags are
 * produced via get_flags_node and a conditional jump (Jcc) is emitted. */
2569 static ir_node *gen_Cond(ir_node *node) {
2570 ir_node *block = get_nodes_block(node);
2571 ir_node *new_block = be_transform_node(block);
2572 ir_graph *irg = current_ir_graph;
2573 dbg_info *dbgi = get_irn_dbg_info(node);
2574 ir_node *sel = get_Cond_selector(node);
2575 ir_mode *sel_mode = get_irn_mode(sel);
2576 ir_node *flags = NULL;
2580 if (sel_mode != mode_b) {
2581 return create_Switch(node);
2584 /* we get flags from a Cmp */
2585 flags = get_flags_node(sel, &pnc);
2587 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2588 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Duplicates a be_Copy; all gp-register values are normalized to mode_Iu
 * in the ia32 backend, so the copy's mode is adjusted accordingly. */
2593 static ir_node *gen_be_Copy(ir_node *node)
2595 ir_node *new_node = be_duplicate_node(node);
2596 ir_mode *mode = get_irn_mode(new_node);
2598 if (ia32_mode_needs_gp_reg(mode)) {
2599 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare. With fucomi support the flags are produced
 * directly (vFucomi); otherwise the FPU status word is fetched with
 * vFtstFnstsw (compare against 0) or vFucomFnstsw and transferred into
 * EFLAGS with Sahf. */
2605 static ir_node *create_Fucom(ir_node *node)
2607 ir_graph *irg = current_ir_graph;
2608 dbg_info *dbgi = get_irn_dbg_info(node);
2609 ir_node *block = get_nodes_block(node);
2610 ir_node *new_block = be_transform_node(block);
2611 ir_node *left = get_Cmp_left(node);
2612 ir_node *new_left = be_transform_node(left);
2613 ir_node *right = get_Cmp_right(node);
2617 if(ia32_cg_config.use_fucomi) {
2618 new_right = be_transform_node(right);
2619 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2621 set_ia32_commutative(new_node);
2622 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* ftst compares against 0 without needing the right operand in a register */
2624 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2625 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2628 new_right = be_transform_node(right);
2629 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2633 set_ia32_commutative(new_node);
2635 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* move FPU status word (in AH) into EFLAGS */
2637 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2638 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Creates an SSE Ucomi compare. Operands are matched with address-mode
 * support (commutative), attributes are copied from the matcher state and
 * a potential memory Proj is fixed up afterwards. */
2644 static ir_node *create_Ucomi(ir_node *node)
2646 ir_graph *irg = current_ir_graph;
2647 dbg_info *dbgi = get_irn_dbg_info(node);
2648 ir_node *src_block = get_nodes_block(node);
2649 ir_node *new_block = be_transform_node(src_block);
2650 ir_node *left = get_Cmp_left(node);
2651 ir_node *right = get_Cmp_right(node);
2653 ia32_address_mode_t am;
2654 ia32_address_t *addr = &am.addr;
2656 match_arguments(&am, src_block, left, right, NULL,
2657 match_commutative | match_am);
2659 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2660 addr->mem, am.new_op1, am.new_op2,
2662 set_am_attributes(new_node, &am);
2664 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2666 new_node = fix_mem_proj(new_node, &am);
2672  * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2673  * to fold an and into a test node
/* An And can only be folded into a Test when every user of the Cmp checks
 * ==/!= 0, since Test does not produce a meaningful subtraction result. */
2675 static bool can_fold_test_and(ir_node *node)
2677 const ir_edge_t *edge;
2679 /* we can only have eq and lg projs */
2680 foreach_out_edge(node, edge) {
2681 ir_node *proj = get_edge_src_irn(edge);
2682 pn_Cmp pnc = get_Proj_proj(proj);
2683 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2691  * returns true if it is assured, that the upper bits of a node are "clean"
2692  * which means for a 16 or 8 bit value, that the upper bits in the register
2693  * are 0 for unsigned and a copy of the most significant bit for signed
/* Used to decide whether a sub-32bit compare can safely be widened to a
 * full 32bit compare (smaller opcode). Recurses through Projs, matching
 * sign/zero-extending Convs, Shr/And with suitable immediates, and checks
 * immediates/constants for properly extended values. */
2696 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2698 assert(ia32_mode_needs_gp_reg(mode));
/* 32bit values occupy the full register — nothing to check */
2699 if (get_mode_size_bits(mode) >= 32)
2702 if (is_Proj(transformed_node))
2703 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2705 if (is_ia32_Conv_I2I(transformed_node)
2706 || is_ia32_Conv_I2I8Bit(transformed_node)) {
2707 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
/* the Conv must extend with the same signedness and from a mode that is
 * not wider than the one we care about */
2708 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2710 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
/* a logical shift right by >= (32 - bits) leaves only zero upper bits */
2716 if (is_ia32_Shr(transformed_node) && !mode_is_signed(mode)) {
2717 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2718 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2719 const ia32_immediate_attr_t *attr
2720 = get_ia32_immediate_attr_const(right);
2721 if (attr->symconst == 0
2722 && (unsigned) attr->offset >= (32 - get_mode_size_bits(mode))) {
2726 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
/* an And with a mask that fits into the small mode clears the upper bits */
2729 if (is_ia32_And(transformed_node) && !mode_is_signed(mode)) {
2730 ir_node *right = get_irn_n(transformed_node, n_ia32_And_right);
2731 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2732 const ia32_immediate_attr_t *attr
2733 = get_ia32_immediate_attr_const(right);
2734 if (attr->symconst == 0
2735 && (unsigned) attr->offset
2736 <= (0xffffffff >> (32 - get_mode_size_bits(mode)))) {
2743 /* TODO recurse on Or, Xor, ... if appropriate? */
/* an immediate is clean if its value is a correctly sign/zero-extended
 * representation of a small-mode value */
2745 if (is_ia32_Immediate(transformed_node)
2746 || is_ia32_Const(transformed_node)) {
2747 const ia32_immediate_attr_t *attr
2748 = get_ia32_immediate_attr_const(transformed_node);
2749 if (mode_is_signed(mode)) {
2750 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2751 if (shifted == 0 || shifted == -1)
2754 unsigned long shifted = (unsigned long) attr->offset;
2755 shifted >>= get_mode_size_bits(mode);
2765  * Generate code for a Cmp.
/* Float compares are delegated to Ucomi (SSE) or Fucom (x87). Integer
 * compares against 0 of a single-use And become a Test instruction;
 * everything else becomes a Cmp. In both cases sub-32bit compares are
 * widened to 32bit when the operands' upper bits are known clean, since
 * the 32bit opcode is smaller. */
2767 static ir_node *gen_Cmp(ir_node *node)
2769 ir_graph *irg = current_ir_graph;
2770 dbg_info *dbgi = get_irn_dbg_info(node);
2771 ir_node *block = get_nodes_block(node);
2772 ir_node *new_block = be_transform_node(block);
2773 ir_node *left = get_Cmp_left(node);
2774 ir_node *right = get_Cmp_right(node);
2775 ir_mode *cmp_mode = get_irn_mode(left);
2777 ia32_address_mode_t am;
2778 ia32_address_t *addr = &am.addr;
2781 if(mode_is_float(cmp_mode)) {
2782 if (ia32_cg_config.use_sse2) {
2783 return create_Ucomi(node);
2785 return create_Fucom(node);
2789 assert(ia32_mode_needs_gp_reg(cmp_mode));
2791 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2792 cmp_unsigned = !mode_is_signed(cmp_mode);
2793 if (is_Const_0(right) &&
2795 get_irn_n_edges(left) == 1 &&
2796 can_fold_test_and(node)) {
2797 /* Test(and_left, and_right) */
2798 ir_node *and_left = get_And_left(left);
2799 ir_node *and_right = get_And_right(left);
2801 /* matze: code here used mode instead of cmp_mode, I think it is always
2802  * the same as cmp_mode, but I leave this here to see if this is really
2805 assert(get_irn_mode(and_left) == cmp_mode);
2807 match_arguments(&am, block, and_left, and_right, NULL,
2809 match_am | match_8bit_am | match_16bit_am |
2810 match_am_and_immediates | match_immediate |
2811 match_8bit | match_16bit);
2813 /* use 32bit compare mode if possible since the opcode is smaller */
2814 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2815 upper_bits_clean(am.new_op2, cmp_mode)) {
2816 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2819 if (get_mode_size_bits(cmp_mode) == 8) {
2820 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2821 addr->index, addr->mem, am.new_op1,
2822 am.new_op2, am.ins_permuted,
2825 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2826 addr->index, addr->mem, am.new_op1,
2827 am.new_op2, am.ins_permuted,
2831 /* Cmp(left, right) */
2832 match_arguments(&am, block, left, right, NULL,
2833 match_commutative | match_am | match_8bit_am |
2834 match_16bit_am | match_am_and_immediates |
2835 match_immediate | match_8bit | match_16bit);
2836 /* use 32bit compare mode if possible since the opcode is smaller */
2837 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2838 upper_bits_clean(am.new_op2, cmp_mode)) {
2839 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2842 if (get_mode_size_bits(cmp_mode) == 8) {
2843 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2844 addr->index, addr->mem, am.new_op1,
2845 am.new_op2, am.ins_permuted,
2848 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2849 addr->index, addr->mem, am.new_op1,
2850 am.new_op2, am.ins_permuted, cmp_unsigned);
2853 set_am_attributes(new_node, &am);
2854 set_ia32_ls_mode(new_node, cmp_mode);
2856 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2858 new_node = fix_mem_proj(new_node, &am);
/* Creates a CMov for a Mux node: val_false/val_true are matched as
 * operands (CMov selects op2 on true condition — note operand order),
 * the flags producer is passed through, and AM attributes are applied.
 * Requires CMOV support and gp-register values. */
2863 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2866 ir_graph *irg = current_ir_graph;
2867 dbg_info *dbgi = get_irn_dbg_info(node);
2868 ir_node *block = get_nodes_block(node);
2869 ir_node *new_block = be_transform_node(block);
2870 ir_node *val_true = get_Mux_true(node);
2871 ir_node *val_false = get_Mux_false(node);
2873 match_flags_t match_flags;
2874 ia32_address_mode_t am;
2875 ia32_address_t *addr;
2877 assert(ia32_cg_config.use_cmov);
2878 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2882 match_flags = match_commutative | match_am | match_16bit_am |
2885 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2887 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2888 addr->mem, am.new_op1, am.new_op2, new_flags,
2889 am.ins_permuted, pnc);
2890 set_am_attributes(new_node, &am);
2892 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2894 new_node = fix_mem_proj(new_node, &am);
2900  * Creates a ia32 Setcc instruction.
/* Set produces an 8bit result; when the requested mode is wider the value
 * is zero-extended with a Conv_I2I8Bit (mode_Bu) to fill the register. */
2902 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2903 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2906 ir_graph *irg = current_ir_graph;
2907 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2908 ir_node *nomem = new_NoMem();
2909 ir_mode *mode = get_irn_mode(orig_node);
2912 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2913 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2915 /* we might need to conv the result up */
2916 if (get_mode_size_bits(mode) > 8) {
2917 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2918 nomem, new_node, mode_Bu);
2919 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2926  * Create instruction for an unsigned Difference or Zero.
/* Computes Doz(a, b) = a >= b ? a - b : 0 branch-free:
 *   sub  = a - b (also produces carry/flags)
 *   sbb  = tmp - tmp - carry   -> all-ones mask if a < b, else 0... 
 *   NOTE(review): mask polarity depends on Sbb semantics — the And below
 *   keeps the subtraction result only when no borrow occurred; confirm
 *   against ia32 Sbb/ProduceVal definitions.
 *   result = sub & mask */
2928 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b) {
2929 ir_graph *irg = current_ir_graph;
2930 ir_mode *mode = get_irn_mode(psi);
2931 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg, *tmpreg, *nomem;
2934 new_node = gen_binop(psi, a, b, new_rd_ia32_Sub,
2935 match_mode_neutral | match_am | match_immediate | match_two_users);
2937 block = get_nodes_block(new_node);
2939 if (is_Proj(new_node)) {
2940 sub = get_Proj_pred(new_node);
2941 assert(is_ia32_Sub(sub));
/* the Sub gets a second user (flags), so make it mode_T with explicit Projs */
2944 set_irn_mode(sub, mode_T);
2945 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2947 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2949 dbgi = get_irn_dbg_info(psi);
2950 noreg = ia32_new_NoReg_gp(env_cg);
2951 tmpreg = new_rd_ia32_ProduceVal(dbgi, irg, block);
2952 nomem = new_NoMem();
2953 sbb = new_rd_ia32_Sbb(dbgi, irg, block, noreg, noreg, nomem, tmpreg, tmpreg, eflags);
2955 new_node = new_rd_ia32_And(dbgi, irg, block, noreg, noreg, nomem, new_node, sbb);
2956 set_ia32_commutative(new_node);
2961  * Transforms a Mux node into CMov.
2963  * @return The transformed node.
/* Float Muxes are only supported as SSE min/max patterns; integer Muxes
 * are recognized as unsigned Doz, Set (0/1 constants) or a generic CMov. */
2965 static ir_node *gen_Mux(ir_node *node)
2967 dbg_info *dbgi = get_irn_dbg_info(node);
2968 ir_node *block = get_nodes_block(node);
2969 ir_node *new_block = be_transform_node(block);
2970 ir_node *mux_true = get_Mux_true(node);
2971 ir_node *mux_false = get_Mux_false(node);
2972 ir_node *cond = get_Mux_sel(node);
2973 ir_mode *mode = get_irn_mode(node);
2976 assert(get_irn_mode(cond) == mode_b);
2978 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
2979 if (mode_is_float(mode)) {
2980 ir_node *cmp = get_Proj_pred(cond);
2981 ir_node *cmp_left = get_Cmp_left(cmp);
2982 ir_node *cmp_right = get_Cmp_right(cmp);
2983 pn_Cmp pnc = get_Proj_proj(cond);
2985 if (ia32_cg_config.use_sse2) {
2986 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
2987 if (cmp_left == mux_true && cmp_right == mux_false) {
2988 /* Mux(a <= b, a, b) => MIN */
2989 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
2990 match_commutative | match_am | match_two_users);
2991 } else if (cmp_left == mux_false && cmp_right == mux_true) {
2992 /* Mux(a <= b, b, a) => MAX */
2993 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
2994 match_commutative | match_am | match_two_users);
2996 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
2997 if (cmp_left == mux_true && cmp_right == mux_false) {
2998 /* Mux(a >= b, a, b) => MAX */
2999 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMax,
3000 match_commutative | match_am | match_two_users);
3001 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3002 /* Mux(a >= b, b, a) => MIN */
3003 return gen_binop(node, cmp_left, cmp_right, new_rd_ia32_xMin,
3004 match_commutative | match_am | match_two_users);
/* no generic float Mux lowering available */
3008 panic("cannot transform floating point Mux");
3014 assert(ia32_mode_needs_gp_reg(mode));
3016 if (is_Proj(cond)) {
3017 ir_node *cmp = get_Proj_pred(cond);
3019 ir_node *cmp_left = get_Cmp_left(cmp);
3020 ir_node *cmp_right = get_Cmp_right(cmp);
3021 pn_Cmp pnc = get_Proj_proj(cond);
3023 /* check for unsigned Doz first */
3024 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3025 is_Const_0(mux_false) && is_Sub(mux_true) &&
3026 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3027 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3028 return create_Doz(node, cmp_left, cmp_right);
3029 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3030 is_Const_0(mux_true) && is_Sub(mux_false) &&
3031 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3032 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3033 return create_Doz(node, cmp_left, cmp_right);
3038 flags = get_flags_node(cond, &pnc);
3040 if (is_Const(mux_true) && is_Const(mux_false)) {
3041 /* both are const, good */
3042 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3043 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*ins_permuted=*/0);
3044 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3045 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*ins_permuted=*/1);
3047 /* Not that simple. */
3052 new_node = create_CMov(node, cond, flags, pnc);
3060  * Create a conversion from x87 state register to general purpose.
/* x87 has no direct fp->gp move: the value is stored to the frame with
 * fist(p) and loaded back as an integer. Unsigned 32bit values do not fit
 * into fist's signed 32bit range, so a signed 64bit store is used and only
 * the low 32 bits are loaded. */
3062 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
3063 ir_node *block = be_transform_node(get_nodes_block(node));
3064 ir_node *op = get_Conv_op(node);
3065 ir_node *new_op = be_transform_node(op);
3066 ia32_code_gen_t *cg = env_cg;
3067 ir_graph *irg = current_ir_graph;
3068 dbg_info *dbgi = get_irn_dbg_info(node);
3069 ir_node *noreg = ia32_new_NoReg_gp(cg);
3070 ir_mode *mode = get_irn_mode(node);
3071 ir_node *fist, *load, *mem;
3073 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
3074 set_irn_pinned(fist, op_pin_state_floats);
3075 set_ia32_use_frame(fist);
3076 set_ia32_op_type(fist, ia32_AddrModeD);
3078 assert(get_mode_size_bits(mode) <= 32);
3079 /* exception we can only store signed 32 bit integers, so for unsigned
3080 we store a 64bit (signed) integer and load the lower bits */
3081 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3082 set_ia32_ls_mode(fist, mode_Ls);
3084 set_ia32_ls_mode(fist, mode_Is);
3086 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* load the integer back from the spill slot */
3089 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, mem);
3091 set_irn_pinned(load, op_pin_state_floats);
3092 set_ia32_use_frame(load);
3093 set_ia32_op_type(load, ia32_AddrModeS);
3094 set_ia32_ls_mode(load, mode_Is);
/* the frame entity must be big enough for the chosen store mode */
3095 if(get_ia32_ls_mode(fist) == mode_Ls) {
3096 ia32_attr_t *attr = get_ia32_attr(load);
3097 attr->data.need_64bit_stackent = 1;
3099 ia32_attr_t *attr = get_ia32_attr(load);
3100 attr->data.need_32bit_stackent = 1;
3102 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
3104 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3108  * Creates a x87 strict Conv by placing a Store and a Load
/* x87 registers always hold 80bit extended precision; a strict Conv must
 * actually round to the target mode, which is forced by a store/load
 * round-trip through the frame. */
3110 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3112 ir_node *block = get_nodes_block(node);
3113 ir_graph *irg = current_ir_graph;
3114 dbg_info *dbgi = get_irn_dbg_info(node);
3115 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3116 ir_node *nomem = new_NoMem();
3117 ir_node *frame = get_irg_frame(irg);
3118 ir_node *store, *load;
3121 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
3123 set_ia32_use_frame(store);
3124 set_ia32_op_type(store, ia32_AddrModeD);
3125 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
3127 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3129 set_ia32_use_frame(load);
3130 set_ia32_op_type(load, ia32_AddrModeS);
3131 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3133 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3138  * Create a conversion from general purpose to x87 register
/* Lowers int->float via fild. A signed 32bit source may be consumed
 * directly from memory (source AM); otherwise the value is first widened
 * to 32bit signed, spilled to the frame and loaded with fild. Unsigned
 * 32bit values need a 64bit slot with a zero upper half, since fild only
 * reads signed integers. */
3140 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3141 ir_node *src_block = get_nodes_block(node);
3142 ir_node *block = be_transform_node(src_block);
3143 ir_graph *irg = current_ir_graph;
3144 dbg_info *dbgi = get_irn_dbg_info(node);
3145 ir_node *op = get_Conv_op(node);
3146 ir_node *new_op = NULL;
3150 ir_mode *store_mode;
3156 /* fild can use source AM if the operand is a signed 32bit integer */
3157 if (src_mode == mode_Is) {
3158 ia32_address_mode_t am;
3160 match_arguments(&am, src_block, NULL, op, NULL,
3161 match_am | match_try_am);
3162 if (am.op_type == ia32_AddrModeS) {
3163 ia32_address_t *addr = &am.addr;
3165 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3166 addr->index, addr->mem);
3167 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3170 set_am_attributes(fild, &am);
3171 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3173 fix_mem_proj(fild, &am);
/* no source AM matched — transform the operand normally */
3178 if(new_op == NULL) {
3179 new_op = be_transform_node(op);
3182 noreg = ia32_new_NoReg_gp(env_cg);
3183 nomem = new_NoMem();
3184 mode = get_irn_mode(op);
3186 /* first convert to 32 bit signed if necessary */
3187 src_bits = get_mode_size_bits(src_mode);
3188 if (src_bits == 8) {
3189 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3191 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3193 } else if (src_bits < 32) {
3194 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3196 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3200 assert(get_mode_size_bits(mode) == 32);
/* spill the (now 32bit) integer to the frame */
3203 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3206 set_ia32_use_frame(store);
3207 set_ia32_op_type(store, ia32_AddrModeD);
3208 set_ia32_ls_mode(store, mode_Iu);
3210 /* exception for 32bit unsigned, do a 64bit spill+load */
3211 if(!mode_is_signed(mode)) {
3214 ir_node *zero_const = create_Immediate(NULL, 0, 0);
/* write a zero into the upper 4 bytes of the 64bit slot */
3216 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3217 get_irg_frame(irg), noreg, nomem,
3220 set_ia32_use_frame(zero_store);
3221 set_ia32_op_type(zero_store, ia32_AddrModeD);
3222 add_ia32_am_offs_int(zero_store, 4);
3223 set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores — join them with a Sync */
3228 store = new_rd_Sync(dbgi, irg, block, 2, in);
3229 store_mode = mode_Ls;
3231 store_mode = mode_Is;
3235 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3237 set_ia32_use_frame(fild);
3238 set_ia32_op_type(fild, ia32_AddrModeS);
3239 set_ia32_ls_mode(fild, store_mode);
3241 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3247  * Create a conversion from one integer mode into another one
/* Emits a Conv_I2I(8Bit) that sign/zero-extends from the smaller of the
 * two modes. If the operand's upper bits are already clean the Conv is
 * skipped entirely (the matcher must then not have used AM). */
3249 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3250 dbg_info *dbgi, ir_node *block, ir_node *op,
3253 ir_graph *irg = current_ir_graph;
3254 int src_bits = get_mode_size_bits(src_mode);
3255 int tgt_bits = get_mode_size_bits(tgt_mode);
3256 ir_node *new_block = be_transform_node(block);
3258 ir_mode *smaller_mode;
3260 ia32_address_mode_t am;
3261 ia32_address_t *addr = &am.addr;
/* the extension always happens from the smaller mode */
3264 if (src_bits < tgt_bits) {
3265 smaller_mode = src_mode;
3266 smaller_bits = src_bits;
3268 smaller_mode = tgt_mode;
3269 smaller_bits = tgt_bits;
3272 #ifdef DEBUG_libfirm
3274 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3279 match_arguments(&am, block, NULL, op, NULL,
3280 match_8bit | match_16bit |
3281 match_am | match_8bit_am | match_16bit_am);
3283 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3284 /* unnecessary conv. in theory it shouldn't have been AM */
3285 assert(is_ia32_NoReg_GP(addr->base));
3286 assert(is_ia32_NoReg_GP(addr->index));
3287 assert(is_NoMem(addr->mem));
3288 assert(am.addr.offset == 0);
3289 assert(am.addr.symconst_ent == NULL);
3293 if (smaller_bits == 8) {
3294 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3295 addr->index, addr->mem, am.new_op2,
3298 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3299 addr->index, addr->mem, am.new_op2,
3302 set_am_attributes(new_node, &am);
3303 /* match_arguments assume that out-mode = in-mode, this isn't true here
3305 set_ia32_ls_mode(new_node, smaller_mode);
3306 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3307 new_node = fix_mem_proj(new_node, &am);
3312  * Transforms a Conv node.
3314  * @return The created ia32 Conv node
/* Central Conv dispatcher. Handles: bool sources (no-op — bools are already
 * 0/1 ints), same-mode no-ops (keeping x87 strict semantics in mind),
 * float<->float (SSE Conv_FP2FP or x87 strict store/load round-trip),
 * float->int (SSE Conv_FP2I or x87 fist path), int->float (SSE Conv_I2FP
 * or x87 fild path, with an extra strict rounding step when the int has
 * more significant bits than the float mantissa), and int<->int. */
3316 static ir_node *gen_Conv(ir_node *node) {
3317 ir_node *block = get_nodes_block(node);
3318 ir_node *new_block = be_transform_node(block);
3319 ir_node *op = get_Conv_op(node);
3320 ir_node *new_op = NULL;
3321 ir_graph *irg = current_ir_graph;
3322 dbg_info *dbgi = get_irn_dbg_info(node);
3323 ir_mode *src_mode = get_irn_mode(op);
3324 ir_mode *tgt_mode = get_irn_mode(node);
3325 int src_bits = get_mode_size_bits(src_mode);
3326 int tgt_bits = get_mode_size_bits(tgt_mode);
3327 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3328 ir_node *nomem = new_rd_NoMem(irg);
3329 ir_node *res = NULL;
3331 if (src_mode == mode_b) {
3332 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3333 /* nothing to do, we already model bools as 0/1 ints */
3334 return be_transform_node(op);
3337 if (src_mode == tgt_mode) {
3338 if (get_Conv_strict(node)) {
3339 if (ia32_cg_config.use_sse2) {
3340 /* when we are in SSE mode, we can kill all strict no-op conversion */
3341 return be_transform_node(op);
3344 /* this should be optimized already, but who knows... */
3345 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3346 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3347 return be_transform_node(op);
3351 if (mode_is_float(src_mode)) {
3352 new_op = be_transform_node(op);
3353 /* we convert from float ... */
3354 if (mode_is_float(tgt_mode)) {
/* non-strict E->D narrowing needs no code on x87 */
3355 if(src_mode == mode_E && tgt_mode == mode_D
3356 && !get_Conv_strict(node)) {
3357 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3362 if (ia32_cg_config.use_sse2) {
3363 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3364 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3366 set_ia32_ls_mode(res, tgt_mode);
3368 if(get_Conv_strict(node)) {
3369 res = gen_x87_strict_conv(tgt_mode, new_op);
3370 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3373 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3378 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3379 if (ia32_cg_config.use_sse2) {
3380 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3382 set_ia32_ls_mode(res, src_mode);
3384 return gen_x87_fp_to_gp(node);
3388 /* we convert from int ... */
3389 if (mode_is_float(tgt_mode)) {
3391 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3392 if (ia32_cg_config.use_sse2) {
3393 new_op = be_transform_node(op);
3394 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3396 set_ia32_ls_mode(res, tgt_mode);
3398 res = gen_x87_gp_to_fp(node, src_mode);
3399 if(get_Conv_strict(node)) {
3400 /* The strict-Conv is only necessary, if the int mode has more bits
3401  * than the float mantissa */
3402 size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3403 size_t float_mantissa;
3404 /* FIXME There is no way to get the mantissa size of a mode */
3405 switch (get_mode_size_bits(tgt_mode)) {
3406 case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
3407 case 64: float_mantissa = 52 + 1; break;
3409 case 96: float_mantissa = 64; break;
3410 default: float_mantissa = 0; break;
3412 if (float_mantissa < int_mantissa) {
3413 res = gen_x87_strict_conv(tgt_mode, res);
3414 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3419 } else if(tgt_mode == mode_b) {
3420 /* mode_b lowering already took care that we only have 0/1 values */
3421 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3422 src_mode, tgt_mode));
3423 return be_transform_node(op);
3426 if (src_bits == tgt_bits) {
/* same-width int<->int (e.g. int<->pointer) is a no-op */
3427 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3428 src_mode, tgt_mode));
3429 return be_transform_node(op);
3432 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Tries to encode the node as an ia32 Immediate (subject to the given
 * constraint type); falls back to the regular transformation otherwise. */
3440 static ir_node *create_immediate_or_transform(ir_node *node,
3441 char immediate_constraint_type)
3443 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3444 if (new_node == NULL) {
3445 new_node = be_transform_node(node);
3451  * Transforms a FrameAddr into an ia32 Add.
/* Lowered as a Lea based on the frame pointer; the entity's offset is
 * resolved later via the attached frame entity. */
3453 static ir_node *gen_be_FrameAddr(ir_node *node) {
3454 ir_node *block = be_transform_node(get_nodes_block(node));
3455 ir_node *op = be_get_FrameAddr_frame(node);
3456 ir_node *new_op = be_transform_node(op);
3457 ir_graph *irg = current_ir_graph;
3458 dbg_info *dbgi = get_irn_dbg_info(node);
3459 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3462 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3463 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3464 set_ia32_use_frame(new_node);
3466 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3472  * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* The ia32 ABI returns floats on the x87 stack. When the value was computed
 * in SSE registers it is spilled to the frame (xStoreSimple) and reloaded
 * into an x87 register (vfld); the Barrier before the Return is rebuilt so
 * its value/memory inputs reference the new nodes. All other returns are
 * plainly duplicated. */
3474 static ir_node *gen_be_Return(ir_node *node) {
3475 ir_graph *irg = current_ir_graph;
3476 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3477 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3478 ir_entity *ent = get_irg_entity(irg);
3479 ir_type *tp = get_entity_type(ent);
3484 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3485 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3488 int pn_ret_val, pn_ret_mem, arity, i;
3490 assert(ret_val != NULL);
/* only relevant for single-float-result functions compiled with SSE */
3491 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3492 return be_duplicate_node(node);
3495 res_type = get_method_res_type(tp, 0);
3497 if (! is_Primitive_type(res_type)) {
3498 return be_duplicate_node(node);
3501 mode = get_type_mode(res_type);
3502 if (! mode_is_float(mode)) {
3503 return be_duplicate_node(node);
3506 assert(get_method_n_ress(tp) == 1);
3508 pn_ret_val = get_Proj_proj(ret_val);
3509 pn_ret_mem = get_Proj_proj(ret_mem);
3511 /* get the Barrier */
3512 barrier = get_Proj_pred(ret_val);
3514 /* get result input of the Barrier */
3515 ret_val = get_irn_n(barrier, pn_ret_val);
3516 new_ret_val = be_transform_node(ret_val);
3518 /* get memory input of the Barrier */
3519 ret_mem = get_irn_n(barrier, pn_ret_mem);
3520 new_ret_mem = be_transform_node(ret_mem);
3522 frame = get_irg_frame(irg);
3524 dbgi = get_irn_dbg_info(barrier);
3525 block = be_transform_node(get_nodes_block(barrier));
3527 noreg = ia32_new_NoReg_gp(env_cg);
3529 /* store xmm0 onto stack */
3530 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3531 new_ret_mem, new_ret_val);
3532 set_ia32_ls_mode(sse_store, mode);
3533 set_ia32_op_type(sse_store, ia32_AddrModeD);
3534 set_ia32_use_frame(sse_store);
3536 /* load into x87 register */
3537 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3538 set_ia32_op_type(fld, ia32_AddrModeS);
3539 set_ia32_use_frame(fld);
3541 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3542 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3544 /* create a new barrier */
3545 arity = get_irn_arity(barrier);
3546 in = alloca(arity * sizeof(in[0]));
3547 for (i = 0; i < arity; ++i) {
/* value/memory inputs are replaced by the vfld result/memory Proj */
3550 if (i == pn_ret_val) {
3552 } else if (i == pn_ret_mem) {
3555 ir_node *in = get_irn_n(barrier, i);
3556 new_in = be_transform_node(in);
3561 new_barrier = new_ir_node(dbgi, irg, block,
3562 get_irn_op(barrier), get_irn_mode(barrier),
3564 copy_node_attr(barrier, new_barrier);
3565 be_duplicate_deps(barrier, new_barrier);
3566 set_transformed_and_mark(barrier, new_barrier);
3568 /* transform normally */
3569 return be_duplicate_node(node);
3573  * Transform a be_AddSP into an ia32_SubSP.
/* The stack grows downwards on ia32, so enlarging the stack frame (AddSP)
 * maps to a subtraction from the stack pointer. */
3575 static ir_node *gen_be_AddSP(ir_node *node)
3577 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3578 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3580 return gen_binop(node, sp, sz, new_rd_ia32_SubSP,
3581 match_am | match_immediate);
3585  * Transform a be_SubSP into an ia32_AddSP
/* Inverse of gen_be_AddSP: shrinking the frame adds to the stack pointer. */
3587 static ir_node *gen_be_SubSP(ir_node *node)
3589 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3590 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3592 return gen_binop(node, sp, sz, new_rd_ia32_AddSP,
3593 match_am | match_immediate);
3597  * Change some phi modes
/* Duplicates a Phi while normalizing its mode to the backend's register
 * classes. Because Phis may sit on loops, the predecessors cannot be
 * transformed eagerly — the old inputs are kept and the Phi is enqueued so
 * its preds are fixed up later. */
3599 static ir_node *gen_Phi(ir_node *node) {
3600 ir_node *block = be_transform_node(get_nodes_block(node));
3601 ir_graph *irg = current_ir_graph;
3602 dbg_info *dbgi = get_irn_dbg_info(node);
3603 ir_mode *mode = get_irn_mode(node);
3606 if(ia32_mode_needs_gp_reg(mode)) {
3607 /* we shouldn't have any 64bit stuff around anymore */
3608 assert(get_mode_size_bits(mode) <= 32);
3609 /* all integer operations are on 32bit registers now */
3611 } else if(mode_is_float(mode)) {
3612 if (ia32_cg_config.use_sse2) {
3619 /* phi nodes allow loops, so we use the old arguments for now
3620  * and fix this later */
3621 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3622 get_irn_in(node) + 1);
3623 copy_node_attr(node, phi);
3624 be_duplicate_deps(node, phi);
3626 be_set_transformed_node(node, phi);
3627 be_enqueue_preds(node);
/* Transform an IJmp (computed/indirect jump). The target must be a pointer
 * (mode_P assert below); match_arguments() tries to fold the target into an
 * address mode or immediate, and fix_mem_proj() repairs the memory proj if a
 * load was folded into the jump. */
3635 static ir_node *gen_IJmp(ir_node *node)
3637 ir_node *block = get_nodes_block(node);
3638 ir_node *new_block = be_transform_node(block);
3639 dbg_info *dbgi = get_irn_dbg_info(node);
3640 ir_node *op = get_IJmp_target(node);
3642 ia32_address_mode_t am;
3643 ia32_address_t *addr = &am.addr;
3645 assert(get_irn_mode(op) == mode_P);
3647 match_arguments(&am, block, NULL, op, NULL,
3648 match_am | match_8bit_am | match_16bit_am |
3649 match_immediate | match_8bit | match_16bit);
3651 new_node = new_rd_ia32_IJmp(dbgi, current_ir_graph, new_block,
3652 addr->base, addr->index, addr->mem,
3654 set_am_attributes(new_node, &am);
3655 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3657 new_node = fix_mem_proj(new_node, &am);
3663 * Transform a Bound node.
/* Only the common case lower==0 (typical for Java array checks) is
 * supported: it becomes an unsigned compare "index - upper" whose flags feed
 * a Jcc with pn_Cmp_Lt|unsigned (index < upper as unsigned also catches
 * negative indices). The general Bound case panics. */
3665 static ir_node *gen_Bound(ir_node *node)
3668 ir_node *lower = get_Bound_lower(node);
3669 dbg_info *dbgi = get_irn_dbg_info(node);
3671 if (is_Const_0(lower)) {
3672 /* typical case for Java */
3673 ir_node *sub, *res, *flags, *block;
3674 ir_graph *irg = current_ir_graph;
3676 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3677 new_rd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3679 block = get_nodes_block(res);
3680 if (! is_Proj(res)) {
/* gen_binop returned the Sub itself: switch it to mode_T so both the
 * result and the flags can be projected out of it. */
3682 set_irn_mode(sub, mode_T);
3683 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3685 sub = get_Proj_pred(res);
3687 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3688 new_node = new_rd_ia32_Jcc(dbgi, irg, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3689 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3691 panic("generic Bound not supported in ia32 Backend");
/* Transform a lowered ShlDep (shift-left with an extra scheduling
 * dependency) into a real ia32 Shl. */
3697 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3699 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3700 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3702 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
3703 match_immediate | match_mode_neutral);
/* Transform a lowered ShrDep into a real ia32 Shr (logical shift right). */
3706 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3708 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3709 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3710 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/* Transform a lowered SarDep into a real ia32 Sar (arithmetic shift right). */
3714 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3716 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3717 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3718 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/* Transform a lowered Add (low word of a 64bit addition). The resulting
 * ia32_Add is forced to mode_T so the carry-flag proj consumed by the
 * matching Adc (high word) can attach to it. */
3722 static ir_node *gen_ia32_l_Add(ir_node *node) {
3723 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3724 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3725 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
3726 match_commutative | match_am | match_immediate |
3727 match_mode_neutral);
3729 if(is_Proj(lowered)) {
3730 lowered = get_Proj_pred(lowered);
3732 assert(is_ia32_Add(lowered));
3733 set_irn_mode(lowered, mode_T);
/* Transform a lowered Adc (add-with-carry, high word of a 64bit addition);
 * gen_binop_flags wires in the flags/carry input. */
3739 static ir_node *gen_ia32_l_Adc(ir_node *node)
3741 return gen_binop_flags(node, new_rd_ia32_Adc,
3742 match_commutative | match_am | match_immediate |
3743 match_mode_neutral);
3747 * Transforms a l_MulS into a "real" MulS node.
3749 * @return the created ia32 Mul node
/* Unsigned widening multiply (one-operand form writing EDX:EAX). */
3751 static ir_node *gen_ia32_l_Mul(ir_node *node) {
3752 ir_node *left = get_binop_left(node);
3753 ir_node *right = get_binop_right(node);
3755 return gen_binop(node, left, right, new_rd_ia32_Mul,
3756 match_commutative | match_am | match_mode_neutral);
3760 * Transforms a l_IMulS into a "real" IMul1OPS node.
3762 * @return the created ia32 IMul1OP node
/* Signed widening multiply (one-operand imul writing EDX:EAX). */
3764 static ir_node *gen_ia32_l_IMul(ir_node *node) {
3765 ir_node *left = get_binop_left(node);
3766 ir_node *right = get_binop_right(node);
3768 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
3769 match_commutative | match_am | match_mode_neutral);
/* Transform a lowered Sub (low word of a 64bit subtraction); mode_T is
 * forced so the borrow-flag proj consumed by the matching Sbb can attach.
 * Note: NOT match_commutative — subtraction order matters. */
3772 static ir_node *gen_ia32_l_Sub(ir_node *node) {
3773 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3774 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3775 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
3776 match_am | match_immediate | match_mode_neutral);
3778 if(is_Proj(lowered)) {
3779 lowered = get_Proj_pred(lowered);
3781 assert(is_ia32_Sub(lowered));
3782 set_irn_mode(lowered, mode_T);
/* Transform a lowered Sbb (subtract-with-borrow, high word of a 64bit
 * subtraction); gen_binop_flags wires in the flags/borrow input. */
3788 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
3789 return gen_binop_flags(node, new_rd_ia32_Sbb,
3790 match_am | match_immediate | match_mode_neutral);
3794 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3795 * op1 - target to be shifted
3796 * op2 - contains bits to be shifted into target
3798 * Only op3 can be an immediate.
/* Shared helper for the 64bit double-shift lowerings. Convs on the shift
 * count are skipped (with a single-user check so no other use loses the
 * Conv) because the hardware only looks at the low 5 bits of the count. */
3800 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3801 ir_node *low, ir_node *count)
3803 ir_node *block = get_nodes_block(node);
3804 ir_node *new_block = be_transform_node(block);
3805 ir_graph *irg = current_ir_graph;
3806 dbg_info *dbgi = get_irn_dbg_info(node);
3807 ir_node *new_high = be_transform_node(high);
3808 ir_node *new_low = be_transform_node(low);
3812 /* the shift amount can be any mode that is bigger than 5 bits, since all
3813 * other bits are ignored anyway */
3814 while (is_Conv(count) &&
3815 get_irn_n_edges(count) == 1 &&
3816 mode_is_int(get_irn_mode(count))) {
3817 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3818 count = get_Conv_op(count);
3820 new_count = create_immediate_or_transform(count, 0);
3822 if (is_ia32_l_ShlD(node)) {
3823 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
3826 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
3829 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transform a lowered 64bit shift-left-double into an ia32 ShlD. */
3834 static ir_node *gen_ia32_l_ShlD(ir_node *node)
3836 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
3837 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
3838 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
3839 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered 64bit shift-right-double into an ia32 ShrD. */
3842 static ir_node *gen_ia32_l_ShrD(ir_node *node)
3844 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
3845 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
3846 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
3847 return gen_lowered_64bit_shifts(node, high, low, count);
/* Transform a lowered long-long -> float conversion: spill both 32bit
 * halves to a frame slot, Sync the two stores, then x87-load the slot as a
 * 64bit integer (vfild, ls_mode mode_Ls). Unsigned sources are rejected. */
3850 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
3851 ir_node *src_block = get_nodes_block(node);
3852 ir_node *block = be_transform_node(src_block);
3853 ir_graph *irg = current_ir_graph;
3854 dbg_info *dbgi = get_irn_dbg_info(node);
3855 ir_node *frame = get_irg_frame(irg);
3856 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3857 ir_node *nomem = new_NoMem();
3858 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
3859 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
3860 ir_node *new_val_low = be_transform_node(val_low);
3861 ir_node *new_val_high = be_transform_node(val_high);
3866 ir_node *store_high;
3868 if(!mode_is_signed(get_irn_mode(val_high))) {
3869 panic("unsigned long long -> float not supported yet (%+F)", node);
3873 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3875 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
3877 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
3878 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
3880 set_ia32_use_frame(store_low);
3881 set_ia32_use_frame(store_high);
3882 set_ia32_op_type(store_low, ia32_AddrModeD);
3883 set_ia32_op_type(store_high, ia32_AddrModeD);
/* low half is stored unsigned (mode_Iu), high half signed (mode_Is) since
 * it carries the sign of the 64bit value */
3884 set_ia32_ls_mode(store_low, mode_Iu);
3885 set_ia32_ls_mode(store_high, mode_Is);
/* little endian: high word lives 4 bytes above the low word */
3886 add_ia32_am_offs_int(store_high, 4);
3890 sync = new_rd_Sync(dbgi, irg, block, 2, in);
3893 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
3895 set_ia32_use_frame(fild);
3896 set_ia32_op_type(fild, ia32_AddrModeS);
3897 set_ia32_ls_mode(fild, mode_Ls);
3899 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3901 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* Transform a lowered float -> long-long conversion: store the x87 value
 * into a 64bit frame slot with fist (ls_mode mode_Ls). The two 32bit result
 * halves are read back by gen_Proj_l_FloattoLL. */
3904 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
3905 ir_node *src_block = get_nodes_block(node);
3906 ir_node *block = be_transform_node(src_block);
3907 ir_graph *irg = current_ir_graph;
3908 dbg_info *dbgi = get_irn_dbg_info(node);
3909 ir_node *frame = get_irg_frame(irg);
3910 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3911 ir_node *nomem = new_NoMem();
3912 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
3913 ir_node *new_val = be_transform_node(val);
3914 ir_node *fist, *mem;
3916 mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
3917 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
3918 set_ia32_use_frame(fist);
3919 set_ia32_op_type(fist, ia32_AddrModeD);
3920 set_ia32_ls_mode(fist, mode_Ls);
3926 * the BAD transformer.
/* Registered (via BAD()) for opcodes that must never reach the backend
 * transformation; aborts with a diagnostic naming the offending node. */
3928 static ir_node *bad_transform(ir_node *node) {
3929 panic("No transform function for %+F available.", node);
/* Transform the res_low/res_high Projs of an l_FloattoLL: read the wanted
 * 32bit half back from the 64bit fist slot with a gp Load (offset +4 for the
 * high half on little-endian ia32). */
3933 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
3934 ir_graph *irg = current_ir_graph;
3935 ir_node *block = be_transform_node(get_nodes_block(node));
3936 ir_node *pred = get_Proj_pred(node);
3937 ir_node *new_pred = be_transform_node(pred);
3938 ir_node *frame = get_irg_frame(irg);
3939 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3940 dbg_info *dbgi = get_irn_dbg_info(node);
3941 long pn = get_Proj_proj(node);
3946 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
3947 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3948 set_ia32_use_frame(load);
3949 set_ia32_op_type(load, ia32_AddrModeS);
3950 set_ia32_ls_mode(load, mode_Iu);
3951 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
3952 * 32 bit from it with this particular load */
3953 attr = get_ia32_attr(load);
3954 attr->data.need_64bit_stackent = 1;
3956 if (pn == pn_ia32_l_FloattoLL_res_high) {
3957 add_ia32_am_offs_int(load, 4);
3959 assert(pn == pn_ia32_l_FloattoLL_res_low);
3962 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3968 * Transform the Projs of an AddSP.
/* Note the deliberate swap: a be_AddSP was transformed into an ia32_SubSP
 * (stack grows downwards), so be_AddSP proj numbers map onto SubSP proj
 * numbers here. The sp result is pinned to esp. */
3970 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
3971 ir_node *block = be_transform_node(get_nodes_block(node));
3972 ir_node *pred = get_Proj_pred(node);
3973 ir_node *new_pred = be_transform_node(pred);
3974 ir_graph *irg = current_ir_graph;
3975 dbg_info *dbgi = get_irn_dbg_info(node);
3976 long proj = get_Proj_proj(node);
3978 if (proj == pn_be_AddSP_sp) {
3979 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3980 pn_ia32_SubSP_stack);
3981 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
3983 } else if(proj == pn_be_AddSP_res) {
3984 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
3985 pn_ia32_SubSP_addr);
3986 } else if (proj == pn_be_AddSP_M) {
3987 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
3990 panic("No idea how to transform proj->AddSP");
3994 * Transform the Projs of a SubSP.
/* Mirror of gen_Proj_be_AddSP: be_SubSP became an ia32_AddSP, so its projs
 * map onto AddSP proj numbers; the sp result is pinned to esp. */
3996 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
3997 ir_node *block = be_transform_node(get_nodes_block(node));
3998 ir_node *pred = get_Proj_pred(node);
3999 ir_node *new_pred = be_transform_node(pred);
4000 ir_graph *irg = current_ir_graph;
4001 dbg_info *dbgi = get_irn_dbg_info(node);
4002 long proj = get_Proj_proj(node);
4004 if (proj == pn_be_SubSP_sp) {
4005 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4006 pn_ia32_AddSP_stack);
4007 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4009 } else if (proj == pn_be_SubSP_M) {
4010 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4013 panic("No idea how to transform proj->SubSP");
4017 * Transform and renumber the Projs from a Load.
/* Renumber Load projs onto the matching ia32 node (plain Load, a Conv the
 * load was folded into, SSE xLoad, or x87 vfld). Multi-user loads keep their
 * ProjM untransformed for now so the load can still be folded into an
 * address mode of another node. */
4019 static ir_node *gen_Proj_Load(ir_node *node) {
4021 ir_node *block = be_transform_node(get_nodes_block(node));
4022 ir_node *pred = get_Proj_pred(node);
4023 ir_graph *irg = current_ir_graph;
4024 dbg_info *dbgi = get_irn_dbg_info(node);
4025 long proj = get_Proj_proj(node);
4027 /* loads might be part of source address mode matches, so we don't
4028 * transform the ProjMs yet (with the exception of loads whose result is
4031 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4034 /* this is needed, because sometimes we have loops that are only
4035 reachable through the ProjM */
4036 be_enqueue_preds(node);
4037 /* do it in 2 steps, to silence firm verifier */
4038 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4039 set_Proj_proj(res, pn_ia32_mem);
4043 /* renumber the proj */
4044 new_pred = be_transform_node(pred);
4045 if (is_ia32_Load(new_pred)) {
4048 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4050 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4051 case pn_Load_X_regular:
4052 return new_rd_Jmp(dbgi, irg, block);
4053 case pn_Load_X_except:
4054 /* This Load might raise an exception. Mark it. */
4055 set_ia32_exc_label(new_pred, 1);
4056 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4060 } else if (is_ia32_Conv_I2I(new_pred) ||
4061 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the load was folded into a Conv: project result/mem out of the Conv */
4062 set_irn_mode(new_pred, mode_T);
4063 if (proj == pn_Load_res) {
4064 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4065 } else if (proj == pn_Load_M) {
4066 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4068 } else if (is_ia32_xLoad(new_pred)) {
4071 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4073 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4074 case pn_Load_X_regular:
4075 return new_rd_Jmp(dbgi, irg, block);
4076 case pn_Load_X_except:
4077 /* This Load might raise an exception. Mark it. */
4078 set_ia32_exc_label(new_pred, 1);
4079 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4083 } else if (is_ia32_vfld(new_pred)) {
4086 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4088 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4089 case pn_Load_X_regular:
4090 return new_rd_Jmp(dbgi, irg, block);
4091 case pn_Load_X_except:
4092 /* This Load might raise an exception. Mark it. */
4093 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): pn_ia32_xLoad_X_exc in this vfld branch looks like a
 * copy-paste from the xLoad branch above; pn_ia32_vfld_X_exc would be
 * expected here — verify against the node-spec and fix if confirmed. */
4094 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4099 /* can happen for ProJMs when source address mode happened for the
4102 /* however it should not be the result proj, as that would mean the
4103 load had multiple users and should not have been used for
4105 if (proj != pn_Load_M) {
4106 panic("internal error: transformed node not a Load");
4108 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4111 panic("No idea how to transform proj");
4115 * Transform and renumber the Projs from a DivMod like instruction.
/* Div, Mod and DivMod all become a single ia32 Div/IDiv; this maps the
 * firm proj numbers (per original opcode) onto the ia32 Div proj numbers
 * (div_res = quotient in eax, mod_res = remainder in edx). */
4117 static ir_node *gen_Proj_DivMod(ir_node *node) {
4118 ir_node *block = be_transform_node(get_nodes_block(node));
4119 ir_node *pred = get_Proj_pred(node);
4120 ir_node *new_pred = be_transform_node(pred);
4121 ir_graph *irg = current_ir_graph;
4122 dbg_info *dbgi = get_irn_dbg_info(node);
4123 long proj = get_Proj_proj(node);
4125 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4127 switch (get_irn_opcode(pred)) {
4131 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4133 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4134 case pn_Div_X_regular:
4135 return new_rd_Jmp(dbgi, irg, block);
4136 case pn_Div_X_except:
4137 set_ia32_exc_label(new_pred, 1);
4138 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4146 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4148 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4149 case pn_Mod_X_except:
4150 set_ia32_exc_label(new_pred, 1);
4151 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4159 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4160 case pn_DivMod_res_div:
4161 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4162 case pn_DivMod_res_mod:
4163 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4164 case pn_DivMod_X_regular:
4165 return new_rd_Jmp(dbgi, irg, block);
4166 case pn_DivMod_X_except:
4167 set_ia32_exc_label(new_pred, 1);
4168 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4177 panic("No idea how to transform proj->DivMod");
4181 * Transform and renumber the Projs from a CopyB.
/* Only the memory proj is meaningful for a CopyB; it is renumbered for
 * either the rep-movs variant with immediate count (CopyB_i) or the general
 * CopyB. Anything else falls through to the panic. */
4183 static ir_node *gen_Proj_CopyB(ir_node *node) {
4184 ir_node *block = be_transform_node(get_nodes_block(node));
4185 ir_node *pred = get_Proj_pred(node);
4186 ir_node *new_pred = be_transform_node(pred);
4187 ir_graph *irg = current_ir_graph;
4188 dbg_info *dbgi = get_irn_dbg_info(node);
4189 long proj = get_Proj_proj(node);
4192 case pn_CopyB_M_regular:
4193 if (is_ia32_CopyB_i(new_pred)) {
4194 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4195 } else if (is_ia32_CopyB(new_pred)) {
4196 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4203 panic("No idea how to transform proj->CopyB");
4207 * Transform and renumber the Projs from a Quot.
/* Float division was transformed into either an SSE xDiv or an x87 vfdiv;
 * renumber mem/result projs accordingly (result mode follows the unit:
 * mode_xmm for SSE, mode_vfp for x87). */
4209 static ir_node *gen_Proj_Quot(ir_node *node) {
4210 ir_node *block = be_transform_node(get_nodes_block(node));
4211 ir_node *pred = get_Proj_pred(node);
4212 ir_node *new_pred = be_transform_node(pred);
4213 ir_graph *irg = current_ir_graph;
4214 dbg_info *dbgi = get_irn_dbg_info(node);
4215 long proj = get_Proj_proj(node);
4219 if (is_ia32_xDiv(new_pred)) {
4220 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4221 } else if (is_ia32_vfdiv(new_pred)) {
4222 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4226 if (is_ia32_xDiv(new_pred)) {
4227 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4228 } else if (is_ia32_vfdiv(new_pred)) {
4229 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4232 case pn_Quot_X_regular:
4233 case pn_Quot_X_except:
4238 panic("No idea how to transform proj->Quot");
/* Transform a be_Call: duplicate it, mark it as clobbering the flags, and
 * request the x87 simulator whenever the first call result is a float (the
 * ABI returns floats in st(0)). */
4241 static ir_node *gen_be_Call(ir_node *node) {
4242 ir_node *res = be_duplicate_node(node);
4245 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4247 /* Run the x87 simulator if the call returns a float value */
4248 call_tp = be_Call_get_type(node);
4249 if (get_method_n_ress(call_tp) > 0) {
4250 ir_type *const res_type = get_method_res_type(call_tp, 0);
4251 ir_mode *const res_mode = get_type_mode(res_type);
4253 if (res_mode != NULL && mode_is_float(res_mode)) {
4254 env_cg->do_x87_sim = 1;
/* Transform a be_IncSP: duplicate it and mark it as clobbering the flags
 * (it is implemented with add/sub on esp). */
4261 static ir_node *gen_be_IncSP(ir_node *node) {
4262 ir_node *res = be_duplicate_node(node);
4263 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4269 * Transform the Projs from a be_Call.
/* Besides plain proj renumbering this implements the SSE return-value hack:
 * with SSE2, a float returned in st(0) is stored to the frame (vfst) and
 * reloaded into an xmm register (xLoad); the memory proj must then hang off
 * that xLoad instead of the call itself so the sequence is ordered. */
4271 static ir_node *gen_Proj_be_Call(ir_node *node) {
4272 ir_node *block = be_transform_node(get_nodes_block(node));
4273 ir_node *call = get_Proj_pred(node);
4274 ir_node *new_call = be_transform_node(call);
4275 ir_graph *irg = current_ir_graph;
4276 dbg_info *dbgi = get_irn_dbg_info(node);
4277 ir_type *method_type = be_Call_get_type(call);
4278 int n_res = get_method_n_ress(method_type);
4279 long proj = get_Proj_proj(node);
4280 ir_mode *mode = get_irn_mode(node);
4282 const arch_register_class_t *cls;
4284 /* The following is kinda tricky: If we're using SSE, then we have to
4285 * move the result value of the call in floating point registers to an
4286 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4287 * after the call, we have to make sure to correctly make the
4288 * MemProj and the result Proj use these 2 nodes
4290 if (proj == pn_be_Call_M_regular) {
4291 // get new node for result, are we doing the sse load/store hack?
4292 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4293 ir_node *call_res_new;
4294 ir_node *call_res_pred = NULL;
4296 if (call_res != NULL) {
4297 call_res_new = be_transform_node(call_res);
4298 call_res_pred = get_Proj_pred(call_res_new);
4301 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4302 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4303 pn_be_Call_M_regular);
4305 assert(is_ia32_xLoad(call_res_pred));
4306 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4310 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4311 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4313 ir_node *frame = get_irg_frame(irg);
4314 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4316 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4319 /* in case there is no memory output: create one to serialize the copy
4321 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4322 pn_be_Call_M_regular);
4323 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4324 pn_be_Call_first_res);
4326 /* store st(0) onto stack */
4327 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4329 set_ia32_op_type(fstp, ia32_AddrModeD);
4330 set_ia32_use_frame(fstp);
4332 /* load into SSE register */
4333 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4335 set_ia32_op_type(sse_load, ia32_AddrModeS);
4336 set_ia32_use_frame(sse_load);
4338 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4344 /* transform call modes */
4345 if (mode_is_data(mode)) {
4346 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4350 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4354 * Transform the Projs from a Cmp.
/* A Proj on a Cmp should have been consumed by the mode_b lowering pass
 * before the backend runs; reaching this transformer is a pipeline bug. */
4356 static ir_node *gen_Proj_Cmp(ir_node *node)
4358 /* this probably means not all mode_b nodes were lowered... */
4359 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4364 * Transform the Projs from a Bound.
/* gen_Bound produced a Jcc: the regular exit maps to its true proj, the
 * exception exit to its false proj; the mem and index projs simply pass
 * through to the original Bound operands. */
4366 static ir_node *gen_Proj_Bound(ir_node *node)
4368 ir_node *new_node, *block;
4369 ir_node *pred = get_Proj_pred(node);
4371 switch (get_Proj_proj(node)) {
4373 return be_transform_node(get_Bound_mem(pred));
4374 case pn_Bound_X_regular:
4375 new_node = be_transform_node(pred);
4376 block = get_nodes_block(new_node);
4377 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
4378 case pn_Bound_X_except:
4379 new_node = be_transform_node(pred);
4380 block = get_nodes_block(new_node);
4381 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
4383 return be_transform_node(get_Bound_index(pred));
4385 panic("unsupported Proj from Bound");
/* Transform a Proj from an ASM node: value projs are duplicated as-is; the
 * memory proj is renumbered to the slot after the ASM's register results. */
4389 static ir_node *gen_Proj_ASM(ir_node *node)
4395 if (get_irn_mode(node) != mode_M)
4396 return be_duplicate_node(node);
4398 pred = get_Proj_pred(node);
4399 new_pred = be_transform_node(pred);
4400 block = get_nodes_block(new_pred);
4401 return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
4402 get_ia32_n_res(new_pred) + 1);
4406 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: routes to the specialized gen_Proj_* handler
 * based on the predecessor's opcode; the default path forces gp-register
 * values to mode_Iu and otherwise just duplicates the proj. */
4408 static ir_node *gen_Proj(ir_node *node) {
4409 ir_node *pred = get_Proj_pred(node);
4412 switch (get_irn_opcode(pred)) {
4414 proj = get_Proj_proj(node);
4415 if (proj == pn_Store_M) {
4416 return be_transform_node(pred);
4418 panic("No idea how to transform proj->Store");
4421 return gen_Proj_Load(node);
4423 return gen_Proj_ASM(node);
4427 return gen_Proj_DivMod(node);
4429 return gen_Proj_CopyB(node);
4431 return gen_Proj_Quot(node);
4433 return gen_Proj_be_SubSP(node);
4435 return gen_Proj_be_AddSP(node);
4437 return gen_Proj_be_Call(node);
4439 return gen_Proj_Cmp(node);
4441 return gen_Proj_Bound(node);
4443 proj = get_Proj_proj(node);
4444 if (proj == pn_Start_X_initial_exec) {
4445 ir_node *block = get_nodes_block(pred);
4446 dbg_info *dbgi = get_irn_dbg_info(node);
4449 /* we exchange the ProjX with a jump */
4450 block = be_transform_node(block);
4451 jump = new_rd_Jmp(dbgi, current_ir_graph, block);
4454 if (node == be_get_old_anchor(anchor_tls)) {
4455 return gen_Proj_tls(node);
4460 if (is_ia32_l_FloattoLL(pred)) {
4461 return gen_Proj_l_FloattoLL(node);
4463 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4467 ir_mode *mode = get_irn_mode(node);
4468 if (ia32_mode_needs_gp_reg(mode)) {
4469 ir_node *new_pred = be_transform_node(pred);
4470 ir_node *block = be_transform_node(get_nodes_block(node));
4471 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
4472 mode_Iu, get_Proj_proj(node));
4473 #ifdef DEBUG_libfirm
/* keep the old node number so debug dumps stay comparable */
4474 new_proj->node_nr = node->node_nr;
4480 return be_duplicate_node(node);
4484 * Enters all transform functions into the generic pointer
/* Installs the gen_* handlers in each opcode's generic function pointer
 * (GEN) and bad_transform for opcodes that must not appear (BAD); most
 * registration lines are elided in this listing. */
4486 static void register_transformers(void)
4490 /* first clear the generic function pointer for all ops */
4491 clear_irp_opcodes_generic_func();
4493 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4494 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4532 /* transform ops from intrinsic lowering */
4544 GEN(ia32_l_LLtoFloat);
4545 GEN(ia32_l_FloattoLL);
4551 /* we should never see these nodes */
4566 /* handle generic backend nodes */
4575 op_Mulh = get_op_Mulh();
4584 * Pre-transform all unknown and noreg nodes.
/* Pre-transforms the per-codegen placeholder nodes (Unknown/NoReg for each
 * register class) so later transformations can reference them directly. */
4586 static void ia32_pretransform_node(void *arch_cg) {
4587 ia32_code_gen_t *cg = arch_cg;
4589 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4590 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4591 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4592 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4593 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4594 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
4599 * Walker, checks if all ia32 nodes producing more than one result have their
4600 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
/* Without a user Proj an output would look dead to the register allocator;
 * the be_Keep artificially keeps such outputs alive. found_projs is a
 * bitmask of result numbers that already have a Proj (the n_outs < 32
 * assert guarantees the mask suffices). Flags outputs are skipped. */
4602 static void add_missing_keep_walker(ir_node *node, void *data)
4605 unsigned found_projs = 0;
4606 const ir_edge_t *edge;
4607 ir_mode *mode = get_irn_mode(node);
4612 if(!is_ia32_irn(node))
4615 n_outs = get_ia32_n_res(node);
4618 if(is_ia32_SwitchJmp(node))
4621 assert(n_outs < (int) sizeof(unsigned) * 8);
4622 foreach_out_edge(node, edge) {
4623 ir_node *proj = get_edge_src_irn(edge);
4624 int pn = get_Proj_proj(proj);
4626 if (get_irn_mode(proj) == mode_M)
4629 assert(pn < n_outs);
4630 found_projs |= 1 << pn;
4634 /* are keeps missing? */
4636 for(i = 0; i < n_outs; ++i) {
4639 const arch_register_req_t *req;
4640 const arch_register_class_t *cls;
4642 if(found_projs & (1 << i)) {
4646 req = get_ia32_out_req(node, i);
4651 if(cls == &ia32_reg_classes[CLASS_ia32_flags]) {
4655 block = get_nodes_block(node);
4656 in[0] = new_r_Proj(current_ir_graph, block, node,
4657 arch_register_class_mode(cls), i);
/* reuse one Keep per node if possible; otherwise create and schedule it
 * right after the node */
4658 if(last_keep != NULL) {
4659 be_Keep_add_node(last_keep, cls, in[0]);
4661 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
4662 if(sched_is_scheduled(node)) {
4663 sched_add_after(node, last_keep);
4670 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Public entry point: runs add_missing_keep_walker over the whole graph. */
4673 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
4675 ir_graph *irg = be_get_birg_irg(cg->birg);
4676 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
4679 /* do the transformation */
/* Driver for the whole firm -> ia32 transformation: registers the
 * transformers, builds the heights analysis and the set of nodes excluded
 * from address-mode matching, then runs be_transform_graph with CSE
 * disabled (node attributes are set after creation, which would confuse
 * CSE); the previous CSE setting is restored afterwards. */
4680 void ia32_transform_graph(ia32_code_gen_t *cg) {
4682 ir_graph *irg = cg->irg;
4684 register_transformers();
4686 initial_fpcw = NULL;
4688 BE_TIMER_PUSH(t_heights);
4689 heights = heights_new(irg);
4690 BE_TIMER_POP(t_heights);
4691 ia32_calculate_non_address_mode_nodes(cg->birg);
4693 /* the transform phase is not safe for CSE (yet) because several nodes get
4694 * attributes set after their creation */
4695 cse_last = get_opt_cse();
4698 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
4700 set_opt_cse(cse_last);
4702 ia32_free_non_address_mode_nodes();
4703 heights_free(heights);
4707 void ia32_init_transform(void)
4709 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");