2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
98 static ir_node *initial_fpcw = NULL;
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
139 * Return true if a mode can be stored in the GP register set
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
142 if(mode == mode_fpcw)
144 if(get_mode_size_bits(mode) > 32)
146 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
150 * creates a unique ident by adding a number to a tag
152 * @param tag the tag string, must contain a %d if a number
155 static ident *unique_id(const char *tag)
157 static unsigned id = 0;
160 snprintf(str, sizeof(str), tag, ++id);
161 return new_id_from_str(str);
165 * Get a primitive type for a mode.
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 pmap_entry *e = pmap_find(types, mode);
174 snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 res = new_type_primitive(new_id_from_str(buf), mode);
176 set_type_alignment_bytes(res, 16);
177 pmap_insert(types, mode, res);
185 * Get an atomic entity that is initialized with a tarval
187 static ir_entity *create_float_const_entity(ir_node *cnst)
189 ia32_isa_t *isa = env_cg->isa;
190 tarval *tv = get_Const_tarval(cnst);
191 pmap_entry *e = pmap_find(isa->tv_ent, tv);
196 ir_mode *mode = get_irn_mode(cnst);
197 ir_type *tp = get_Const_type(cnst);
198 if (tp == firm_unknown_type)
199 tp = get_prim_type(isa->types, mode);
201 res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
203 set_entity_ld_ident(res, get_entity_ident(res));
204 set_entity_visibility(res, visibility_local);
205 set_entity_variability(res, variability_constant);
206 set_entity_allocation(res, allocation_static);
208 /* we create a new entity here: It's initialization must resist on the
210 rem = current_ir_graph;
211 current_ir_graph = get_const_code_irg();
212 set_atomic_ent_value(res, new_Const_type(tv, tp));
213 current_ir_graph = rem;
215 pmap_insert(isa->tv_ent, tv, res);
223 static int is_Const_0(ir_node *node) {
224 return is_Const(node) && is_Const_null(node);
227 static int is_Const_1(ir_node *node) {
228 return is_Const(node) && is_Const_one(node);
231 static int is_Const_Minus_1(ir_node *node) {
232 return is_Const(node) && is_Const_all_one(node);
236 * returns true if constant can be created with a simple float command
238 static int is_simple_x87_Const(ir_node *node)
240 tarval *tv = get_Const_tarval(node);
242 if(tarval_is_null(tv) || tarval_is_one(tv))
245 /* TODO: match all the other float constants */
/*
 * NOTE(review): garbled listing — each line carries a stray leading number
 * and interior lines are missing (declarations of 'load', 'res', 'floatent',
 * 'cnst', 'val', several else-branches and closing braces). Comments below
 * describe only the visible logic; verify against upstream ia32_transform.c.
 */
250 * Transforms a Const.
252 static ir_node *gen_Const(ir_node *node) {
253 ir_graph *irg = current_ir_graph;
254 ir_node *old_block = get_nodes_block(node);
255 ir_node *block = be_transform_node(old_block);
256 dbg_info *dbgi = get_irn_dbg_info(node);
257 ir_mode *mode = get_irn_mode(node);
259 assert(is_Const(node));
/* float constants are materialized via a load or a special zero/one node */
261 if (mode_is_float(mode)) {
263 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
264 ir_node *nomem = new_NoMem();
/* SSE2 path: xZero for 0.0, otherwise xLoad from a constant-pool entity */
268 if (ia32_cg_config.use_sse2) {
269 if (is_Const_null(node)) {
270 load = new_rd_ia32_xZero(dbgi, irg, block);
271 set_ia32_ls_mode(load, mode);
274 floatent = create_float_const_entity(node);
276 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
278 set_ia32_op_type(load, ia32_AddrModeS);
279 set_ia32_am_sc(load, floatent);
/* the load can always be redone, so mark it rematerializable (no spill) */
280 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
281 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, otherwise vfld from a constant entity */
284 if (is_Const_null(node)) {
285 load = new_rd_ia32_vfldz(dbgi, irg, block);
287 } else if (is_Const_one(node)) {
288 load = new_rd_ia32_vfld1(dbgi, irg, block);
291 floatent = create_float_const_entity(node);
293 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
294 set_ia32_op_type(load, ia32_AddrModeS);
295 set_ia32_am_sc(load, floatent);
296 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
297 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
299 set_ia32_ls_mode(load, mode);
302 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
304 /* Const Nodes before the initial IncSP are a bad idea, because
305 * they could be spilled and we have no SP ready at that point yet.
306 * So add a dependency to the initial frame pointer calculation to
307 * avoid that situation.
309 if (get_irg_start_block(irg) == block) {
310 add_irn_dep(load, get_irg_frame(irg));
313 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
/* integer path: convert the tarval to 32 bit and emit an ia32 Const */
317 tarval *tv = get_Const_tarval(node);
320 tv = tarval_convert_to(tv, mode_Iu);
322 if(tv == get_tarval_bad() || tv == get_tarval_undefined()
324 panic("couldn't convert constant tarval (%+F)", node);
326 val = get_tarval_long(tv);
328 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
329 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same IncSP issue as for the float case: depend on the initial frame */
332 if (get_irg_start_block(irg) == block) {
333 add_irn_dep(cnst, get_irg_frame(irg));
/*
 * NOTE(review): garbled listing — stray leading numbers and missing interior
 * lines (declarations of 'cnst'/'entity', else-branches, closing braces).
 * Verify against upstream ia32_transform.c before compiling.
 */
341 * Transforms a SymConst.
343 static ir_node *gen_SymConst(ir_node *node) {
344 ir_graph *irg = current_ir_graph;
345 ir_node *old_block = get_nodes_block(node);
346 ir_node *block = be_transform_node(old_block);
347 dbg_info *dbgi = get_irn_dbg_info(node);
348 ir_mode *mode = get_irn_mode(node);
/* float SymConsts are turned into a load of the entity's value */
351 if (mode_is_float(mode)) {
352 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
353 ir_node *nomem = new_NoMem();
355 if (ia32_cg_config.use_sse2)
356 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E)
358 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
359 set_ia32_am_sc(cnst, get_SymConst_entity(node));
360 set_ia32_use_frame(cnst);
/* integer SymConsts: only entity addresses are supported by the backend */
364 if(get_SymConst_kind(node) != symconst_addr_ent) {
365 panic("backend only support symconst_addr_ent (at %+F)", node);
367 entity = get_SymConst_entity(node);
368 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
371 /* Const Nodes before the initial IncSP are a bad idea, because
372 * they could be spilled and we have no SP ready at that point yet
374 if (get_irg_start_block(irg) == block) {
375 add_irn_dep(cnst, get_irg_frame(irg));
378 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/*
 * NOTE(review): garbled listing — stray leading numbers and missing interior
 * lines (struct field names for the tp/mode/align columns, declarations of
 * 'ent'/'cnst'/'tv'/'tp'/'mode'/'rem', closing braces). Verify against
 * upstream ia32_transform.c before compiling.
 */
383 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
384 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
385 static const struct {
387 const char *ent_name;
388 const char *cnst_str;
/* table columns (visible part): type name, entity name, constant string,
 * mode selector (0=Iu, 1=Lu, other=F — see switch below), alignment */
391 } names [ia32_known_const_max] = {
392 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
393 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
394 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
395 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
396 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
/* one cached entity per known-constant kind */
398 static ir_entity *ent_cache[ia32_known_const_max];
400 const char *tp_name, *ent_name, *cnst_str;
408 ent_name = names[kct].ent_name;
409 if (! ent_cache[kct]) {
410 tp_name = names[kct].tp_name;
411 cnst_str = names[kct].cnst_str;
413 switch (names[kct].mode) {
414 case 0: mode = mode_Iu; break;
415 case 1: mode = mode_Lu; break;
416 default: mode = mode_F; break;
418 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
419 tp = new_type_primitive(new_id_from_str(tp_name), mode);
420 /* set the specified alignment */
421 set_type_alignment_bytes(tp, names[kct].align);
423 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
425 set_entity_ld_ident(ent, get_entity_ident(ent));
426 set_entity_visibility(ent, visibility_local);
427 set_entity_variability(ent, variability_constant);
428 set_entity_allocation(ent, allocation_static);
430 /* we create a new entity here: It's initialization must resist on the
432 rem = current_ir_graph;
433 current_ir_graph = get_const_code_irg();
434 cnst = new_Const(mode, tv);
435 current_ir_graph = rem;
437 set_atomic_ent_value(ent, cnst);
439 /* cache the entry */
440 ent_cache[kct] = ent;
443 return ent_cache[kct];
448 * Prints the old node name on cg obst and returns a pointer to it.
450 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
451 ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
453 lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
454 obstack_1grow(isa->name_obst, 0);
455 return obstack_finish(isa->name_obst);
/*
 * NOTE(review): garbled listing — stray leading numbers and missing interior
 * lines (the 'return 0;'/'return 1;' statements of the guards below, the
 * is_Proj() check, local declarations, closing braces). Verify against
 * upstream ia32_transform.c before compiling.
 */
460 * return true if the node is a Proj(Load) and could be used in source address
461 * mode for another node. Will return only true if the @p other node is not
462 * dependent on the memory of the Load (for binary operations use the other
463 * input here, for unary operations use NULL).
465 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
466 ir_node *other, ir_node *other2)
468 ir_mode *mode = get_irn_mode(node);
472 /* float constants are always available */
473 if(is_Const(node) && mode_is_float(mode)) {
474 if(!is_simple_x87_Const(node))
476 if(get_irn_n_edges(node) > 1)
/* otherwise only a Proj(Load, res) in the same block qualifies */
483 load = get_Proj_pred(node);
484 pn = get_Proj_proj(node);
485 if(!is_Load(load) || pn != pn_Load_res)
487 if(get_nodes_block(load) != block)
489 /* we only use address mode if we're the only user of the load */
490 if(get_irn_n_edges(node) > 1)
492 /* in some edge cases with address mode we might reach the load normally
493 * and through some AM sequence, if it is already materialized then we
494 * can't create an AM node from it */
495 if(be_is_transformed(node))
498 /* don't do AM if other node inputs depend on the load (via mem-proj) */
499 if(other != NULL && get_nodes_block(other) == block
500 && heights_reachable_in_block(heights, other, load))
502 if(other2 != NULL && get_nodes_block(other2) == block
503 && heights_reachable_in_block(heights, other2, load))
/*
 * NOTE(review): garbled listing — struct fields are missing here (the code
 * below accesses am->addr, am->ls_mode, am->pinned, am->mem_proj, am->new_op1
 * and am->new_op2, which must be declared in this struct upstream).
 */
509 typedef struct ia32_address_mode_t ia32_address_mode_t;
510 struct ia32_address_mode_t {
/* kind of the generated operand (normal / source address mode / ...) */
514 ia32_op_type_t op_type;
/* operands may be swapped (set for commutative matches) */
518 unsigned commutative : 1;
519 unsigned ins_permuted : 1;
522 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
524 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
526 /* construct load address */
527 memset(addr, 0, sizeof(addr[0]));
528 ia32_create_address_mode(addr, ptr, /*force=*/0);
530 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
531 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
532 addr->mem = be_transform_node(mem);
/*
 * NOTE(review): garbled listing — stray leading numbers and missing interior
 * lines (local declarations, the is_Const() guard around the float-constant
 * branch, the final addr->mem assignment, closing braces). Verify against
 * upstream ia32_transform.c before compiling.
 */
535 static void build_address(ia32_address_mode_t *am, ir_node *node)
537 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
538 ia32_address_t *addr = &am->addr;
/* float-constant case: address the constant-pool entity directly */
545 ir_entity *entity = create_float_const_entity(node);
546 addr->base = noreg_gp;
547 addr->index = noreg_gp;
548 addr->mem = new_NoMem();
549 addr->symconst_ent = entity;
551 am->ls_mode = get_irn_mode(node);
552 am->pinned = op_pin_state_floats;
/* Proj(Load) case: take address, mode and pin state from the Load */
556 load = get_Proj_pred(node);
557 ptr = get_Load_ptr(load);
558 mem = get_Load_mem(load);
559 new_mem = be_transform_node(mem);
560 am->pinned = get_irn_pinned(load);
561 am->ls_mode = get_Load_mode(load);
562 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
564 /* construct load address */
565 ia32_create_address_mode(addr, ptr, /*force=*/0);
567 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
568 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
572 static void set_address(ir_node *node, const ia32_address_t *addr)
574 set_ia32_am_scale(node, addr->scale);
575 set_ia32_am_sc(node, addr->symconst_ent);
576 set_ia32_am_offs_int(node, addr->offset);
577 if(addr->symconst_sign)
578 set_ia32_am_sc_sign(node);
580 set_ia32_use_frame(node);
581 set_ia32_frame_ent(node, addr->frame_entity);
584 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
586 set_address(node, &am->addr);
588 set_ia32_op_type(node, am->op_type);
589 set_ia32_ls_mode(node, am->ls_mode);
590 if(am->pinned == op_pin_state_pinned && get_irn_pinned(node) != op_pin_state_pinned) {
591 set_irn_pinned(node, am->pinned);
594 set_ia32_commutative(node);
598 * Check, if a given node is a Down-Conv, ie. a integer Conv
599 * from a mode with a mode with more bits to a mode with lesser bits.
600 * Moreover, we return only true if the node has not more than 1 user.
602 * @param node the node
603 * @return non-zero if node is a Down-Conv
605 static int is_downconv(const ir_node *node)
613 /* we only want to skip the conv when we're the only user
614 * (not optimal but for now...)
616 if(get_irn_n_edges(node) > 1)
619 src_mode = get_irn_mode(get_Conv_op(node));
620 dest_mode = get_irn_mode(node);
621 return mode_needs_gp_reg(src_mode)
622 && mode_needs_gp_reg(dest_mode)
623 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
626 /* Skip all Down-Conv's on a given node and return the resulting node. */
627 ir_node *ia32_skip_downconv(ir_node *node) {
628 while (is_downconv(node))
629 node = get_Conv_op(node);
635 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
637 ir_mode *mode = get_irn_mode(node);
642 if(mode_is_signed(mode)) {
647 block = get_nodes_block(node);
648 dbgi = get_irn_dbg_info(node);
650 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/*
 * NOTE(review): garbled listing — stray leading numbers and many missing
 * interior lines (the 'flags' parameter of the signature, locals such as
 * use_am/use_immediate/new_op1/new_op2, several else/closing-brace lines,
 * the assertion bodies of the 8/16-bit checks). Verify against upstream
 * ia32_transform.c before compiling.
 */
655 * matches operands of a node into ia32 addressing/operand modes. This covers
656 * usage of source address mode, immediates, operations with non 32-bit modes,
658 * The resulting data is filled into the @p am struct. block is the block
659 * of the node whose arguments are matched. op1, op2 are the first and second
660 * input that are matched (op1 may be NULL). other_op is another unrelated
661 * input that is not matched! but which is needed sometimes to check if AM
662 * for op1/op2 is legal.
663 * @p flags describes the supported modes of the operation in detail.
665 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
666 ir_node *op1, ir_node *op2, ir_node *other_op,
669 ia32_address_t *addr = &am->addr;
670 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
673 ir_mode *mode = get_irn_mode(op2);
675 unsigned commutative;
676 int use_am_and_immediates;
678 int mode_bits = get_mode_size_bits(mode);
680 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
682 commutative = (flags & match_commutative) != 0;
683 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
684 use_am = (flags & match_am) != 0;
685 use_immediate = (flags & match_immediate) != 0;
686 assert(!use_am_and_immediates || use_immediate);
689 assert(!commutative || op1 != NULL);
690 assert(use_am || !(flags & match_8bit_am));
691 assert(use_am || !(flags & match_16bit_am));
/* 8/16-bit operations need explicit support flags for address mode */
694 if (! (flags & match_8bit_am))
696 /* we don't automatically add upconvs yet */
697 assert((flags & match_mode_neutral) || (flags & match_8bit));
698 } else if(mode_bits == 16) {
699 if(! (flags & match_16bit_am))
701 /* we don't automatically add upconvs yet */
702 assert((flags & match_mode_neutral) || (flags & match_16bit));
705 /* we can simply skip downconvs for mode neutral nodes: the upper bits
706 * can be random for these operations */
707 if(flags & match_mode_neutral) {
708 op2 = ia32_skip_downconv(op2);
710 op1 = ia32_skip_downconv(op1);
714 /* match immediates. firm nodes are normalized: constants are always on the
717 if(! (flags & match_try_am) && use_immediate) {
718 new_op2 = try_create_Immediate(op2, 0);
/* case 1: op2 can be fetched via source address mode */
722 && use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
723 build_address(am, op2);
724 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
725 if(mode_is_float(mode)) {
726 new_op2 = ia32_new_NoReg_vfp(env_cg);
730 am->op_type = ia32_AddrModeS;
/* case 2 (commutative): op1 via source address mode, operands swapped */
731 } else if(commutative && (new_op2 == NULL || use_am_and_immediates) &&
733 && ia32_use_source_address_mode(block, op1, op2, other_op)) {
735 build_address(am, op1);
737 if(mode_is_float(mode)) {
738 noreg = ia32_new_NoReg_vfp(env_cg);
743 if(new_op2 != NULL) {
746 new_op1 = be_transform_node(op2);
748 am->ins_permuted = 1;
750 am->op_type = ia32_AddrModeS;
/* case 3: plain register operands */
752 if(flags & match_try_am) {
755 am->op_type = ia32_Normal;
759 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
761 new_op2 = be_transform_node(op2);
762 am->op_type = ia32_Normal;
763 am->ls_mode = get_irn_mode(op2);
764 if(flags & match_mode_neutral)
765 am->ls_mode = mode_Iu;
/* fill unused address parts with NoReg/NoMem defaults */
767 if(addr->base == NULL)
768 addr->base = noreg_gp;
769 if(addr->index == NULL)
770 addr->index = noreg_gp;
771 if(addr->mem == NULL)
772 addr->mem = new_NoMem();
774 am->new_op1 = new_op1;
775 am->new_op2 = new_op2;
776 am->commutative = commutative;
779 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
781 ir_graph *irg = current_ir_graph;
785 if(am->mem_proj == NULL)
788 /* we have to create a mode_T so the old MemProj can attach to us */
789 mode = get_irn_mode(node);
790 load = get_Proj_pred(am->mem_proj);
792 mark_irn_visited(load);
793 be_set_transformed_node(load, node);
796 set_irn_mode(node, mode_T);
797 return new_rd_Proj(NULL, irg, get_nodes_block(node), node, mode, pn_ia32_res);
804 * Construct a standard binary operation, set AM and immediate if required.
806 * @param op1 The first operand
807 * @param op2 The second operand
808 * @param func The node constructor function
809 * @return The constructed ia32 node.
811 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
812 construct_binop_func *func, match_flags_t flags)
814 ir_node *block = get_nodes_block(node);
815 ir_node *new_block = be_transform_node(block);
816 ir_graph *irg = current_ir_graph;
817 dbg_info *dbgi = get_irn_dbg_info(node);
819 ia32_address_mode_t am;
820 ia32_address_t *addr = &am.addr;
822 match_arguments(&am, block, op1, op2, NULL, flags);
824 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
825 am.new_op1, am.new_op2);
826 set_am_attributes(new_node, &am);
827 /* we can't use source address mode anymore when using immediates */
828 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
829 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
830 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
832 new_node = fix_mem_proj(new_node, &am);
/*
 * NOTE(review): garbled listing — the 'enum {' opener and its first member
 * (presumably n_ia32_l_binop_left) are missing before this line. The asserts
 * below check that the lowered Adc/Sbb input indices agree with this enum.
 */
839 n_ia32_l_binop_right,
840 n_ia32_l_binop_eflags
842 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
843 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
844 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
845 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_left, n_Sbb_left)
846 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_right, n_Sbb_right)
847 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
850 * Construct a binary operation which also consumes the eflags.
852 * @param node The node to transform
853 * @param func The node constructor function
854 * @param flags The match flags
855 * @return The constructor ia32 node
857 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
860 ir_node *src_block = get_nodes_block(node);
861 ir_node *block = be_transform_node(src_block);
862 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
863 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
864 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
865 ir_node *new_eflags = be_transform_node(eflags);
866 ir_graph *irg = current_ir_graph;
867 dbg_info *dbgi = get_irn_dbg_info(node);
869 ia32_address_mode_t am;
870 ia32_address_t *addr = &am.addr;
872 match_arguments(&am, src_block, op1, op2, NULL, flags);
874 new_node = func(dbgi, irg, block, addr->base, addr->index,
875 addr->mem, am.new_op1, am.new_op2, new_eflags);
876 set_am_attributes(new_node, &am);
877 /* we can't use source address mode anymore when using immediates */
878 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
879 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
880 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
882 new_node = fix_mem_proj(new_node, &am);
887 static ir_node *get_fpcw(void)
890 if(initial_fpcw != NULL)
893 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
894 &ia32_fp_cw_regs[REG_FPCW]);
895 initial_fpcw = be_transform_node(fpcw);
901 * Construct a standard binary operation, set AM and immediate if required.
903 * @param op1 The first operand
904 * @param op2 The second operand
905 * @param func The node constructor function
906 * @return The constructed ia32 node.
908 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
909 construct_binop_float_func *func,
912 ir_graph *irg = current_ir_graph;
913 dbg_info *dbgi = get_irn_dbg_info(node);
914 ir_node *block = get_nodes_block(node);
915 ir_node *new_block = be_transform_node(block);
916 ir_mode *mode = get_irn_mode(node);
918 ia32_address_mode_t am;
919 ia32_address_t *addr = &am.addr;
921 /* cannot use addresmode with long double on x87 */
922 if (get_mode_size_bits(mode) > 64)
925 match_arguments(&am, block, op1, op2, NULL, flags);
927 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
928 am.new_op1, am.new_op2, get_fpcw());
929 set_am_attributes(new_node, &am);
931 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
933 new_node = fix_mem_proj(new_node, &am);
939 * Construct a shift/rotate binary operation, sets AM and immediate if required.
941 * @param op1 The first operand
942 * @param op2 The second operand
943 * @param func The node constructor function
944 * @return The constructed ia32 node.
946 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
947 construct_shift_func *func,
950 dbg_info *dbgi = get_irn_dbg_info(node);
951 ir_graph *irg = current_ir_graph;
952 ir_node *block = get_nodes_block(node);
953 ir_node *new_block = be_transform_node(block);
958 assert(! mode_is_float(get_irn_mode(node)));
959 assert(flags & match_immediate);
960 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
962 if(flags & match_mode_neutral) {
963 op1 = ia32_skip_downconv(op1);
965 new_op1 = be_transform_node(op1);
967 /* the shift amount can be any mode that is bigger than 5 bits, since all
968 * other bits are ignored anyway */
969 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
970 op2 = get_Conv_op(op2);
971 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
973 new_op2 = create_immediate_or_transform(op2, 0);
975 new_node = func(dbgi, irg, new_block, new_op1, new_op2);
976 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
978 /* lowered shift instruction may have a dependency operand, handle it here */
979 if (get_irn_arity(node) == 3) {
980 /* we have a dependency */
981 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
982 add_irn_dep(new_node, new_dep);
990 * Construct a standard unary operation, set AM and immediate if required.
992 * @param op The operand
993 * @param func The node constructor function
994 * @return The constructed ia32 node.
996 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
999 ir_graph *irg = current_ir_graph;
1000 dbg_info *dbgi = get_irn_dbg_info(node);
1001 ir_node *block = get_nodes_block(node);
1002 ir_node *new_block = be_transform_node(block);
1006 assert(flags == 0 || flags == match_mode_neutral);
1007 if(flags & match_mode_neutral) {
1008 op = ia32_skip_downconv(op);
1011 new_op = be_transform_node(op);
1012 new_node = func(dbgi, irg, new_block, new_op);
1014 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1019 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1020 ia32_address_t *addr)
1022 ir_graph *irg = current_ir_graph;
1023 ir_node *base = addr->base;
1024 ir_node *index = addr->index;
1028 base = ia32_new_NoReg_gp(env_cg);
1030 base = be_transform_node(base);
1034 index = ia32_new_NoReg_gp(env_cg);
1036 index = be_transform_node(index);
1039 res = new_rd_ia32_Lea(dbgi, irg, block, base, index);
1040 set_address(res, addr);
1045 static int am_has_immediates(const ia32_address_t *addr)
1047 return addr->offset != 0 || addr->symconst_ent != NULL
1048 || addr->frame_entity || addr->use_frame;
/*
 * NOTE(review): garbled listing — stray leading numbers and missing interior
 * lines (declaration of 'new_node', the mode_needs_gp_reg guard, several
 * else-branches, closing braces, return statements). Verify against
 * upstream ia32_transform.c before compiling.
 */
1052 * Creates an ia32 Add.
1054 * @return the created ia32 Add node
1056 static ir_node *gen_Add(ir_node *node) {
1057 ir_graph *irg = current_ir_graph;
1058 dbg_info *dbgi = get_irn_dbg_info(node);
1059 ir_node *block = get_nodes_block(node);
1060 ir_node *new_block = be_transform_node(block);
1061 ir_node *op1 = get_Add_left(node);
1062 ir_node *op2 = get_Add_right(node);
1063 ir_mode *mode = get_irn_mode(node);
1065 ir_node *add_immediate_op;
1066 ia32_address_t addr;
1067 ia32_address_mode_t am;
/* float adds go through the generic binop helpers (SSE2 or x87) */
1069 if (mode_is_float(mode)) {
1070 if (ia32_cg_config.use_sse2)
1071 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1072 match_commutative | match_am);
1074 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1075 match_commutative | match_am);
1078 ia32_mark_non_am(node);
1080 op2 = ia32_skip_downconv(op2);
1081 op1 = ia32_skip_downconv(op1);
/* integer add selection strategy: */
1085 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1086 * 1. Add with immediate -> Lea
1087 * 2. Add with possible source address mode -> Add
1088 * 3. Otherwise -> Lea
1090 memset(&addr, 0, sizeof(addr));
1091 ia32_create_address_mode(&addr, node, /*force=*/1);
1092 add_immediate_op = NULL;
/* case 0: the whole Add folded into an immediate */
1094 if(addr.base == NULL && addr.index == NULL) {
1095 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1096 addr.symconst_sign, addr.offset);
1097 add_irn_dep(new_node, get_irg_frame(irg));
1098 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1101 /* add with immediate? */
1102 if(addr.index == NULL) {
1103 add_immediate_op = addr.base;
1104 } else if(addr.base == NULL && addr.scale == 0) {
1105 add_immediate_op = addr.index;
/* case 1: one register operand plus immediates -> Lea (or nothing at all) */
1108 if(add_immediate_op != NULL) {
1109 if(!am_has_immediates(&addr)) {
1110 #ifdef DEBUG_libfirm
1111 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1114 return be_transform_node(add_immediate_op);
1117 new_node = create_lea_from_address(dbgi, new_block, &addr);
1118 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1122 /* test if we can use source address mode */
1123 match_arguments(&am, block, op1, op2, NULL, match_commutative
1124 | match_mode_neutral | match_am | match_immediate | match_try_am);
1126 /* construct an Add with source address mode */
1127 if (am.op_type == ia32_AddrModeS) {
1128 ia32_address_t *am_addr = &am.addr;
1129 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1130 am_addr->index, am_addr->mem, am.new_op1,
1132 set_am_attributes(new_node, &am);
1133 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1135 new_node = fix_mem_proj(new_node, &am);
1140 /* otherwise construct a lea */
1141 new_node = create_lea_from_address(dbgi, new_block, &addr);
1142 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1147 * Creates an ia32 Mul.
1149 * @return the created ia32 Mul node
1151 static ir_node *gen_Mul(ir_node *node) {
1152 ir_node *op1 = get_Mul_left(node);
1153 ir_node *op2 = get_Mul_right(node);
1154 ir_mode *mode = get_irn_mode(node);
1156 if (mode_is_float(mode)) {
1157 if (ia32_cg_config.use_sse2)
1158 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1159 match_commutative | match_am);
1161 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1162 match_commutative | match_am);
1165 /* for the lower 32bit of the result it doesn't matter whether we use
1166 * signed or unsigned multiplication so we use IMul as it has fewer
1168 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1169 match_commutative | match_am | match_mode_neutral |
1170 match_immediate | match_am_and_immediates);
/*
 * NOTE(review): garbled listing — stray leading numbers and missing interior
 * lines (opening brace, declaration of 'new_node', the trailing constructor
 * arguments of IMul1OP/Mul, else/closing-brace lines). Verify against
 * upstream ia32_transform.c before compiling.
 */
1174 * Creates an ia32 Mulh.
1175 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1176 * this result while Mul returns the lower 32 bit.
1178 * @return the created ia32 Mulh node
1180 static ir_node *gen_Mulh(ir_node *node)
1182 ir_node *block = get_nodes_block(node);
1183 ir_node *new_block = be_transform_node(block);
1184 ir_graph *irg = current_ir_graph;
1185 dbg_info *dbgi = get_irn_dbg_info(node);
1186 ir_mode *mode = get_irn_mode(node);
1187 ir_node *op1 = get_Mulh_left(node);
1188 ir_node *op2 = get_Mulh_right(node);
1189 ir_node *proj_res_high;
1191 ia32_address_mode_t am;
1192 ia32_address_t *addr = &am.addr;
1194 assert(!mode_is_float(mode) && "Mulh with float not supported");
1195 assert(get_mode_size_bits(mode) == 32);
1197 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* signed uses one-operand imul, unsigned uses mul */
1199 if (mode_is_signed(mode)) {
1200 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1201 addr->index, addr->mem, am.new_op1,
1204 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1205 addr->index, addr->mem, am.new_op1,
1209 set_am_attributes(new_node, &am);
1210 /* we can't use source address mode anymore when using immediates */
1211 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1212 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1213 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1215 assert(get_irn_mode(new_node) == mode_T);
1217 fix_mem_proj(new_node, &am);
/* both constructors use the same high-result proj number */
1219 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1220 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1221 mode_Iu, pn_ia32_IMul1OP_res_high);
1223 return proj_res_high;
1229 * Creates an ia32 And.
1231 * @return The created ia32 And node
1233 static ir_node *gen_And(ir_node *node) {
1234 ir_node *op1 = get_And_left(node);
1235 ir_node *op2 = get_And_right(node);
1236 assert(! mode_is_float(get_irn_mode(node)));
1238 /* is it a zero extension? */
/* And with 0xFF / 0xFFFF is a zero extension from 8/16 bit, which is
 * turned into an I2I Conv (a movzx) instead of a real And */
1239 if (is_Const(op2)) {
1240 tarval *tv = get_Const_tarval(op2);
1241 long v = get_tarval_long(tv);
1243 if (v == 0xFF || v == 0xFFFF) {
1244 dbg_info *dbgi = get_irn_dbg_info(node);
1245 ir_node *block = get_nodes_block(node);
1252 assert(v == 0xFFFF);
1255 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1261 return gen_binop(node, op1, op2, new_rd_ia32_And,
1262 match_commutative | match_mode_neutral | match_am
1269 * Creates an ia32 Or.
1271 * @return The created ia32 Or node
1273 static ir_node *gen_Or(ir_node *node) {
1274 ir_node *op1 = get_Or_left(node);
1275 ir_node *op2 = get_Or_right(node);
/* Or only exists for integer modes at this point */
1277 assert (! mode_is_float(get_irn_mode(node)));
1278 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1279 | match_mode_neutral | match_am | match_immediate);
1285 * Creates an ia32 Eor.
1287 * @return The created ia32 Eor node
1289 static ir_node *gen_Eor(ir_node *node) {
1290 ir_node *op1 = get_Eor_left(node);
1291 ir_node *op2 = get_Eor_right(node);
1293 assert(! mode_is_float(get_irn_mode(node)));
/* firm's Eor (exclusive or) maps directly onto the ia32 Xor node */
1294 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1295 | match_mode_neutral | match_am | match_immediate);
1300 * Creates an ia32 Sub.
1302 * @return The created ia32 Sub node
1304 static ir_node *gen_Sub(ir_node *node) {
1305 ir_node *op1 = get_Sub_left(node);
1306 ir_node *op2 = get_Sub_right(node);
1307 ir_mode *mode = get_irn_mode(node);
1309 if (mode_is_float(mode)) {
1310 if (ia32_cg_config.use_sse2)
1311 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1313 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
/* NOTE(review): Sub with a constant should normally have been normalized
 * to an Add earlier; presumably this warning fires for the un-normalized
 * case — the guarding condition is missing from this listing */
1318 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1322 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1323 | match_am | match_immediate);
1327 * Generates an ia32 DivMod with additional infrastructure for the
1328 * register allocator if needed.
/* Common transformation for Div, Mod and DivMod: all three map to one
 * ia32 Div/IDiv node; the callers project the result they need. */
1330 static ir_node *create_Div(ir_node *node)
1332 ir_graph *irg = current_ir_graph;
1333 dbg_info *dbgi = get_irn_dbg_info(node);
1334 ir_node *block = get_nodes_block(node);
1335 ir_node *new_block = be_transform_node(block);
1342 ir_node *sign_extension;
1343 ia32_address_mode_t am;
1344 ia32_address_t *addr = &am.addr;
1346 /* the upper bits have random contents for smaller modes */
/* fetch operands/mem/result-mode depending on which of the three
 * firm opcodes we are lowering */
1347 switch (get_irn_opcode(node)) {
1349 op1 = get_Div_left(node);
1350 op2 = get_Div_right(node);
1351 mem = get_Div_mem(node);
1352 mode = get_Div_resmode(node);
1355 op1 = get_Mod_left(node);
1356 op2 = get_Mod_right(node);
1357 mem = get_Mod_mem(node);
1358 mode = get_Mod_resmode(node);
1361 op1 = get_DivMod_left(node);
1362 op2 = get_DivMod_right(node);
1363 mem = get_DivMod_mem(node);
1364 mode = get_DivMod_resmode(node);
1367 panic("invalid divmod node %+F", node);
1370 match_arguments(&am, block, op1, op2, NULL, match_am);
1372 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1373 is the memory of the consumed address. We can have only the second op as address
1374 in Div nodes, so check only op2. */
1375 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1376 new_mem = be_transform_node(mem);
1377 if(!is_NoMem(addr->mem)) {
/* both the Div's own memory and the address-mode memory are live:
 * merge them with a Sync */
1381 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1384 new_mem = addr->mem;
/* signed division needs EDX = sign extension of EAX (Cltd, i.e. cdq);
 * unsigned division needs EDX = 0 */
1387 if (mode_is_signed(mode)) {
1388 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1389 add_irn_dep(produceval, get_irg_frame(irg));
1390 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1393 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1394 addr->index, new_mem, am.new_op1,
1395 sign_extension, am.new_op2);
1397 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1398 add_irn_dep(sign_extension, get_irg_frame(irg));
1400 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1401 addr->index, new_mem, am.new_op1,
1402 sign_extension, am.new_op2);
/* keep the original pinned state: division may trap and must not float
 * freely if the source node was pinned */
1405 set_irn_pinned(new_node, get_irn_pinned(node));
1407 set_am_attributes(new_node, &am);
1408 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1410 new_node = fix_mem_proj(new_node, &am);
/* Transforms a Mod node; shares the Div/IDiv machinery in create_Div(). */
1416 static ir_node *gen_Mod(ir_node *node) {
1417 return create_Div(node);
/* Transforms a Div node; shares the Div/IDiv machinery in create_Div(). */
1420 static ir_node *gen_Div(ir_node *node) {
1421 return create_Div(node);
/* Transforms a DivMod node; shares the Div/IDiv machinery in create_Div(). */
1424 static ir_node *gen_DivMod(ir_node *node) {
1425 return create_Div(node);
1431 * Creates an ia32 floating Div.
1433 * @return The created ia32 xDiv node
1435 static ir_node *gen_Quot(ir_node *node)
1437 ir_node *op1 = get_Quot_left(node);
1438 ir_node *op2 = get_Quot_right(node);
/* Quot is always a float division: SSE2 xDiv when available, x87 vfdiv
 * otherwise. Division is not commutative, so only match_am is passed. */
1440 if (ia32_cg_config.use_sse2) {
1441 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1443 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1449 * Creates an ia32 Shl.
1451 * @return The created ia32 Shl node
1453 static ir_node *gen_Shl(ir_node *node) {
1454 ir_node *left = get_Shl_left(node);
1455 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: the lower bits don't depend on the upper
 * bits of the operand */
1457 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1458 match_mode_neutral | match_immediate);
1462 * Creates an ia32 Shr.
1464 * @return The created ia32 Shr node
1466 static ir_node *gen_Shr(ir_node *node) {
1467 ir_node *left = get_Shr_left(node);
1468 ir_node *right = get_Shr_right(node);
/* logical right shift depends on the upper bits, so no match_mode_neutral */
1470 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1476 * Creates an ia32 Sar.
1478 * @return The created ia32 Shrs node
1480 static ir_node *gen_Shrs(ir_node *node) {
1481 ir_node *left = get_Shrs_left(node);
1482 ir_node *right = get_Shrs_right(node);
1483 ir_mode *mode = get_irn_mode(node);
/* Shrs of a 32bit value by a constant (presumably 31 — the comparison is
 * missing from this listing) yields only the sign bit: emit a Cltd (cdq) */
1485 if(is_Const(right) && mode == mode_Is) {
1486 tarval *tv = get_Const_tarval(right);
1487 long val = get_tarval_long(tv);
1489 /* this is a sign extension */
1490 ir_graph *irg = current_ir_graph;
1491 dbg_info *dbgi = get_irn_dbg_info(node);
1492 ir_node *block = be_transform_node(get_nodes_block(node));
1494 ir_node *new_op = be_transform_node(op);
1495 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1496 add_irn_dep(pval, get_irg_frame(irg));
1498 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1502 /* 8 or 16 bit sign extension? */
/* (x << c) >>s c with c == 24 or 16 is a sign extension from 8/16 bit:
 * lower it to an I2I Conv (movsx) instead of two shifts */
1503 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1504 ir_node *shl_left = get_Shl_left(left);
1505 ir_node *shl_right = get_Shl_right(left);
1506 if(is_Const(shl_right)) {
1507 tarval *tv1 = get_Const_tarval(right);
1508 tarval *tv2 = get_Const_tarval(shl_right);
1509 if(tv1 == tv2 && tarval_is_long(tv1)) {
1510 long val = get_tarval_long(tv1);
1511 if(val == 16 || val == 24) {
1512 dbg_info *dbgi = get_irn_dbg_info(node);
1513 ir_node *block = get_nodes_block(node);
1523 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1532 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1538 * Creates an ia32 RotL.
1540 * @param op1 The first operator
1541 * @param op2 The second operator
1542 * @return The created ia32 RotL node
1544 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1545 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1551 * Creates an ia32 RotR.
1552 * NOTE: There is no RotR with immediate because this would always be a RotL
1553 * "imm-mode_size_bits" which can be pre-calculated.
1555 * @param op1 The first operator
1556 * @param op2 The second operator
1557 * @return The created ia32 RotR node
1559 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1560 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1566 * Creates an ia32 RotR or RotL (depending on the found pattern).
1568 * @return The created ia32 RotL or RotR node
1570 static ir_node *gen_Rot(ir_node *node) {
1571 ir_node *rotate = NULL;
1572 ir_node *op1 = get_Rot_left(node);
1573 ir_node *op2 = get_Rot_right(node);
1575 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1576 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1577 that means we can create a RotR instead of an Add and a RotL */
/* pattern: Rot(x, Add(Minus(e), bits)) == RotR(x, e) */
1579 if (get_irn_op(op2) == op_Add) {
1581 ir_node *left = get_Add_left(add);
1582 ir_node *right = get_Add_right(add);
1583 if (is_Const(right)) {
1584 tarval *tv = get_Const_tarval(right);
1585 ir_mode *mode = get_irn_mode(node);
1586 long bits = get_mode_size_bits(mode);
1588 if (get_irn_op(left) == op_Minus &&
1589 tarval_is_long(tv) &&
1590 get_tarval_long(tv) == bits &&
1593 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1594 rotate = gen_RotR(node, op1, get_Minus_op(left));
/* pattern did not match — fall back to a plain rotate-left */
1599 if (rotate == NULL) {
1600 rotate = gen_RotL(node, op1, op2);
1609 * Transforms a Minus node.
1611 * @return The created ia32 Minus node
1613 static ir_node *gen_Minus(ir_node *node)
1615 ir_node *op = get_Minus_op(node);
1616 ir_node *block = be_transform_node(get_nodes_block(node));
1617 ir_graph *irg = current_ir_graph;
1618 dbg_info *dbgi = get_irn_dbg_info(node);
1619 ir_mode *mode = get_irn_mode(node);
1624 if (mode_is_float(mode)) {
1625 ir_node *new_op = be_transform_node(op);
1626 if (ia32_cg_config.use_sse2) {
1627 /* TODO: non-optimal... if we have many xXors, then we should
1628 * rather create a load for the const and use that instead of
1629 * several AM nodes... */
/* SSE2 float negation: XOR with a sign-bit mask constant loaded via
 * source address mode (SSIGN/DSIGN depending on precision) */
1630 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1631 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1632 ir_node *nomem = new_rd_NoMem(irg);
1634 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1635 nomem, new_op, noreg_xmm);
1637 size = get_mode_size_bits(mode);
1638 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1640 set_ia32_am_sc(new_node, ent);
1641 set_ia32_op_type(new_node, ia32_AddrModeS);
1642 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction (fchs) */
1644 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
/* integer negation is a plain Neg */
1647 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1650 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1656 * Transforms a Not node.
1658 * @return The created ia32 Not node
1660 static ir_node *gen_Not(ir_node *node) {
1661 ir_node *op = get_Not_op(node);
/* boolean Nots must have been lowered before this phase; float Not
 * does not exist */
1663 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1664 assert (! mode_is_float(get_irn_mode(node)));
1666 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1672 * Transforms an Abs node.
1674 * @return The created ia32 Abs node
1676 static ir_node *gen_Abs(ir_node *node)
1678 ir_node *block = get_nodes_block(node);
1679 ir_node *new_block = be_transform_node(block);
1680 ir_node *op = get_Abs_op(node);
1681 ir_graph *irg = current_ir_graph;
1682 dbg_info *dbgi = get_irn_dbg_info(node);
1683 ir_mode *mode = get_irn_mode(node);
1684 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1685 ir_node *nomem = new_NoMem();
1691 if (mode_is_float(mode)) {
1692 new_op = be_transform_node(op);
1694 if (ia32_cg_config.use_sse2) {
/* SSE2 float abs: AND with a mask that clears the sign bit
 * (SABS/DABS constant, accessed via source address mode) */
1695 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1696 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1697 nomem, new_op, noreg_fp);
1699 size = get_mode_size_bits(mode);
1700 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1702 set_ia32_am_sc(new_node, ent);
1704 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1706 set_ia32_op_type(new_node, ia32_AddrModeS);
1707 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated absolute-value instruction (fabs) */
1709 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1710 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* integer abs via the classic branch-free sequence:
 * s = x >> 31 (Cltd); result = (x ^ s) - s */
1713 ir_node *xor, *pval, *sign_extension;
1715 if (get_mode_size_bits(mode) == 32) {
1716 new_op = be_transform_node(op);
/* smaller modes are first sign-extended to 32 bit */
1718 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1721 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1722 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1725 add_irn_dep(pval, get_irg_frame(irg));
1726 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1728 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1729 nomem, new_op, sign_extension);
1730 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1732 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1733 nomem, xor, sign_extension);
1734 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Produces a flags value for the given mode_b node and reports the
 * compare relation to test for through *pnc_out. A Proj of a Cmp is
 * transformed into the Cmp's flags; any other mode_b value is tested
 * against zero with an ia32 Test node. */
1740 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1742 ir_graph *irg = current_ir_graph;
1750 /* we have a Cmp as input */
1752 ir_node *pred = get_Proj_pred(node);
1754 flags = be_transform_node(pred);
1755 *pnc_out = get_Proj_proj(node);
1760 /* a mode_b value, we have to compare it against 0 */
1761 dbgi = get_irn_dbg_info(node);
1762 new_block = be_transform_node(get_nodes_block(node));
1763 new_op = be_transform_node(node);
1764 noreg = ia32_new_NoReg_gp(env_cg);
1765 nomem = new_NoMem();
/* Test(x, x) sets ZF iff x == 0; the caller then tests for "not equal" */
1766 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1767 new_op, new_op, 0, 0);
1768 *pnc_out = pn_Cmp_Lg;
1773 * Transforms a Load.
1775 * @return the created ia32 Load node
1777 static ir_node *gen_Load(ir_node *node) {
1778 ir_node *old_block = get_nodes_block(node);
1779 ir_node *block = be_transform_node(old_block);
1780 ir_node *ptr = get_Load_ptr(node);
1781 ir_node *mem = get_Load_mem(node);
1782 ir_node *new_mem = be_transform_node(mem);
1785 ir_graph *irg = current_ir_graph;
1786 dbg_info *dbgi = get_irn_dbg_info(node);
1787 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1788 ir_mode *mode = get_Load_mode(node);
1791 ia32_address_t addr;
1793 /* construct load address */
1794 memset(&addr, 0, sizeof(addr));
1795 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1802 base = be_transform_node(base);
1808 index = be_transform_node(index);
/* pick the load node by mode: xLoad (SSE2) / vfld (x87) for floats,
 * Conv_I2I for sub-32bit integers (load + extension), plain Load else */
1811 if (mode_is_float(mode)) {
1812 if (ia32_cg_config.use_sse2) {
1813 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1815 res_mode = mode_xmm;
1817 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1819 res_mode = mode_vfp;
1822 assert(mode != mode_b);
1824 /* create a conv node with address mode for smaller modes */
1825 if(get_mode_size_bits(mode) < 32) {
1826 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1827 new_mem, noreg, mode);
1829 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1834 set_irn_pinned(new_node, get_irn_pinned(node));
1835 set_ia32_op_type(new_node, ia32_AddrModeS);
1836 set_ia32_ls_mode(new_node, mode);
1837 set_address(new_node, &addr);
/* floating (unpinned) loads cannot trap, so they may be rematerialized
 * by the spiller instead of being spilled/reloaded */
1839 if(get_irn_pinned(node) == op_pin_state_floats) {
1840 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1843 /* make sure we are scheduled behind the initial IncSP/Barrier
1844 * to avoid spills being placed before it
1846 if (block == get_irg_start_block(irg)) {
1847 add_irn_dep(new_node, get_irg_frame(irg));
1850 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Decides whether a (Proj of a) Load feeding a Store can be folded into
 * destination address mode (a read-modify-write instruction). Returns
 * non-zero when folding is safe. @p other is the second operand of the
 * modifying operation (may be NULL for unary ops). */
1855 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1856 ir_node *ptr, ir_node *other)
1863 /* we only use address mode if we're the only user of the load */
1864 if(get_irn_n_edges(node) > 1)
1867 load = get_Proj_pred(node);
/* load and store must be in the same block for the fold */
1870 if(get_nodes_block(load) != block)
1873 /* Store should be attached to the load */
1874 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
1876 /* store should have the same pointer as the load */
1877 if(get_Load_ptr(load) != ptr)
1880 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1881 if(other != NULL && get_nodes_block(other) == block
1882 && heights_reachable_in_block(heights, other, load))
/* Creates a binary destination-address-mode node (e.g. AddMem): the
 * operation reads and writes memory directly instead of going through a
 * register. @p func8bit is used for 8bit modes, @p func otherwise.
 * Returns NULL when no operand qualifies for destination AM. */
1888 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1889 ir_node *mem, ir_node *ptr, ir_mode *mode,
1890 construct_binop_dest_func *func,
1891 construct_binop_dest_func *func8bit,
1892 match_flags_t flags)
1894 ir_node *src_block = get_nodes_block(node);
1896 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1897 ir_graph *irg = current_ir_graph;
1902 ia32_address_mode_t am;
1903 ia32_address_t *addr = &am.addr;
1904 memset(&am, 0, sizeof(am));
1906 assert(flags & match_dest_am);
1907 assert(flags & match_immediate); /* there is no destam node without... */
1908 commutative = (flags & match_commutative) != 0;
/* try op1 as the memory operand; for commutative ops op2 may be the
 * memory operand with op1 as the register/immediate operand */
1910 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
1911 build_address(&am, op1);
1912 new_op = create_immediate_or_transform(op2, 0);
1913 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
1914 build_address(&am, op2);
1915 new_op = create_immediate_or_transform(op1, 0);
/* fill in NoReg/NoMem placeholders for unused address components */
1920 if(addr->base == NULL)
1921 addr->base = noreg_gp;
1922 if(addr->index == NULL)
1923 addr->index = noreg_gp;
1924 if(addr->mem == NULL)
1925 addr->mem = new_NoMem();
1927 dbgi = get_irn_dbg_info(node);
1928 block = be_transform_node(src_block);
1929 if(get_mode_size_bits(mode) == 8) {
1930 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
1933 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
1936 set_address(new_node, addr);
1937 set_ia32_op_type(new_node, ia32_AddrModeD);
1938 set_ia32_ls_mode(new_node, mode);
1939 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Creates a unary destination-address-mode node (e.g. NotMem, NegMem):
 * the operation modifies its memory operand in place. Returns NULL when
 * the operand cannot be folded into destination AM. */
1944 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
1945 ir_node *ptr, ir_mode *mode,
1946 construct_unop_dest_func *func)
1948 ir_graph *irg = current_ir_graph;
1949 ir_node *src_block = get_nodes_block(node);
1953 ia32_address_mode_t am;
1954 ia32_address_t *addr = &am.addr;
1955 memset(&am, 0, sizeof(am));
1957 if(!use_dest_am(src_block, op, mem, ptr, NULL))
1960 build_address(&am, op);
1962 dbgi = get_irn_dbg_info(node);
1963 block = be_transform_node(src_block);
1964 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
1965 set_address(new_node, addr);
1966 set_ia32_op_type(new_node, ia32_AddrModeD);
1967 set_ia32_ls_mode(new_node, mode);
1968 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Tries to transform a Psi feeding an 8bit Store into a SetMem
 * (conditional set written directly to memory). Only Psi(cond, 1, 0)
 * and the negated Psi(cond, 0, 1) qualify. Returns NULL otherwise. */
1973 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
1974 ir_mode *mode = get_irn_mode(node);
1975 ir_node *psi_true = get_Psi_val(node, 0);
1976 ir_node *psi_default = get_Psi_default(node);
1987 ia32_address_t addr;
/* setcc only produces an 8bit result */
1989 if(get_mode_size_bits(mode) != 8)
/* 1/0 selects the plain condition, 0/1 the negated one */
1992 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
1994 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2000 build_address_ptr(&addr, ptr, mem);
2002 irg = current_ir_graph;
2003 dbgi = get_irn_dbg_info(node);
2004 block = get_nodes_block(node);
2005 new_block = be_transform_node(block);
2006 cond = get_Psi_cond(node, 0);
2007 flags = get_flags_node(cond, &pnc);
2008 new_mem = be_transform_node(mem);
2009 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2010 addr.index, addr.mem, flags, pnc, negated);
2011 set_address(new_node, &addr);
2012 set_ia32_op_type(new_node, ia32_AddrModeD);
2013 set_ia32_ls_mode(new_node, mode);
2014 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Tries to turn a Store whose value is computed by a foldable operation
 * (Add/Sub/And/Or/Eor/shifts/Rot/Psi/Minus/Not) into a single
 * destination-address-mode instruction. Returns the new node or NULL if
 * no pattern matched. */
2019 static ir_node *try_create_dest_am(ir_node *node) {
2020 ir_node *val = get_Store_value(node);
2021 ir_node *mem = get_Store_mem(node);
2022 ir_node *ptr = get_Store_ptr(node);
2023 ir_mode *mode = get_irn_mode(val);
2024 unsigned bits = get_mode_size_bits(mode);
2029 /* handle only GP modes for now... */
2030 if(!mode_needs_gp_reg(mode))
2034 /* store must be the only user of the val node */
2035 if(get_irn_n_edges(val) > 1)
2037 /* skip pointless convs */
2039 ir_node *conv_op = get_Conv_op(val);
2040 ir_mode *pred_mode = get_irn_mode(conv_op);
2041 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2049 /* value must be in the same block */
2050 if(get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the value-producing operation; case labels are missing
 * from this listing but follow from the accessors used below */
2053 switch(get_irn_opcode(val)) {
2055 op1 = get_Add_left(val);
2056 op2 = get_Add_right(val);
/* x+1 / x-1 become the shorter IncMem/DecMem forms */
2057 if(is_Const_1(op2)) {
2058 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2059 new_rd_ia32_IncMem);
2061 } else if(is_Const_Minus_1(op2)) {
2062 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2063 new_rd_ia32_DecMem);
2066 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2067 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2068 match_dest_am | match_commutative |
2072 op1 = get_Sub_left(val);
2073 op2 = get_Sub_right(val);
2075 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2078 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2079 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2080 match_dest_am | match_immediate |
2084 op1 = get_And_left(val);
2085 op2 = get_And_right(val);
2086 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2087 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2088 match_dest_am | match_commutative |
2092 op1 = get_Or_left(val);
2093 op2 = get_Or_right(val);
2094 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2095 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2096 match_dest_am | match_commutative |
2100 op1 = get_Eor_left(val);
2101 op2 = get_Eor_right(val);
2102 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2103 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2104 match_dest_am | match_commutative |
2108 op1 = get_Shl_left(val);
2109 op2 = get_Shl_right(val);
/* shifts have no separate 8bit constructors: the same function is
 * passed for both slots */
2110 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2111 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2112 match_dest_am | match_immediate);
2115 op1 = get_Shr_left(val);
2116 op2 = get_Shr_right(val);
2117 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2118 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2119 match_dest_am | match_immediate);
2122 op1 = get_Shrs_left(val);
2123 op2 = get_Shrs_right(val);
2124 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2125 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2126 match_dest_am | match_immediate);
2129 op1 = get_Rot_left(val);
2130 op2 = get_Rot_right(val);
2131 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2132 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2133 match_dest_am | match_immediate);
2135 /* TODO: match ROR patterns... */
2137 new_node = try_create_SetMem(val, ptr, mem);
2140 op1 = get_Minus_op(val);
2141 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2144 /* should be lowered already */
2145 assert(mode != mode_b);
2146 op1 = get_Not_op(val);
2147 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* a pinned Store must stay pinned even after being fused with the op */
2153 if(new_node != NULL) {
2154 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2155 get_irn_pinned(node) == op_pin_state_pinned) {
2156 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns non-zero iff @p node is a Conv from a float mode to a 32bit
 * integer (GP) mode — the pattern gen_Store turns into a vfist. */
2163 static int is_float_to_int32_conv(const ir_node *node)
2165 ir_mode *mode = get_irn_mode(node);
2169 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2174 conv_op = get_Conv_op(node);
2175 conv_mode = get_irn_mode(conv_op);
2177 if(!mode_is_float(conv_mode))
2184 * Transforms a Store.
2186 * @return the created ia32 Store node
2188 static ir_node *gen_Store(ir_node *node)
2190 ir_node *block = get_nodes_block(node);
2191 ir_node *new_block = be_transform_node(block);
2192 ir_node *ptr = get_Store_ptr(node);
2193 ir_node *val = get_Store_value(node);
2194 ir_node *mem = get_Store_mem(node);
2195 ir_graph *irg = current_ir_graph;
2196 dbg_info *dbgi = get_irn_dbg_info(node);
2197 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2198 ir_mode *mode = get_irn_mode(val);
2201 ia32_address_t addr;
2203 /* check for destination address mode */
/* a Store fed by a foldable op becomes one read-modify-write node */
2204 new_node = try_create_dest_am(node);
2205 if(new_node != NULL)
2208 /* construct store address */
2209 memset(&addr, 0, sizeof(addr));
2210 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2212 if(addr.base == NULL) {
2215 addr.base = be_transform_node(addr.base);
2218 if(addr.index == NULL) {
2221 addr.index = be_transform_node(addr.index);
2223 addr.mem = be_transform_node(mem);
2225 if (mode_is_float(mode)) {
2226 /* convs (and strict-convs) before stores are unnecessary if the mode
2228 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2229 val = get_Conv_op(val);
2231 new_val = be_transform_node(val);
2232 if (ia32_cg_config.use_sse2) {
2233 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2234 addr.index, addr.mem, new_val);
2236 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2237 addr.index, addr.mem, new_val, mode);
/* a float->int32 Conv feeding the Store can be fused into an x87
 * fistp (vfist) with truncation rounding mode */
2239 } else if(is_float_to_int32_conv(val)) {
2240 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2241 val = get_Conv_op(val);
2243 /* convs (and strict-convs) before stores are unnecessary if the mode
2245 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2246 val = get_Conv_op(val);
2248 new_val = be_transform_node(val);
2250 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2251 addr.index, addr.mem, new_val, trunc_mode);
/* integer store; 8bit stores need the dedicated Store8Bit node
 * (only certain registers have 8bit sub-registers) */
2253 new_val = create_immediate_or_transform(val, 0);
2254 assert(mode != mode_b);
2256 if (get_mode_size_bits(mode) == 8) {
2257 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2258 addr.index, addr.mem, new_val);
2260 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2261 addr.index, addr.mem, new_val);
2265 set_irn_pinned(new_node, get_irn_pinned(node));
2266 set_ia32_op_type(new_node, ia32_AddrModeD);
2267 set_ia32_ls_mode(new_node, mode);
2269 set_address(new_node, &addr);
2270 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transforms a Cond with a non-boolean (switch) selector into an ia32
 * SwitchJmp (jump table). The selector is biased by the smallest case
 * value so the table starts at 0. */
2275 static ir_node *create_Switch(ir_node *node)
2277 ir_graph *irg = current_ir_graph;
2278 dbg_info *dbgi = get_irn_dbg_info(node);
2279 ir_node *block = be_transform_node(get_nodes_block(node));
2280 ir_node *sel = get_Cond_selector(node);
2281 ir_node *new_sel = be_transform_node(sel);
2282 int switch_min = INT_MAX;
2283 int switch_max = INT_MIN;
2284 long default_pn = get_Cond_defaultProj(node);
2286 const ir_edge_t *edge;
2288 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2290 /* determine the smallest switch case value */
2291 foreach_out_edge(node, edge) {
2292 ir_node *proj = get_edge_src_irn(edge);
2293 long pn = get_Proj_proj(proj);
2294 if(pn == default_pn)
/* refuse degenerate jump tables; 256000 is an arbitrary upper bound
 * on the table size */
2303 if((unsigned) (switch_max - switch_min) > 256000) {
2304 panic("Size of switch %+F bigger than 256000", node);
2307 if (switch_min != 0) {
2308 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2310 /* if smallest switch case is not 0 we need an additional sub */
/* the subtraction is expressed as a Lea with a negative offset */
2311 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2312 add_ia32_am_offs_int(new_sel, -switch_min);
2313 set_ia32_op_type(new_sel, ia32_AddrModeS);
2315 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2318 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2319 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transforms a Cond node: switch selectors go to create_Switch, boolean
 * selectors become a conditional jump (Jcc) on the flags produced by
 * get_flags_node. */
2324 static ir_node *gen_Cond(ir_node *node) {
2325 ir_node *block = get_nodes_block(node);
2326 ir_node *new_block = be_transform_node(block);
2327 ir_graph *irg = current_ir_graph;
2328 dbg_info *dbgi = get_irn_dbg_info(node);
2329 ir_node *sel = get_Cond_selector(node);
2330 ir_mode *sel_mode = get_irn_mode(sel);
2331 ir_node *flags = NULL;
2335 if (sel_mode != mode_b) {
2336 return create_Switch(node);
2339 /* we get flags from a cmp */
2340 flags = get_flags_node(sel, &pnc);
2342 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2343 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2351 * Transforms a CopyB node.
2353 * @return The transformed node.
2355 static ir_node *gen_CopyB(ir_node *node) {
2356 ir_node *block = be_transform_node(get_nodes_block(node));
2357 ir_node *src = get_CopyB_src(node);
2358 ir_node *new_src = be_transform_node(src);
2359 ir_node *dst = get_CopyB_dst(node);
2360 ir_node *new_dst = be_transform_node(dst);
2361 ir_node *mem = get_CopyB_mem(node);
2362 ir_node *new_mem = be_transform_node(mem);
2363 ir_node *res = NULL;
2364 ir_graph *irg = current_ir_graph;
2365 dbg_info *dbgi = get_irn_dbg_info(node);
2366 int size = get_type_size_bytes(get_CopyB_type(node));
2369 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2370 /* then we need the size explicitly in ECX. */
2371 if (size >= 32 * 4) {
2372 rem = size & 0x3; /* size % 4 */
/* the Const holds the dword count for ECX; rem covers the trailing
 * 1-3 bytes copied after the REP MOVSD */
2375 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
2376 add_irn_dep(res, get_irg_frame(irg));
2378 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
2381 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
/* small copies are fully unrolled by the CopyB_i node (immediate size) */
2384 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2387 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/* Transforms a backend Copy node: duplicate it and normalize GP-register
 * modes to mode_Iu (the ia32 backend uses a single 32bit GP mode). */
2392 static ir_node *gen_be_Copy(ir_node *node)
2394 ir_node *new_node = be_duplicate_node(node);
2395 ir_mode *mode = get_irn_mode(new_node);
2397 if (mode_needs_gp_reg(mode)) {
2398 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 float compare. With fucomi support the flags are set
 * directly; otherwise fucom/ftst write the FPU status word, which is
 * transferred to the CPU flags via fnstsw + sahf. */
2404 static ir_node *create_Fucom(ir_node *node)
2406 ir_graph *irg = current_ir_graph;
2407 dbg_info *dbgi = get_irn_dbg_info(node);
2408 ir_node *block = get_nodes_block(node);
2409 ir_node *new_block = be_transform_node(block);
2410 ir_node *left = get_Cmp_left(node);
2411 ir_node *new_left = be_transform_node(left);
2412 ir_node *right = get_Cmp_right(node);
2416 if(ia32_cg_config.use_fucomi) {
2417 new_right = be_transform_node(right);
2418 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2420 set_ia32_commutative(new_node);
2421 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* comparison against 0.0 can use the shorter ftst */
2423 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2424 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2427 new_right = be_transform_node(right);
2428 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2432 set_ia32_commutative(new_node);
2434 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* sahf copies AH (the fnstsw result) into the CPU flags register */
2436 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2437 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Creates an SSE2 float compare (ucomiss/ucomisd) which sets the CPU
 * flags directly; supports source address mode for one operand. */
2443 static ir_node *create_Ucomi(ir_node *node)
2445 ir_graph *irg = current_ir_graph;
2446 dbg_info *dbgi = get_irn_dbg_info(node);
2447 ir_node *src_block = get_nodes_block(node);
2448 ir_node *new_block = be_transform_node(src_block);
2449 ir_node *left = get_Cmp_left(node);
2450 ir_node *right = get_Cmp_right(node);
2452 ia32_address_mode_t am;
2453 ia32_address_t *addr = &am.addr;
2455 match_arguments(&am, src_block, left, right, NULL,
2456 match_commutative | match_am);
2458 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2459 addr->mem, am.new_op1, am.new_op2,
2461 set_am_attributes(new_node, &am);
2463 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2465 new_node = fix_mem_proj(new_node, &am);
2471 * helper function: checks wether all Cmp projs are Lg or Eq which is needed
2472 * to fold an and into a test node
/* (Test only sets flags meaningfully for equality-style comparisons of
 * the And result, hence the restriction to Eq/Lg projs) */
2474 static int can_fold_test_and(ir_node *node)
2476 const ir_edge_t *edge;
2478 /** we can only have eq and lg projs */
2479 foreach_out_edge(node, edge) {
2480 ir_node *proj = get_edge_src_irn(edge);
2481 pn_Cmp pnc = get_Proj_proj(proj);
2482 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/**
 * Transforms a firm Cmp node into an ia32 Cmp/Test (integer) or dispatches
 * to the SSE2 (Ucomi) / x87 (Fucom) float compare helpers.
 * NOTE(review): several source lines of this function are elided in this view.
 */
2489 static ir_node *gen_Cmp(ir_node *node)
2491 ir_graph *irg = current_ir_graph;
2492 dbg_info *dbgi = get_irn_dbg_info(node);
2493 ir_node *block = get_nodes_block(node);
2494 ir_node *new_block = be_transform_node(block);
2495 ir_node *left = get_Cmp_left(node);
2496 ir_node *right = get_Cmp_right(node);
2497 ir_mode *cmp_mode = get_irn_mode(left);
2499 ia32_address_mode_t am;
2500 ia32_address_t *addr = &am.addr;
/* float compares are handled by dedicated helpers */
2503 if(mode_is_float(cmp_mode)) {
2504 if (ia32_cg_config.use_sse2) {
2505 return create_Ucomi(node);
2507 return create_Fucom(node);
2511 assert(mode_needs_gp_reg(cmp_mode));
2513 /* we prefer the Test instruction where possible except cases where
2514 * we can use SourceAM */
2515 cmp_unsigned = !mode_is_signed(cmp_mode);
2516 if (is_Const_0(right)) {
/* Cmp(x, 0): try to fold an And feeding the compare into a Test —
 * only valid if the And has a single user and all Projs are Eq/Lg */
2518 get_irn_n_edges(left) == 1 &&
2519 can_fold_test_and(node)) {
2520 /* Test(and_left, and_right) */
2521 ir_node *and_left = get_And_left(left);
2522 ir_node *and_right = get_And_right(left);
2523 ir_mode *mode = get_irn_mode(and_left);
2525 match_arguments(&am, block, and_left, and_right, NULL,
2527 match_am | match_8bit_am | match_16bit_am |
2528 match_am_and_immediates | match_immediate |
2529 match_8bit | match_16bit);
2530 if (get_mode_size_bits(mode) == 8) {
2531 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2532 addr->index, addr->mem, am.new_op1,
2533 am.new_op2, am.ins_permuted,
2536 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2537 addr->index, addr->mem, am.new_op1,
2538 am.new_op2, am.ins_permuted, cmp_unsigned);
/* no foldable And: see whether the left operand can come from memory */
2541 match_arguments(&am, block, NULL, left, NULL,
2542 match_am | match_8bit_am | match_16bit_am |
2543 match_8bit | match_16bit);
2544 if (am.op_type == ia32_AddrModeS) {
/* compare memory operand directly against an immediate zero */
2546 ir_node *imm_zero = try_create_Immediate(right, 0);
2547 if (get_mode_size_bits(cmp_mode) == 8) {
2548 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2549 addr->index, addr->mem, am.new_op2,
2550 imm_zero, am.ins_permuted,
2553 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2554 addr->index, addr->mem, am.new_op2,
2555 imm_zero, am.ins_permuted, cmp_unsigned);
2558 /* Test(left, left) */
2559 if (get_mode_size_bits(cmp_mode) == 8) {
2560 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2561 addr->index, addr->mem, am.new_op2,
2562 am.new_op2, am.ins_permuted,
2565 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2566 addr->index, addr->mem, am.new_op2,
2567 am.new_op2, am.ins_permuted,
2573 /* Cmp(left, right) */
2574 match_arguments(&am, block, left, right, NULL,
2575 match_commutative | match_am | match_8bit_am |
2576 match_16bit_am | match_am_and_immediates |
2577 match_immediate | match_8bit | match_16bit);
2578 if (get_mode_size_bits(cmp_mode) == 8) {
2579 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2580 addr->index, addr->mem, am.new_op1,
2581 am.new_op2, am.ins_permuted,
2584 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2585 addr->index, addr->mem, am.new_op1,
2586 am.new_op2, am.ins_permuted, cmp_unsigned);
2589 set_am_attributes(new_node, &am);
2590 assert(cmp_mode != NULL);
2591 set_ia32_ls_mode(new_node, cmp_mode);
2593 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2595 new_node = fix_mem_proj(new_node, &am);
/**
 * Creates an ia32 CMov from a Psi node (one condition, gp-register values).
 * Note the operand order: val_false is op1 and val_true is op2 — the CMov
 * moves op2 when the condition holds, otherwise op1 remains.
 * NOTE(review): several source lines of this function are elided in this view.
 */
2600 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2603 ir_graph *irg = current_ir_graph;
2604 dbg_info *dbgi = get_irn_dbg_info(node);
2605 ir_node *block = get_nodes_block(node);
2606 ir_node *new_block = be_transform_node(block);
2607 ir_node *val_true = get_Psi_val(node, 0);
2608 ir_node *val_false = get_Psi_default(node);
2610 match_flags_t match_flags;
2611 ia32_address_mode_t am;
2612 ia32_address_t *addr;
/* the caller must have checked that cmov is available on this CPU */
2614 assert(ia32_cg_config.use_cmov);
2615 assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2619 match_flags = match_commutative | match_am | match_16bit_am |
2622 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2624 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2625 addr->mem, am.new_op1, am.new_op2, new_flags,
2626 am.ins_permuted, pnc);
2627 set_am_attributes(new_node, &am);
2629 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2631 new_node = fix_mem_proj(new_node, &am);
/**
 * Creates an ia32 Set (setcc) producing 0/1 from the given flags value,
 * then widens the 8-bit result with a Conv if the original node's mode is
 * wider than 8 bits.
 */
2638 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2639 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2642 ir_graph *irg = current_ir_graph;
2643 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2644 ir_node *nomem = new_NoMem();
2645 ir_mode *mode = get_irn_mode(orig_node);
2648 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2649 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2651 /* we might need to conv the result up */
2652 if(get_mode_size_bits(mode) > 8) {
/* zero-extend the 8-bit setcc result (mode_Bu = unsigned byte) */
2653 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2654 nomem, new_node, mode_Bu);
2655 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2662 * Transforms a Psi node into a CMov (or a Set for 0/1 selections).
2664 * @return The transformed node.
2666 static ir_node *gen_Psi(ir_node *node)
2668 dbg_info *dbgi = get_irn_dbg_info(node);
2669 ir_node *block = get_nodes_block(node);
2670 ir_node *new_block = be_transform_node(block);
2671 ir_node *psi_true = get_Psi_val(node, 0);
2672 ir_node *psi_default = get_Psi_default(node);
2673 ir_node *cond = get_Psi_cond(node, 0);
2674 ir_node *flags = NULL;
/* only single-condition, gp-register Psis are handled here */
2678 assert(get_Psi_n_conds(node) == 1);
2679 assert(get_irn_mode(cond) == mode_b);
2680 assert(mode_needs_gp_reg(get_irn_mode(node)));
2682 flags = get_flags_node(cond, &pnc);
/* Psi(c, 1, 0) and Psi(c, 0, 1) can use setcc directly; the last
 * argument permutes (negates) the condition for the inverted case */
2684 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2685 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2686 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2687 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2689 new_node = create_CMov(node, cond, flags, pnc);
2696 * Create a conversion from x87 state register to general purpose.
 * Implemented as an fist store to the frame followed by an integer Load.
2698 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2699 ir_node *block = be_transform_node(get_nodes_block(node));
2700 ir_node *op = get_Conv_op(node);
2701 ir_node *new_op = be_transform_node(op);
2702 ia32_code_gen_t *cg = env_cg;
2703 ir_graph *irg = current_ir_graph;
2704 dbg_info *dbgi = get_irn_dbg_info(node);
2705 ir_node *noreg = ia32_new_NoReg_gp(cg);
/* fist truncates, so force the FPU rounding mode to truncation */
2706 ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2707 ir_mode *mode = get_irn_mode(node);
2708 ir_node *fist, *load;
2711 fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2712 new_NoMem(), new_op, trunc_mode);
2714 set_irn_pinned(fist, op_pin_state_floats);
2715 set_ia32_use_frame(fist);
2716 set_ia32_op_type(fist, ia32_AddrModeD);
2718 assert(get_mode_size_bits(mode) <= 32);
2719 /* exception we can only store signed 32 bit integers, so for unsigned
2720 we store a 64bit (signed) integer and load the lower bits */
2721 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2722 set_ia32_ls_mode(fist, mode_Ls);
2724 set_ia32_ls_mode(fist, mode_Is);
2726 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* load the (lower 32 bits of the) stored integer back from the frame */
2729 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2731 set_irn_pinned(load, op_pin_state_floats);
2732 set_ia32_use_frame(load);
2733 set_ia32_op_type(load, ia32_AddrModeS);
2734 set_ia32_ls_mode(load, mode_Is);
/* tell spill-slot assignment how large the stack entity must be */
2735 if(get_ia32_ls_mode(fist) == mode_Ls) {
2736 ia32_attr_t *attr = get_ia32_attr(load);
2737 attr->data.need_64bit_stackent = 1;
2739 ia32_attr_t *attr = get_ia32_attr(load);
2740 attr->data.need_32bit_stackent = 1;
2742 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2744 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2748 * Creates a x87 strict Conv by placing a Store and a Load: the round-trip
 * through memory rounds the 80-bit x87 value to the target precision.
2750 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2752 ir_node *block = get_nodes_block(node);
2753 ir_graph *irg = current_ir_graph;
2754 dbg_info *dbgi = get_irn_dbg_info(node);
2755 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2756 ir_node *nomem = new_NoMem();
2757 ir_node *frame = get_irg_frame(irg);
2758 ir_node *store, *load;
/* spill in the target mode to force the precision reduction */
2761 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
2763 set_ia32_use_frame(store);
2764 set_ia32_op_type(store, ia32_AddrModeD);
2765 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2767 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
2769 set_ia32_use_frame(load);
2770 set_ia32_op_type(load, ia32_AddrModeS);
2771 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
2773 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
/**
 * Creates an ia32 Immediate node (placed in the start block, assigned the
 * virtual GP_NOREG register since it needs no real register).
 *
 * @param symconst       entity referenced by the immediate, or NULL
 * @param symconst_sign  non-zero if the symconst is negated
 * @param val            constant offset value
 */
2777 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
2779 ir_graph *irg = current_ir_graph;
2780 ir_node *start_block = get_irg_start_block(irg);
2781 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block,
2782 symconst, symconst_sign, val);
2783 arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
2789 * Create a conversion from general purpose to x87 register (via fild).
 * Values narrower than 32 bits are first widened; unsigned 32-bit values
 * are spilled as 64-bit with an explicit zero upper half.
2791 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
2792 ir_node *src_block = get_nodes_block(node);
2793 ir_node *block = be_transform_node(src_block);
2794 ir_graph *irg = current_ir_graph;
2795 dbg_info *dbgi = get_irn_dbg_info(node);
2796 ir_node *op = get_Conv_op(node);
2797 ir_node *new_op = NULL;
2801 ir_mode *store_mode;
2807 /* fild can use source AM if the operand is a signed 32bit integer */
2808 if (src_mode == mode_Is) {
2809 ia32_address_mode_t am;
2811 match_arguments(&am, src_block, NULL, op, NULL,
2812 match_am | match_try_am);
2813 if (am.op_type == ia32_AddrModeS) {
2814 ia32_address_t *addr = &am.addr;
2816 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
2817 addr->index, addr->mem);
2818 new_node = new_r_Proj(irg, block, fild, mode_vfp,
2821 set_am_attributes(fild, &am);
2822 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
2824 fix_mem_proj(fild, &am);
/* no source address mode possible: transform the operand normally */
2829 if(new_op == NULL) {
2830 new_op = be_transform_node(op);
2833 noreg = ia32_new_NoReg_gp(env_cg);
2834 nomem = new_NoMem();
2835 mode = get_irn_mode(op);
2837 /* first convert to 32 bit signed if necessary */
2838 src_bits = get_mode_size_bits(src_mode);
2839 if (src_bits == 8) {
2840 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
2842 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2844 } else if (src_bits < 32) {
2845 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
2847 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2851 assert(get_mode_size_bits(mode) == 32);
/* spill the gp value to the frame so fild can pick it up */
2854 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
2857 set_ia32_use_frame(store);
2858 set_ia32_op_type(store, ia32_AddrModeD);
2859 set_ia32_ls_mode(store, mode_Iu);
2861 /* exception for 32bit unsigned, do a 64bit spill+load */
2862 if(!mode_is_signed(mode)) {
/* store a zero word above the value so fild reads a non-negative
 * 64-bit (signed) integer */
2865 ir_node *zero_const = create_Immediate(NULL, 0, 0);
2867 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
2868 get_irg_frame(irg), noreg, nomem,
2871 set_ia32_use_frame(zero_store);
2872 set_ia32_op_type(zero_store, ia32_AddrModeD);
2873 add_ia32_am_offs_int(zero_store, 4);
2874 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must depend on both stores */
2879 store = new_rd_Sync(dbgi, irg, block, 2, in);
2880 store_mode = mode_Ls;
2882 store_mode = mode_Is;
2886 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
2888 set_ia32_use_frame(fild);
2889 set_ia32_op_type(fild, ia32_AddrModeS);
2890 set_ia32_ls_mode(fild, store_mode);
2892 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
2898 * Create a conversion from one integer mode into another one.
 * Emits an ia32 Conv_I2I(8Bit) whose load/store mode is the smaller of the
 * two modes (the conversion only has to respect the narrower width).
2900 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
2901 dbg_info *dbgi, ir_node *block, ir_node *op,
2904 ir_graph *irg = current_ir_graph;
2905 int src_bits = get_mode_size_bits(src_mode);
2906 int tgt_bits = get_mode_size_bits(tgt_mode);
2907 ir_node *new_block = be_transform_node(block);
2909 ir_mode *smaller_mode;
2911 ia32_address_mode_t am;
2912 ia32_address_t *addr = &am.addr;
/* the narrower mode determines the conversion semantics */
2915 if (src_bits < tgt_bits) {
2916 smaller_mode = src_mode;
2917 smaller_bits = src_bits;
2919 smaller_mode = tgt_mode;
2920 smaller_bits = tgt_bits;
2923 #ifdef DEBUG_libfirm
2925 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
2930 match_arguments(&am, block, NULL, op, NULL,
2931 match_8bit | match_16bit |
2932 match_am | match_8bit_am | match_16bit_am);
2933 if (smaller_bits == 8) {
2934 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
2935 addr->index, addr->mem, am.new_op2,
2938 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
2939 addr->index, addr->mem, am.new_op2,
2942 set_am_attributes(new_node, &am);
2943 /* match_arguments assume that out-mode = in-mode, this isn't true here
 * so fix the load/store mode explicitly */
2945 set_ia32_ls_mode(new_node, smaller_mode);
2946 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2947 new_node = fix_mem_proj(new_node, &am);
2952 * Transforms a Conv node.
2954 * @return The created ia32 Conv node
2956 static ir_node *gen_Conv(ir_node *node) {
2957 ir_node *block = get_nodes_block(node);
2958 ir_node *new_block = be_transform_node(block);
2959 ir_node *op = get_Conv_op(node);
2960 ir_node *new_op = NULL;
2961 ir_graph *irg = current_ir_graph;
2962 dbg_info *dbgi = get_irn_dbg_info(node);
2963 ir_mode *src_mode = get_irn_mode(op);
2964 ir_mode *tgt_mode = get_irn_mode(node);
2965 int src_bits = get_mode_size_bits(src_mode);
2966 int tgt_bits = get_mode_size_bits(tgt_mode);
2967 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2968 ir_node *nomem = new_rd_NoMem(irg);
2969 ir_node *res = NULL;
2971 if (src_mode == mode_b) {
2972 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
2973 /* nothing to do, we already model bools as 0/1 ints */
2974 return be_transform_node(op);
/* same mode: only strict x87 convs may still need a rounding step */
2977 if (src_mode == tgt_mode) {
2978 if (get_Conv_strict(node)) {
2979 if (ia32_cg_config.use_sse2) {
2980 /* when we are in SSE mode, we can kill all strict no-op conversion */
2981 return be_transform_node(op);
2984 /* this should be optimized already, but who knows... */
2985 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
2986 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
2987 return be_transform_node(op);
2991 if (mode_is_float(src_mode)) {
2992 new_op = be_transform_node(op);
2993 /* we convert from float ... */
2994 if (mode_is_float(tgt_mode)) {
/* non-strict narrowing E->D is a no-op on x87 */
2995 if(src_mode == mode_E && tgt_mode == mode_D
2996 && !get_Conv_strict(node)) {
2997 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3002 if (ia32_cg_config.use_sse2) {
3003 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3004 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3006 set_ia32_ls_mode(res, tgt_mode);
3008 if(get_Conv_strict(node)) {
/* x87 strict conv: force rounding via store+load */
3009 res = gen_x87_strict_conv(tgt_mode, new_op);
3010 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3013 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3018 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3019 if (ia32_cg_config.use_sse2) {
3020 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3022 set_ia32_ls_mode(res, src_mode);
3024 return gen_x87_fp_to_gp(node);
3028 /* we convert from int ... */
3029 if (mode_is_float(tgt_mode)) {
3031 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3032 if (ia32_cg_config.use_sse2) {
3033 new_op = be_transform_node(op);
3034 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3036 set_ia32_ls_mode(res, tgt_mode);
3038 res = gen_x87_gp_to_fp(node, src_mode);
3039 if(get_Conv_strict(node)) {
3040 res = gen_x87_strict_conv(tgt_mode, res);
3041 SET_IA32_ORIG_NODE(get_Proj_pred(res),
3042 ia32_get_old_node_name(env_cg, node));
3046 } else if(tgt_mode == mode_b) {
3047 /* mode_b lowering already took care that we only have 0/1 values */
3048 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3049 src_mode, tgt_mode));
3050 return be_transform_node(op);
/* int -> int of the same width needs no code */
3053 if (src_bits == tgt_bits) {
3054 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3055 src_mode, tgt_mode));
3056 return be_transform_node(op);
3059 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Checks whether the long value fits the given gcc-style immediate
 * constraint letter (the case labels are elided in this view; ranges
 * correspond to the i386 machine constraints such as 'I', 'J', 'K', ...).
 */
3067 static int check_immediate_constraint(long val, char immediate_constraint_type)
3069 switch (immediate_constraint_type) {
3073 return val >= 0 && val <= 32;
3075 return val >= 0 && val <= 63;
3077 return val >= -128 && val <= 127;
3079 return val == 0xff || val == 0xffff;
3081 return val >= 0 && val <= 3;
3083 return val >= 0 && val <= 255;
3085 return val >= 0 && val <= 127;
3089 panic("Invalid immediate constraint found");
/**
 * Tries to express the given node as an ia32 Immediate (constant, symconst,
 * or const +/- symconst combination). Returns NULL if the node cannot be
 * encoded as an immediate satisfying the given constraint letter
 * (0 = no restriction).
 * NOTE(review): several source lines of this function are elided in this view.
 */
3093 static ir_node *try_create_Immediate(ir_node *node,
3094 char immediate_constraint_type)
3097 tarval *offset = NULL;
3098 int offset_sign = 0;
3100 ir_entity *symconst_ent = NULL;
3101 int symconst_sign = 0;
3103 ir_node *cnst = NULL;
3104 ir_node *symconst = NULL;
3107 mode = get_irn_mode(node);
3108 if(!mode_is_int(mode) && !mode_is_reference(mode)) {
/* an outer Minus negates both the offset and the symconst */
3112 if(is_Minus(node)) {
3114 node = get_Minus_op(node);
3117 if(is_Const(node)) {
3120 offset_sign = minus;
3121 } else if(is_SymConst(node)) {
3124 symconst_sign = minus;
3125 } else if(is_Add(node)) {
3126 ir_node *left = get_Add_left(node);
3127 ir_node *right = get_Add_right(node);
3128 if(is_Const(left) && is_SymConst(right)) {
3131 symconst_sign = minus;
3132 offset_sign = minus;
3133 } else if(is_SymConst(left) && is_Const(right)) {
3136 symconst_sign = minus;
3137 offset_sign = minus;
3139 } else if(is_Sub(node)) {
3140 ir_node *left = get_Sub_left(node);
3141 ir_node *right = get_Sub_right(node);
/* Sub flips the sign of whichever operand is subtracted */
3142 if(is_Const(left) && is_SymConst(right)) {
3145 symconst_sign = !minus;
3146 offset_sign = minus;
3147 } else if(is_SymConst(left) && is_Const(right)) {
3150 symconst_sign = minus;
3151 offset_sign = !minus;
3158 offset = get_Const_tarval(cnst);
3159 if(tarval_is_long(offset)) {
3160 val = get_tarval_long(offset);
3162 ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
3167 if(!check_immediate_constraint(val, immediate_constraint_type))
3170 if(symconst != NULL) {
3171 if(immediate_constraint_type != 0) {
3172 /* we need full 32bits for symconsts */
3176 /* unfortunately the assembler/linker doesn't support -symconst */
3180 if(get_SymConst_kind(symconst) != symconst_addr_ent)
3182 symconst_ent = get_SymConst_entity(symconst);
3184 if(cnst == NULL && symconst == NULL)
3187 if(offset_sign && offset != NULL) {
3188 offset = tarval_neg(offset);
3191 new_node = create_Immediate(symconst_ent, symconst_sign, val);
/**
 * Tries to create an Immediate for the node; falls back to the normal
 * transformation if the node cannot be encoded as an immediate.
 */
3196 static ir_node *create_immediate_or_transform(ir_node *node,
3197 char immediate_constraint_type)
3199 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3200 if (new_node == NULL) {
3201 new_node = be_transform_node(node);
/** Register requirement used for operands that need no register at all
 * (e.g. pure memory asm constraints). */
3206 static const arch_register_req_t no_register_req = {
3207 arch_register_req_type_none,
3208 NULL, /* regclass */
3209 NULL, /* limit bitset */
3211 0 /* different pos */
3215 * An assembler constraint, as produced by parse_asm_constraint().
3217 typedef struct constraint_t constraint_t;
3218 struct constraint_t {
3221 const arch_register_req_t **out_reqs;  /* requirements of all outputs (for same_as) */
3223 const arch_register_req_t *req;        /* requirement parsed for this operand */
3224 unsigned immediate_possible;           /* operand may be an immediate */
3225 char immediate_type;                   /* constraint letter for the immediate, 0 = any */
/**
 * Parses one gcc-style inline-asm constraint string into a register
 * requirement stored in *constraint.
 *
 * @param pos         operand position (used for same_as bookkeeping)
 * @param constraint  in/out: is_in/n_outs/out_reqs must be set by the caller;
 *                    req, immediate_possible and immediate_type are filled in
 * @param c           the constraint string to parse
 *
 * NOTE(review): the switch labels and several lines of this function are
 * elided in this view; the bodies below belong to the single-register
 * ('a','b','c','d','D','S'), register-class ('q','A','Q','r','t'/'u','x'),
 * immediate ('n'/'i', 'g'), same_as (digits) and memory ('m') cases.
 */
3228 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3230 int immediate_possible = 0;
3231 char immediate_type = 0;
3232 unsigned limited = 0;
3233 const arch_register_class_t *cls = NULL;
3234 ir_graph *irg = current_ir_graph;
3235 struct obstack *obst = get_irg_obstack(irg);
3236 arch_register_req_t *req;
3237 unsigned *limited_ptr = NULL;
3241 /* TODO: replace all the asserts with nice error messages */
3244 /* a memory constraint: no need to do anything in backend about it
3245 * (the dependencies are already respected by the memory edge of
3247 constraint->req = &no_register_req;
3259 assert(cls == NULL ||
3260 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3261 cls = &ia32_reg_classes[CLASS_ia32_gp];
3262 limited |= 1 << REG_EAX;
3265 assert(cls == NULL ||
3266 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3267 cls = &ia32_reg_classes[CLASS_ia32_gp];
3268 limited |= 1 << REG_EBX;
3271 assert(cls == NULL ||
3272 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3273 cls = &ia32_reg_classes[CLASS_ia32_gp];
3274 limited |= 1 << REG_ECX;
3277 assert(cls == NULL ||
3278 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3279 cls = &ia32_reg_classes[CLASS_ia32_gp];
3280 limited |= 1 << REG_EDX;
3283 assert(cls == NULL ||
3284 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3285 cls = &ia32_reg_classes[CLASS_ia32_gp];
3286 limited |= 1 << REG_EDI;
3289 assert(cls == NULL ||
3290 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3291 cls = &ia32_reg_classes[CLASS_ia32_gp];
3292 limited |= 1 << REG_ESI;
3295 case 'q': /* q means lower part of the regs only, this makes no
3296 * difference to Q for us (we only assign whole registers) */
3297 assert(cls == NULL ||
3298 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3299 cls = &ia32_reg_classes[CLASS_ia32_gp];
3300 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3304 assert(cls == NULL ||
3305 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3306 cls = &ia32_reg_classes[CLASS_ia32_gp];
3307 limited |= 1 << REG_EAX | 1 << REG_EDX;
3310 assert(cls == NULL ||
3311 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3312 cls = &ia32_reg_classes[CLASS_ia32_gp];
3313 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3314 1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
3321 assert(cls == NULL);
3322 cls = &ia32_reg_classes[CLASS_ia32_gp];
3328 /* TODO: mark values so the x87 simulator knows about t and u */
3329 assert(cls == NULL);
3330 cls = &ia32_reg_classes[CLASS_ia32_vfp];
3335 assert(cls == NULL);
3336 /* TODO: check that sse2 is supported */
3337 cls = &ia32_reg_classes[CLASS_ia32_xmm];
3347 assert(!immediate_possible);
3348 immediate_possible = 1;
3349 immediate_type = *c;
3353 assert(!immediate_possible);
3354 immediate_possible = 1;
3358 assert(!immediate_possible && cls == NULL);
3359 immediate_possible = 1;
3360 cls = &ia32_reg_classes[CLASS_ia32_gp];
/* digit: gcc "matching" (same_as) constraint */
3373 assert(constraint->is_in && "can only specify same constraint "
3376 sscanf(c, "%d%n", &same_as, &p);
3384 /* memory constraint no need to do anything in backend about it
3385 * (the dependencies are already respected by the memory edge of
3387 constraint->req = &no_register_req;
3390 case 'E': /* no float consts yet */
3391 case 'F': /* no float consts yet */
3392 case 's': /* makes no sense on x86 */
3393 case 'X': /* we can't support that in firm */
3396 case '<': /* no autodecrement on x86 */
3397 case '>': /* no autoincrement on x86 */
3398 case 'C': /* sse constant not supported yet */
3399 case 'G': /* 80387 constant not supported yet */
3400 case 'y': /* we don't support mmx registers yet */
3401 case 'Z': /* not available in 32 bit mode */
3402 case 'e': /* not available in 32 bit mode */
3403 panic("unsupported asm constraint '%c' found in (%+F)",
3404 *c, current_ir_graph);
3407 panic("unknown asm constraint '%c' found in (%+F)", *c,
/* handle a parsed same_as constraint */
3415 const arch_register_req_t *other_constr;
3417 assert(cls == NULL && "same as and register constraint not supported");
3418 assert(!immediate_possible && "same as and immediate constraint not "
3420 assert(same_as < constraint->n_outs && "wrong constraint number in "
3421 "same_as constraint");
3423 other_constr = constraint->out_reqs[same_as];
3425 req = obstack_alloc(obst, sizeof(req[0]));
3426 req->cls = other_constr->cls;
3427 req->type = arch_register_req_type_should_be_same;
3428 req->limited = NULL;
3429 req->other_same = 1U << pos;
3430 req->other_different = 0;
3432 /* switch constraints. This is because in firm we have same_as
3433 * constraints on the output constraints while in the gcc asm syntax
3434 * they are specified on the input constraints */
3435 constraint->req = other_constr;
3436 constraint->out_reqs[same_as] = req;
3437 constraint->immediate_possible = 0;
/* build the final register requirement */
3441 if(immediate_possible && cls == NULL) {
3442 cls = &ia32_reg_classes[CLASS_ia32_gp];
3444 assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3445 assert(cls != NULL);
3447 if(immediate_possible) {
3448 assert(constraint->is_in
3449 && "immediate make no sense for output constraints");
3451 /* todo: check types (no float input on 'r' constrained in and such... */
/* limited requirements carry their bitset directly behind the struct */
3454 req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3455 limited_ptr = (unsigned*) (req+1);
3457 req = obstack_alloc(obst, sizeof(req[0]));
3459 memset(req, 0, sizeof(req[0]));
3462 req->type = arch_register_req_type_limited;
3463 *limited_ptr = limited;
3464 req->limited = limited_ptr;
3466 req->type = arch_register_req_type_normal;
3470 constraint->req = req;
3471 constraint->immediate_possible = immediate_possible;
3472 constraint->immediate_type = immediate_type;
/**
 * Parses one asm clobber name into a limited register requirement pinning
 * exactly the clobbered register. For gp registers the name may also be
 * given without its leading character (e.g. "ax" for "eax").
 */
3475 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3476 const char *clobber)
3478 ir_graph *irg = get_irn_irg(node);
3479 struct obstack *obst = get_irg_obstack(irg);
3480 const arch_register_t *reg = NULL;
3483 arch_register_req_t *req;
3484 const arch_register_class_t *cls;
3489 /* TODO: construct a hashmap instead of doing linear search for clobber
3491 for(c = 0; c < N_CLASSES; ++c) {
3492 cls = & ia32_reg_classes[c];
3493 for(r = 0; r < cls->n_regs; ++r) {
3494 const arch_register_t *temp_reg = arch_register_for_index(cls, r);
3495 if(strcmp(temp_reg->name, clobber) == 0
3496 || (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3505 panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* limited bitset is a single unsigned, so only 32 registers fit */
3509 assert(reg->index < 32);
3511 limited = obstack_alloc(obst, sizeof(limited[0]));
3512 *limited = 1 << reg->index;
3514 req = obstack_alloc(obst, sizeof(req[0]));
3515 memset(req, 0, sizeof(req[0]));
3516 req->type = arch_register_req_type_limited;
3518 req->limited = limited;
3520 constraint->req = req;
3521 constraint->immediate_possible = 0;
3522 constraint->immediate_type = 0;
/**
 * Returns whether the constraint string contains a memory constraint
 * character (scan body elided in this view).
 */
3525 static int is_memory_op(const ir_asm_constraint *constraint)
3527 ident *id = constraint->constraint;
3528 const char *str = get_id_str(id);
3531 for(c = str; *c != '\0'; ++c) {
3540 * Generates code for an ASM node: parses all output, clobber and input
 * constraints into register requirements, builds the register map and
 * creates the ia32 Asm node.
 * NOTE(review): several source lines of this function are elided in this view.
3542 static ir_node *gen_ASM(ir_node *node)
3545 ir_graph *irg = current_ir_graph;
3546 ir_node *block = get_nodes_block(node);
3547 ir_node *new_block = be_transform_node(block);
3548 dbg_info *dbgi = get_irn_dbg_info(node);
3552 int n_out_constraints;
3554 const arch_register_req_t **out_reg_reqs;
3555 const arch_register_req_t **in_reg_reqs;
3556 ia32_asm_reg_t *register_map;
3557 unsigned reg_map_size = 0;
3558 struct obstack *obst;
3559 const ir_asm_constraint *in_constraints;
3560 const ir_asm_constraint *out_constraints;
3562 constraint_t parsed_constraint;
3564 arity = get_irn_arity(node);
3565 in = alloca(arity * sizeof(in[0]));
3566 memset(in, 0, arity * sizeof(in[0]));
3568 n_out_constraints = get_ASM_n_output_constraints(node);
3569 n_clobbers = get_ASM_n_clobbers(node);
3570 out_arity = n_out_constraints + n_clobbers;
3571 /* hack to keep space for mem proj */
3575 in_constraints = get_ASM_input_constraints(node);
3576 out_constraints = get_ASM_output_constraints(node);
3577 clobbers = get_ASM_clobbers(node);
3579 /* construct output constraints */
3580 obst = get_irg_obstack(irg);
3581 out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3582 parsed_constraint.out_reqs = out_reg_reqs;
3583 parsed_constraint.n_outs = n_out_constraints;
3584 parsed_constraint.is_in = 0;
3586 for(i = 0; i < out_arity; ++i) {
3589 if(i < n_out_constraints) {
3590 const ir_asm_constraint *constraint = &out_constraints[i];
3591 c = get_id_str(constraint->constraint);
3592 parse_asm_constraint(i, &parsed_constraint, c);
/* track the highest user-visible operand position for the map size */
3594 if(constraint->pos > reg_map_size)
3595 reg_map_size = constraint->pos;
3597 out_reg_reqs[i] = parsed_constraint.req;
3598 } else if(i < out_arity - 1) {
3599 ident *glob_id = clobbers [i - n_out_constraints];
3600 assert(glob_id != NULL);
3601 c = get_id_str(glob_id);
3602 parse_clobber(node, i, &parsed_constraint, c);
/* clobber reqs are shifted by one to leave the mem-proj slot free */
3604 out_reg_reqs[i+1] = parsed_constraint.req;
3608 out_reg_reqs[n_out_constraints] = &no_register_req;
3610 /* construct input constraints */
3611 in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3612 parsed_constraint.is_in = 1;
3613 for(i = 0; i < arity; ++i) {
3614 const ir_asm_constraint *constraint = &in_constraints[i];
3615 ident *constr_id = constraint->constraint;
3616 const char *c = get_id_str(constr_id);
3618 parse_asm_constraint(i, &parsed_constraint, c);
3619 in_reg_reqs[i] = parsed_constraint.req;
3621 if(constraint->pos > reg_map_size)
3622 reg_map_size = constraint->pos;
3624 if(parsed_constraint.immediate_possible) {
3625 ir_node *pred = get_irn_n(node, i);
3626 char imm_type = parsed_constraint.immediate_type;
3627 ir_node *immediate = try_create_Immediate(pred, imm_type);
3629 if(immediate != NULL) {
/* build the operand-position -> in/out index map used by the emitter */
3636 register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3637 memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
3639 for(i = 0; i < n_out_constraints; ++i) {
3640 const ir_asm_constraint *constraint = &out_constraints[i];
3641 unsigned pos = constraint->pos;
3643 assert(pos < reg_map_size);
3644 register_map[pos].use_input = 0;
3645 register_map[pos].valid = 1;
3646 register_map[pos].memory = is_memory_op(constraint);
3647 register_map[pos].inout_pos = i;
3648 register_map[pos].mode = constraint->mode;
3651 /* transform inputs */
3652 for(i = 0; i < arity; ++i) {
3653 const ir_asm_constraint *constraint = &in_constraints[i];
3654 unsigned pos = constraint->pos;
3655 ir_node *pred = get_irn_n(node, i);
3656 ir_node *transformed;
3658 assert(pos < reg_map_size);
3659 register_map[pos].use_input = 1;
3660 register_map[pos].valid = 1;
3661 register_map[pos].memory = is_memory_op(constraint);
3662 register_map[pos].inout_pos = i;
3663 register_map[pos].mode = constraint->mode;
3668 transformed = be_transform_node(pred);
3669 in[i] = transformed;
3672 new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3673 get_ASM_text(node), register_map);
3675 set_ia32_out_req_all(new_node, out_reg_reqs);
3676 set_ia32_in_req_all(new_node, in_reg_reqs);
3678 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3684 * Transforms a FrameAddr into an ia32 Lea on the frame pointer with the
 * frame entity attached (the entity offset is resolved later).
3686 static ir_node *gen_be_FrameAddr(ir_node *node) {
3687 ir_node *block = be_transform_node(get_nodes_block(node));
3688 ir_node *op = be_get_FrameAddr_frame(node);
3689 ir_node *new_op = be_transform_node(op);
3690 ir_graph *irg = current_ir_graph;
3691 dbg_info *dbgi = get_irn_dbg_info(node);
3692 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3695 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3696 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3697 set_ia32_use_frame(new_node);
3699 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3705 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
 * Implemented as xmm store to the frame + x87 load, with a rebuilt Barrier
 * whose result/memory inputs are replaced by the loaded value.
3707 static ir_node *gen_be_Return(ir_node *node) {
3708 ir_graph *irg = current_ir_graph;
3709 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3710 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3711 ir_entity *ent = get_irg_entity(irg);
3712 ir_type *tp = get_entity_type(ent);
3717 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3718 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3721 int pn_ret_val, pn_ret_mem, arity, i;
/* the special handling only applies to SSE float returns */
3723 assert(ret_val != NULL);
3724 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3725 return be_duplicate_node(node);
3728 res_type = get_method_res_type(tp, 0);
3730 if (! is_Primitive_type(res_type)) {
3731 return be_duplicate_node(node);
3734 mode = get_type_mode(res_type);
3735 if (! mode_is_float(mode)) {
3736 return be_duplicate_node(node);
3739 assert(get_method_n_ress(tp) == 1);
3741 pn_ret_val = get_Proj_proj(ret_val);
3742 pn_ret_mem = get_Proj_proj(ret_mem);
3744 /* get the Barrier */
3745 barrier = get_Proj_pred(ret_val);
3747 /* get result input of the Barrier */
3748 ret_val = get_irn_n(barrier, pn_ret_val);
3749 new_ret_val = be_transform_node(ret_val);
3751 /* get memory input of the Barrier */
3752 ret_mem = get_irn_n(barrier, pn_ret_mem);
3753 new_ret_mem = be_transform_node(ret_mem);
3755 frame = get_irg_frame(irg);
3757 dbgi = get_irn_dbg_info(barrier);
3758 block = be_transform_node(get_nodes_block(barrier));
3760 noreg = ia32_new_NoReg_gp(env_cg);
3762 /* store xmm0 onto stack */
3763 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3764 new_ret_mem, new_ret_val);
3765 set_ia32_ls_mode(sse_store, mode);
3766 set_ia32_op_type(sse_store, ia32_AddrModeD);
3767 set_ia32_use_frame(sse_store);
3769 /* load into x87 register */
3770 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3771 set_ia32_op_type(fld, ia32_AddrModeS);
3772 set_ia32_use_frame(fld);
3774 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3775 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3777 /* create a new barrier */
3778 arity = get_irn_arity(barrier);
3779 in = alloca(arity * sizeof(in[0]));
3780 for (i = 0; i < arity; ++i) {
/* substitute value/memory inputs with the x87 load and its mem proj */
3783 if (i == pn_ret_val) {
3785 } else if (i == pn_ret_mem) {
3788 ir_node *in = get_irn_n(barrier, i);
3789 new_in = be_transform_node(in);
3794 new_barrier = new_ir_node(dbgi, irg, block,
3795 get_irn_op(barrier), get_irn_mode(barrier),
3797 copy_node_attr(barrier, new_barrier);
3798 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the barrier isn't transformed again */
3799 be_set_transformed_node(barrier, new_barrier);
3800 mark_irn_visited(barrier);
3802 /* transform normally */
3803 return be_duplicate_node(node);
3807 * Transform a be_AddSP into an ia32_SubSP.
3809 static ir_node *gen_be_AddSP(ir_node *node)
3811 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3812 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3814 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
3818 * Transform a be_SubSP into an ia32_AddSP
3820 static ir_node *gen_be_SubSP(ir_node *node)
3822 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3823 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3825 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
3829 * This function just sets the register for the Unknown node
3830 * as this is not done during register allocation because Unknown
3831 * is an "ignore" node.
3833 static ir_node *gen_Unknown(ir_node *node) {
3834 ir_mode *mode = get_irn_mode(node);
3836 if (mode_is_float(mode)) {
3837 if (ia32_cg_config.use_sse2) {
3838 return ia32_new_Unknown_xmm(env_cg);
3840 /* Unknown nodes are buggy in x87 sim, use zero for now... */
3841 ir_graph *irg = current_ir_graph;
3842 dbg_info *dbgi = get_irn_dbg_info(node);
3843 ir_node *block = get_irg_start_block(irg);
3844 return new_rd_ia32_vfldz(dbgi, irg, block);
3846 } else if (mode_needs_gp_reg(mode)) {
3847 return ia32_new_Unknown_gp(env_cg);
3849 panic("unsupported Unknown-Mode");
3855 * Change some phi modes
3857 static ir_node *gen_Phi(ir_node *node) {
3858 ir_node *block = be_transform_node(get_nodes_block(node));
3859 ir_graph *irg = current_ir_graph;
3860 dbg_info *dbgi = get_irn_dbg_info(node);
3861 ir_mode *mode = get_irn_mode(node);
3864 if(mode_needs_gp_reg(mode)) {
3865 /* we shouldn't have any 64bit stuff around anymore */
3866 assert(get_mode_size_bits(mode) <= 32);
3867 /* all integer operations are on 32bit registers now */
3869 } else if(mode_is_float(mode)) {
3870 if (ia32_cg_config.use_sse2) {
3877 /* phi nodes allow loops, so we use the old arguments for now
3878 * and fix this later */
3879 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3880 get_irn_in(node) + 1);
3881 copy_node_attr(node, phi);
3882 be_duplicate_deps(node, phi);
3884 be_set_transformed_node(node, phi);
3885 be_enqueue_preds(node);
3893 static ir_node *gen_IJmp(ir_node *node)
3895 ir_node *block = get_nodes_block(node);
3896 ir_node *new_block = be_transform_node(block);
3897 ir_graph *irg = current_ir_graph;
3898 dbg_info *dbgi = get_irn_dbg_info(node);
3899 ir_node *op = get_IJmp_target(node);
3901 ia32_address_mode_t am;
3902 ia32_address_t *addr = &am.addr;
3904 assert(get_irn_mode(op) == mode_P);
3906 match_arguments(&am, block, NULL, op, NULL,
3907 match_am | match_8bit_am | match_16bit_am |
3908 match_immediate | match_8bit | match_16bit);
3910 new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
3911 addr->mem, am.new_op2);
3912 set_am_attributes(new_node, &am);
3913 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3915 new_node = fix_mem_proj(new_node, &am);
3920 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3923 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3924 ir_node *val, ir_node *mem);
3927 * Transforms a lowered Load into a "real" one.
3929 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
3931 ir_node *block = be_transform_node(get_nodes_block(node));
3932 ir_node *ptr = get_irn_n(node, 0);
3933 ir_node *new_ptr = be_transform_node(ptr);
3934 ir_node *mem = get_irn_n(node, 1);
3935 ir_node *new_mem = be_transform_node(mem);
3936 ir_graph *irg = current_ir_graph;
3937 dbg_info *dbgi = get_irn_dbg_info(node);
3938 ir_mode *mode = get_ia32_ls_mode(node);
3939 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3942 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
3944 set_ia32_op_type(new_op, ia32_AddrModeS);
3945 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
3946 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
3947 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
3948 if (is_ia32_am_sc_sign(node))
3949 set_ia32_am_sc_sign(new_op);
3950 set_ia32_ls_mode(new_op, mode);
3951 if (is_ia32_use_frame(node)) {
3952 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
3953 set_ia32_use_frame(new_op);
3956 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3962 * Transforms a lowered Store into a "real" one.
3964 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
3966 ir_node *block = be_transform_node(get_nodes_block(node));
3967 ir_node *ptr = get_irn_n(node, 0);
3968 ir_node *new_ptr = be_transform_node(ptr);
3969 ir_node *val = get_irn_n(node, 1);
3970 ir_node *new_val = be_transform_node(val);
3971 ir_node *mem = get_irn_n(node, 2);
3972 ir_node *new_mem = be_transform_node(mem);
3973 ir_graph *irg = current_ir_graph;
3974 dbg_info *dbgi = get_irn_dbg_info(node);
3975 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3976 ir_mode *mode = get_ia32_ls_mode(node);
3980 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
3982 am_offs = get_ia32_am_offs_int(node);
3983 add_ia32_am_offs_int(new_op, am_offs);
3985 set_ia32_op_type(new_op, ia32_AddrModeD);
3986 set_ia32_ls_mode(new_op, mode);
3987 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
3988 set_ia32_use_frame(new_op);
3990 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3995 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3997 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3998 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4000 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
4001 match_immediate | match_mode_neutral);
4004 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4006 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4007 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4008 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
4012 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4014 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4015 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4016 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
4020 static ir_node *gen_ia32_l_Add(ir_node *node) {
4021 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4022 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4023 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
4024 match_commutative | match_am | match_immediate |
4025 match_mode_neutral);
4027 if(is_Proj(lowered)) {
4028 lowered = get_Proj_pred(lowered);
4030 assert(is_ia32_Add(lowered));
4031 set_irn_mode(lowered, mode_T);
4037 static ir_node *gen_ia32_l_Adc(ir_node *node)
4039 return gen_binop_flags(node, new_rd_ia32_Adc,
4040 match_commutative | match_am | match_immediate |
4041 match_mode_neutral);
4045 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
4047 * @param node The node to transform
4048 * @return the created ia32 vfild node
4050 static ir_node *gen_ia32_l_vfild(ir_node *node) {
4051 return gen_lowered_Load(node, new_rd_ia32_vfild);
4055 * Transforms an ia32_l_Load into a "real" ia32_Load node
4057 * @param node The node to transform
4058 * @return the created ia32 Load node
4060 static ir_node *gen_ia32_l_Load(ir_node *node) {
4061 return gen_lowered_Load(node, new_rd_ia32_Load);
4065 * Transforms an ia32_l_Store into a "real" ia32_Store node
4067 * @param node The node to transform
4068 * @return the created ia32 Store node
4070 static ir_node *gen_ia32_l_Store(ir_node *node) {
4071 return gen_lowered_Store(node, new_rd_ia32_Store);
4075 * Transforms a l_vfist into a "real" vfist node.
4077 * @param node The node to transform
4078 * @return the created ia32 vfist node
4080 static ir_node *gen_ia32_l_vfist(ir_node *node) {
4081 ir_node *block = be_transform_node(get_nodes_block(node));
4082 ir_node *ptr = get_irn_n(node, 0);
4083 ir_node *new_ptr = be_transform_node(ptr);
4084 ir_node *val = get_irn_n(node, 1);
4085 ir_node *new_val = be_transform_node(val);
4086 ir_node *mem = get_irn_n(node, 2);
4087 ir_node *new_mem = be_transform_node(mem);
4088 ir_graph *irg = current_ir_graph;
4089 dbg_info *dbgi = get_irn_dbg_info(node);
4090 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4091 ir_mode *mode = get_ia32_ls_mode(node);
4092 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4096 new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
4097 new_val, trunc_mode);
4099 am_offs = get_ia32_am_offs_int(node);
4100 add_ia32_am_offs_int(new_op, am_offs);
4102 set_ia32_op_type(new_op, ia32_AddrModeD);
4103 set_ia32_ls_mode(new_op, mode);
4104 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4105 set_ia32_use_frame(new_op);
4107 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4113 * Transforms a l_MulS into a "real" MulS node.
4115 * @return the created ia32 Mul node
4117 static ir_node *gen_ia32_l_Mul(ir_node *node) {
4118 ir_node *left = get_binop_left(node);
4119 ir_node *right = get_binop_right(node);
4121 return gen_binop(node, left, right, new_rd_ia32_Mul,
4122 match_commutative | match_am | match_mode_neutral);
4126 * Transforms a l_IMulS into a "real" IMul1OPS node.
4128 * @return the created ia32 IMul1OP node
4130 static ir_node *gen_ia32_l_IMul(ir_node *node) {
4131 ir_node *left = get_binop_left(node);
4132 ir_node *right = get_binop_right(node);
4134 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
4135 match_commutative | match_am | match_mode_neutral);
4138 static ir_node *gen_ia32_l_Sub(ir_node *node) {
4139 ir_node *left = get_irn_n(node, n_ia32_l_Sub_left);
4140 ir_node *right = get_irn_n(node, n_ia32_l_Sub_right);
4141 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
4142 match_am | match_immediate | match_mode_neutral);
4144 if(is_Proj(lowered)) {
4145 lowered = get_Proj_pred(lowered);
4147 assert(is_ia32_Sub(lowered));
4148 set_irn_mode(lowered, mode_T);
4154 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
4155 return gen_binop_flags(node, new_rd_ia32_Sbb,
4156 match_am | match_immediate | match_mode_neutral);
4160 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4161 * op1 - target to be shifted
4162 * op2 - contains bits to be shifted into target
4164 * Only op3 can be an immediate.
4166 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4167 ir_node *low, ir_node *count)
4169 ir_node *block = get_nodes_block(node);
4170 ir_node *new_block = be_transform_node(block);
4171 ir_graph *irg = current_ir_graph;
4172 dbg_info *dbgi = get_irn_dbg_info(node);
4173 ir_node *new_high = be_transform_node(high);
4174 ir_node *new_low = be_transform_node(low);
4178 /* the shift amount can be any mode that is bigger than 5 bits, since all
4179 * other bits are ignored anyway */
4180 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
4181 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4182 count = get_Conv_op(count);
4184 new_count = create_immediate_or_transform(count, 0);
4186 if (is_ia32_l_ShlD(node)) {
4187 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
4190 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
4193 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
4198 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4200 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4201 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4202 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4203 return gen_lowered_64bit_shifts(node, high, low, count);
4206 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4208 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4209 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4210 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4211 return gen_lowered_64bit_shifts(node, high, low, count);
4214 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
4215 ir_node *src_block = get_nodes_block(node);
4216 ir_node *block = be_transform_node(src_block);
4217 ir_graph *irg = current_ir_graph;
4218 dbg_info *dbgi = get_irn_dbg_info(node);
4219 ir_node *frame = get_irg_frame(irg);
4220 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4221 ir_node *nomem = new_NoMem();
4222 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4223 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4224 ir_node *new_val_low = be_transform_node(val_low);
4225 ir_node *new_val_high = be_transform_node(val_high);
4230 ir_node *store_high;
4232 if(!mode_is_signed(get_irn_mode(val_high))) {
4233 panic("unsigned long long -> float not supported yet (%+F)", node);
4237 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4239 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4241 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
4242 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
4244 set_ia32_use_frame(store_low);
4245 set_ia32_use_frame(store_high);
4246 set_ia32_op_type(store_low, ia32_AddrModeD);
4247 set_ia32_op_type(store_high, ia32_AddrModeD);
4248 set_ia32_ls_mode(store_low, mode_Iu);
4249 set_ia32_ls_mode(store_high, mode_Is);
4250 add_ia32_am_offs_int(store_high, 4);
4254 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4257 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
4259 set_ia32_use_frame(fild);
4260 set_ia32_op_type(fild, ia32_AddrModeS);
4261 set_ia32_ls_mode(fild, mode_Ls);
4263 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
4265 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
4268 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
4269 ir_node *src_block = get_nodes_block(node);
4270 ir_node *block = be_transform_node(src_block);
4271 ir_graph *irg = current_ir_graph;
4272 dbg_info *dbgi = get_irn_dbg_info(node);
4273 ir_node *frame = get_irg_frame(irg);
4274 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4275 ir_node *nomem = new_NoMem();
4276 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4277 ir_node *new_val = be_transform_node(val);
4278 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4283 fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
4285 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
4286 set_ia32_use_frame(fist);
4287 set_ia32_op_type(fist, ia32_AddrModeD);
4288 set_ia32_ls_mode(fist, mode_Ls);
4294 * the BAD transformer.
4296 static ir_node *bad_transform(ir_node *node) {
4297 panic("No transform function for %+F available.\n", node);
4301 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
4302 ir_graph *irg = current_ir_graph;
4303 ir_node *block = be_transform_node(get_nodes_block(node));
4304 ir_node *pred = get_Proj_pred(node);
4305 ir_node *new_pred = be_transform_node(pred);
4306 ir_node *frame = get_irg_frame(irg);
4307 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4308 dbg_info *dbgi = get_irn_dbg_info(node);
4309 long pn = get_Proj_proj(node);
4314 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
4315 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
4316 set_ia32_use_frame(load);
4317 set_ia32_op_type(load, ia32_AddrModeS);
4318 set_ia32_ls_mode(load, mode_Iu);
4319 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4320 * 32 bit from it with this particular load */
4321 attr = get_ia32_attr(load);
4322 attr->data.need_64bit_stackent = 1;
4324 if (pn == pn_ia32_l_FloattoLL_res_high) {
4325 add_ia32_am_offs_int(load, 4);
4327 assert(pn == pn_ia32_l_FloattoLL_res_low);
4330 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4336 * Transform the Projs of an AddSP.
4338 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4339 ir_node *block = be_transform_node(get_nodes_block(node));
4340 ir_node *pred = get_Proj_pred(node);
4341 ir_node *new_pred = be_transform_node(pred);
4342 ir_graph *irg = current_ir_graph;
4343 dbg_info *dbgi = get_irn_dbg_info(node);
4344 long proj = get_Proj_proj(node);
4346 if (proj == pn_be_AddSP_sp) {
4347 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4348 pn_ia32_SubSP_stack);
4349 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4351 } else if(proj == pn_be_AddSP_res) {
4352 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4353 pn_ia32_SubSP_addr);
4354 } else if (proj == pn_be_AddSP_M) {
4355 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4359 return new_rd_Unknown(irg, get_irn_mode(node));
4363 * Transform the Projs of a SubSP.
4365 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4366 ir_node *block = be_transform_node(get_nodes_block(node));
4367 ir_node *pred = get_Proj_pred(node);
4368 ir_node *new_pred = be_transform_node(pred);
4369 ir_graph *irg = current_ir_graph;
4370 dbg_info *dbgi = get_irn_dbg_info(node);
4371 long proj = get_Proj_proj(node);
4373 if (proj == pn_be_SubSP_sp) {
4374 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4375 pn_ia32_AddSP_stack);
4376 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4378 } else if (proj == pn_be_SubSP_M) {
4379 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4383 return new_rd_Unknown(irg, get_irn_mode(node));
4387 * Transform and renumber the Projs from a Load.
4389 static ir_node *gen_Proj_Load(ir_node *node) {
4391 ir_node *block = be_transform_node(get_nodes_block(node));
4392 ir_node *pred = get_Proj_pred(node);
4393 ir_graph *irg = current_ir_graph;
4394 dbg_info *dbgi = get_irn_dbg_info(node);
4395 long proj = get_Proj_proj(node);
4398 /* loads might be part of source address mode matches, so we don't
4399 transform the ProjMs yet (with the exception of loads whose result is
4402 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4405 assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
4407 /* this is needed, because sometimes we have loops that are only
4408 reachable through the ProjM */
4409 be_enqueue_preds(node);
4410 /* do it in 2 steps, to silence firm verifier */
4411 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4412 set_Proj_proj(res, pn_ia32_Load_M);
4416 /* renumber the proj */
4417 new_pred = be_transform_node(pred);
4418 if (is_ia32_Load(new_pred)) {
4421 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4423 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4424 case pn_Load_X_regular:
4425 return new_rd_Jmp(dbgi, irg, block);
4426 case pn_Load_X_except:
4427 /* This Load might raise an exception. Mark it. */
4428 set_ia32_exc_label(new_pred, 1);
4429 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4433 } else if (is_ia32_Conv_I2I(new_pred) ||
4434 is_ia32_Conv_I2I8Bit(new_pred)) {
4435 set_irn_mode(new_pred, mode_T);
4436 if (proj == pn_Load_res) {
4437 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4438 } else if (proj == pn_Load_M) {
4439 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4441 } else if (is_ia32_xLoad(new_pred)) {
4444 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4446 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4447 case pn_Load_X_regular:
4448 return new_rd_Jmp(dbgi, irg, block);
4449 case pn_Load_X_except:
4450 /* This Load might raise an exception. Mark it. */
4451 set_ia32_exc_label(new_pred, 1);
4452 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4456 } else if (is_ia32_vfld(new_pred)) {
4459 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4461 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4462 case pn_Load_X_regular:
4463 return new_rd_Jmp(dbgi, irg, block);
4464 case pn_Load_X_except:
4465 /* This Load might raise an exception. Mark it. */
4466 set_ia32_exc_label(new_pred, 1);
4467 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4472 /* can happen for ProJMs when source address mode happened for the
4475 /* however it should not be the result proj, as that would mean the
4476 load had multiple users and should not have been used for
4478 if (proj != pn_Load_M) {
4479 panic("internal error: transformed node not a Load");
4481 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4485 return new_rd_Unknown(irg, get_irn_mode(node));
4489 * Transform and renumber the Projs from a DivMod like instruction.
4491 static ir_node *gen_Proj_DivMod(ir_node *node) {
4492 ir_node *block = be_transform_node(get_nodes_block(node));
4493 ir_node *pred = get_Proj_pred(node);
4494 ir_node *new_pred = be_transform_node(pred);
4495 ir_graph *irg = current_ir_graph;
4496 dbg_info *dbgi = get_irn_dbg_info(node);
4497 ir_mode *mode = get_irn_mode(node);
4498 long proj = get_Proj_proj(node);
4500 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4502 switch (get_irn_opcode(pred)) {
4506 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4508 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4509 case pn_Div_X_regular:
4510 return new_rd_Jmp(dbgi, irg, block);
4511 case pn_Div_X_except:
4512 set_ia32_exc_label(new_pred, 1);
4513 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4521 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4523 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4524 case pn_Mod_X_except:
4525 set_ia32_exc_label(new_pred, 1);
4526 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4534 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4535 case pn_DivMod_res_div:
4536 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4537 case pn_DivMod_res_mod:
4538 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4539 case pn_DivMod_X_regular:
4540 return new_rd_Jmp(dbgi, irg, block);
4541 case pn_DivMod_X_except:
4542 set_ia32_exc_label(new_pred, 1);
4543 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4553 return new_rd_Unknown(irg, mode);
4557 * Transform and renumber the Projs from a CopyB.
4559 static ir_node *gen_Proj_CopyB(ir_node *node) {
4560 ir_node *block = be_transform_node(get_nodes_block(node));
4561 ir_node *pred = get_Proj_pred(node);
4562 ir_node *new_pred = be_transform_node(pred);
4563 ir_graph *irg = current_ir_graph;
4564 dbg_info *dbgi = get_irn_dbg_info(node);
4565 ir_mode *mode = get_irn_mode(node);
4566 long proj = get_Proj_proj(node);
4569 case pn_CopyB_M_regular:
4570 if (is_ia32_CopyB_i(new_pred)) {
4571 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4572 } else if (is_ia32_CopyB(new_pred)) {
4573 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4581 return new_rd_Unknown(irg, mode);
4585 * Transform and renumber the Projs from a Quot.
4587 static ir_node *gen_Proj_Quot(ir_node *node) {
4588 ir_node *block = be_transform_node(get_nodes_block(node));
4589 ir_node *pred = get_Proj_pred(node);
4590 ir_node *new_pred = be_transform_node(pred);
4591 ir_graph *irg = current_ir_graph;
4592 dbg_info *dbgi = get_irn_dbg_info(node);
4593 ir_mode *mode = get_irn_mode(node);
4594 long proj = get_Proj_proj(node);
4598 if (is_ia32_xDiv(new_pred)) {
4599 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4600 } else if (is_ia32_vfdiv(new_pred)) {
4601 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4605 if (is_ia32_xDiv(new_pred)) {
4606 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4607 } else if (is_ia32_vfdiv(new_pred)) {
4608 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4616 return new_rd_Unknown(irg, mode);
4620 * Transform the Thread Local Storage Proj.
4622 static ir_node *gen_Proj_tls(ir_node *node) {
4623 ir_node *block = be_transform_node(get_nodes_block(node));
4624 ir_graph *irg = current_ir_graph;
4625 dbg_info *dbgi = NULL;
4626 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
4631 static ir_node *gen_be_Call(ir_node *node) {
4632 ir_node *res = be_duplicate_node(node);
4633 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4638 static ir_node *gen_be_IncSP(ir_node *node) {
4639 ir_node *res = be_duplicate_node(node);
4640 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4646 * Transform the Projs from a be_Call.
4648 static ir_node *gen_Proj_be_Call(ir_node *node) {
4649 ir_node *block = be_transform_node(get_nodes_block(node));
4650 ir_node *call = get_Proj_pred(node);
4651 ir_node *new_call = be_transform_node(call);
4652 ir_graph *irg = current_ir_graph;
4653 dbg_info *dbgi = get_irn_dbg_info(node);
4654 ir_type *method_type = be_Call_get_type(call);
4655 int n_res = get_method_n_ress(method_type);
4656 long proj = get_Proj_proj(node);
4657 ir_mode *mode = get_irn_mode(node);
4659 const arch_register_class_t *cls;
4661 /* The following is kinda tricky: If we're using SSE, then we have to
4662 * move the result value of the call in floating point registers to an
4663 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4664 * after the call, we have to make sure to correctly make the
4665 * MemProj and the result Proj use these 2 nodes
4667 if (proj == pn_be_Call_M_regular) {
4668 // get new node for result, are we doing the sse load/store hack?
4669 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4670 ir_node *call_res_new;
4671 ir_node *call_res_pred = NULL;
4673 if (call_res != NULL) {
4674 call_res_new = be_transform_node(call_res);
4675 call_res_pred = get_Proj_pred(call_res_new);
4678 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4679 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4680 pn_be_Call_M_regular);
4682 assert(is_ia32_xLoad(call_res_pred));
4683 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4687 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4688 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4690 ir_node *frame = get_irg_frame(irg);
4691 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4693 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4696 /* in case there is no memory output: create one to serialize the copy
4698 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4699 pn_be_Call_M_regular);
4700 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4701 pn_be_Call_first_res);
4703 /* store st(0) onto stack */
4704 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4706 set_ia32_op_type(fstp, ia32_AddrModeD);
4707 set_ia32_use_frame(fstp);
4709 /* load into SSE register */
4710 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4712 set_ia32_op_type(sse_load, ia32_AddrModeS);
4713 set_ia32_use_frame(sse_load);
4715 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4721 /* transform call modes */
4722 if (mode_is_data(mode)) {
4723 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4727 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4731 * Transform the Projs from a Cmp.
4733 static ir_node *gen_Proj_Cmp(ir_node *node)
4735 /* this probably means not all mode_b nodes were lowered... */
4736 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4741 * Transform and potentially renumber Proj nodes.
4743 static ir_node *gen_Proj(ir_node *node) {
4744 ir_graph *irg = current_ir_graph;
4745 dbg_info *dbgi = get_irn_dbg_info(node);
4746 ir_node *pred = get_Proj_pred(node);
4747 long proj = get_Proj_proj(node);
4749 if (is_Store(pred)) {
4750 if (proj == pn_Store_M) {
4751 return be_transform_node(pred);
4754 return new_r_Bad(irg);
4756 } else if (is_Load(pred)) {
4757 return gen_Proj_Load(node);
4758 } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
4759 return gen_Proj_DivMod(node);
4760 } else if (is_CopyB(pred)) {
4761 return gen_Proj_CopyB(node);
4762 } else if (is_Quot(pred)) {
4763 return gen_Proj_Quot(node);
4764 } else if (be_is_SubSP(pred)) {
4765 return gen_Proj_be_SubSP(node);
4766 } else if (be_is_AddSP(pred)) {
4767 return gen_Proj_be_AddSP(node);
4768 } else if (be_is_Call(pred)) {
4769 return gen_Proj_be_Call(node);
4770 } else if (is_Cmp(pred)) {
4771 return gen_Proj_Cmp(node);
4772 } else if (get_irn_op(pred) == op_Start) {
4773 if (proj == pn_Start_X_initial_exec) {
4774 ir_node *block = get_nodes_block(pred);
4777 /* we exchange the ProjX with a jump */
4778 block = be_transform_node(block);
4779 jump = new_rd_Jmp(dbgi, irg, block);
4782 if (node == be_get_old_anchor(anchor_tls)) {
4783 return gen_Proj_tls(node);
4785 } else if (is_ia32_l_FloattoLL(pred)) {
4786 return gen_Proj_l_FloattoLL(node);
4788 } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4792 ir_node *new_pred = be_transform_node(pred);
4793 ir_node *block = be_transform_node(get_nodes_block(node));
4794 ir_mode *mode = get_irn_mode(node);
4795 if (mode_needs_gp_reg(mode)) {
4796 ir_node *new_proj = new_r_Proj(irg, block, new_pred, mode_Iu,
4797 get_Proj_proj(node));
4798 #ifdef DEBUG_libfirm
4799 new_proj->node_nr = node->node_nr;
4805 return be_duplicate_node(node);
/*
 * NOTE(review): elided listing excerpt — the bulk of the GEN()/BAD()
 * registration list (one entry per firm opcode) is missing between the
 * visible lines below; do not treat this as the complete function.
 */
4809 * Enters all transform functions into the generic pointer
4811 static void register_transformers(void)
4815 /* first clear the generic function pointer for all ops */
4816 clear_irp_opcodes_generic_func();
/* GEN(a): route opcode "a" to its gen_##a transformer via the generic ops
 * pointer; BAD(a): such nodes must not appear here anymore -> panic. */
4818 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4819 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4857 /* transform ops from intrinsic lowering */
4873 GEN(ia32_l_LLtoFloat);
4874 GEN(ia32_l_FloattoLL);
4880 /* we should never see these nodes */
4895 /* handle generic backend nodes */
/* presumably fetched because op_Mulh is registered dynamically — TODO confirm */
4904 op_Mulh = get_op_Mulh();
4913 * Pre-transform all unknown and noreg nodes.
4915 static void ia32_pretransform_node(void *arch_cg) {
4916 ia32_code_gen_t *cg = arch_cg;
4918 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4919 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4920 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4921 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4922 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4923 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/**
 * Walker: checks that every ia32 node producing more than one result has
 * Projs for all of its outputs; otherwise creates the missing Projs and
 * keeps them alive using a be_Keep node.
 */
static void add_missing_keep_walker(ir_node *node, void *data)
	/* bitmask of output numbers for which a Proj already exists */
	unsigned found_projs = 0;
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);

	/* only ia32 machine nodes are of interest here */
	if(!is_ia32_irn(node))

	n_outs = get_ia32_n_res(node);

	/* SwitchJmp is special-cased — NOTE(review): the skipped handling is
	 * not visible in this chunk, confirm against the full file */
	if(is_ia32_SwitchJmp(node))

	/* found_projs is a bitmask, so every output number must fit into the
	 * bit width of an unsigned */
	assert(n_outs < (int) sizeof(unsigned) * 8);

	/* record which output numbers already have a Proj user */
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		int pn = get_Proj_proj(proj);

		/* only memory Projs may carry a number outside the result range */
		assert(get_irn_mode(proj) == mode_M || pn < n_outs);
		found_projs |= 1 << pn;

	/* are keeps missing? */
	for(i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *class;

		/* this output already has a Proj, nothing to do */
		if(found_projs & (1 << i)) {

		req = get_ia32_out_req(node, i);

		/* flags results are treated specially — presumably skipped without
		 * a Keep; skipped branch not visible here, confirm */
		if(class == &ia32_reg_classes[CLASS_ia32_flags]) {

		/* materialize a Proj for the unused output ... */
		block = get_nodes_block(node);
		in[0] = new_r_Proj(current_ir_graph, block, node,
		                   arch_register_class_mode(class), i);
		/* ... and keep it alive: reuse one be_Keep per node, appending
		 * further operands to it */
		if(last_keep != NULL) {
			be_Keep_add_node(last_keep, class, in[0]);
		last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
		/* place the Keep right after the node if a schedule exists */
		if(sched_is_scheduled(node)) {
			sched_add_after(node, last_keep);
4996 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
4999 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5001 ir_graph *irg = be_get_birg_irg(cg->birg);
5002 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/* do the transformation: rewrite the firm graph into ia32 machine nodes */
void ia32_transform_graph(ia32_code_gen_t *cg) {
	ir_graph *irg = cg->irg;

	register_transformers();

	/* NOTE(review): presumably reset so the fpcw node is re-created per
	 * graph during transformation — confirm against initial_fpcw users */
	initial_fpcw = NULL;

	/* heights are needed by the address-mode/non-address-mode analysis */
	BE_TIMER_PUSH(t_heights);
	heights = heights_new(irg);
	BE_TIMER_POP(t_heights);
	ia32_calculate_non_address_mode_nodes(cg->birg);

	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();

	be_transform_graph(cg->birg, ia32_pretransform_node, cg);

	/* restore the caller's CSE setting */
	set_opt_cse(cse_last);

	/* release analysis data built above */
	ia32_free_non_address_mode_nodes();
	heights_free(heights);
5033 void ia32_init_transform(void)
5035 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");