2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
98 static ir_node *initial_fpcw = NULL;
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
139 * Return true if a mode can be stored in the GP register set
/* Return non-zero if values of @p mode belong in the ia32 GP register set:
 * integer, reference and mode_b values of at most 32 bits; the fpcw mode is
 * excluded explicitly.
 * NOTE(review): this view of the file is missing source lines here (the early
 * "return 0;" bodies of the two guards are not visible) -- confirm against the
 * full source before editing. */
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
142 if(mode == mode_fpcw)
144 if(get_mode_size_bits(mode) > 32)
146 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
150 * creates a unique ident by adding a number to a tag
152 * @param tag the tag string, must contain a %d if a number
/* Create a unique ident by formatting a monotonically increasing counter into
 * @p tag (the tag must contain a %-style integer conversion, e.g. "%u").
 * NOTE(review): the declaration of the local buffer `str` is not visible in
 * this view (missing source line) -- presumably a fixed-size char array. */
155 static ident *unique_id(const char *tag)
157 static unsigned id = 0;
160 snprintf(str, sizeof(str), tag, ++id);
161 return new_id_from_str(str);
165 * Get a primitive type for a mode.
/* Get (or lazily create and cache) a primitive type for @p mode.
 * The pmap @p types caches mode -> type; on a miss a new primitive type named
 * "prim_type_<modename>" with 16-byte alignment is created and inserted.
 * NOTE(review): the cache-hit early return and the final return statement are
 * not visible in this view (missing source lines). */
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 pmap_entry *e = pmap_find(types, mode);
174 snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 res = new_type_primitive(new_id_from_str(buf), mode);
176 set_type_alignment_bytes(res, 16);
177 pmap_insert(types, mode, res);
185 * Creates an immediate.
187 * @param symconst if set, create a SymConst immediate
188 * @param symconst_sign sign for the symconst
189 * @param val integer value for the immediate
/* Create an ia32 Immediate node in the start block of the current graph.
 *
 * @param symconst       if non-NULL, create a SymConst immediate
 * @param symconst_sign  sign flag for the symconst
 * @param val            integer value for the immediate
 *
 * The immediate is pinned to the GP_NOREG pseudo register.
 * NOTE(review): the `return immediate;` is not visible in this view. */
191 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
193 ir_graph *irg = current_ir_graph;
194 ir_node *start_block = get_irg_start_block(irg);
195 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block,
196 symconst, symconst_sign, val);
197 arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
203 * Get an atomic entity that is initialized with a tarval
/* Get (or create and cache) a static, constant, local-visibility atomic entity
 * holding the tarval of the float Const node @p cnst.  Entities are cached in
 * isa->tv_ent keyed by tarval; new entities are named ".LC<n>" and their
 * initializer is built inside the const-code irg (current_ir_graph is
 * temporarily switched and restored).
 * NOTE(review): the cache-hit path after the pmap_find and the final return
 * are not visible in this view (missing source lines). */
205 static ir_entity *create_float_const_entity(ir_node *cnst)
207 ia32_isa_t *isa = env_cg->isa;
208 tarval *tv = get_Const_tarval(cnst);
209 pmap_entry *e = pmap_find(isa->tv_ent, tv);
214 ir_mode *mode = get_irn_mode(cnst);
215 ir_type *tp = get_Const_type(cnst);
216 if (tp == firm_unknown_type)
217 tp = get_prim_type(isa->types, mode);
219 res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
221 set_entity_ld_ident(res, get_entity_ident(res));
222 set_entity_visibility(res, visibility_local);
223 set_entity_variability(res, variability_constant);
224 set_entity_allocation(res, allocation_static);
/* build the initializer in the const-code irg, then restore the old graph */
226 /* we create a new entity here: It's initialization must resist on the
228 rem = current_ir_graph;
229 current_ir_graph = get_const_code_irg();
230 set_atomic_ent_value(res, new_Const_type(tv, tp));
231 current_ir_graph = rem;
233 pmap_insert(isa->tv_ent, tv, res);
/* Return non-zero if @p node is a Const with value zero. */
241 static int is_Const_0(ir_node *node) {
242 return is_Const(node) && is_Const_null(node);
/* Return non-zero if @p node is a Const with value one. */
245 static int is_Const_1(ir_node *node) {
246 return is_Const(node) && is_Const_one(node);
/* Return non-zero if @p node is a Const with all bits set (i.e. -1). */
249 static int is_Const_Minus_1(ir_node *node) {
250 return is_Const(node) && is_Const_all_one(node);
254 * returns true if constant can be created with a simple float command
/* Return true if the float constant @p node can be materialized by a simple
 * x87 instruction (currently only 0.0 -> fldz and 1.0 -> fld1 are matched).
 * NOTE(review): the return statements are not visible in this view. */
256 static int is_simple_x87_Const(ir_node *node)
258 tarval *tv = get_Const_tarval(node);
260 if (tarval_is_null(tv) || tarval_is_one(tv))
263 /* TODO: match all the other float constants */
268 * returns true if constant can be created with a simple float command
/* Return true if the float constant @p node can be materialized by a simple
 * SSE sequence.  mode_F (32 bit) constants are handled specially (a movd from
 * a GP register can place any 32-bit pattern, see gen_Const); otherwise only
 * 0.0 and 1.0 are matched.
 * NOTE(review): the return statements are not visible in this view. */
270 static int is_simple_sse_Const(ir_node *node)
272 tarval *tv = get_Const_tarval(node);
274 if (get_tarval_mode(tv) == mode_F)
277 if (tarval_is_null(tv) || tarval_is_one(tv))
280 /* TODO: match all the other float constants */
285 * Transforms a Const.
/* Transform a firm Const node into ia32 code.
 *
 * Float constants: with SSE2, 0.0 becomes xZero; 1.0 is synthesized from
 * xAllOnes + pslld + psrld (shift counts 26/2 for mode_F, 55/2 for doubles);
 * any other 32-bit float is moved bit-exactly from a GP Const via xMovd;
 * everything else is loaded (xLoad) from a static entity created by
 * create_float_const_entity().  Without SSE2 the x87 path uses vfldz/vfld1 or
 * a vfld from such an entity.  Integer constants are converted to mode_Iu and
 * emitted as an ia32 Const.  Constants created in the start block get a
 * dependency on the frame node so they are not scheduled before the initial
 * IncSP.
 * NOTE(review): many lines (declarations of load/res/floatent, else arms,
 * returns, closing braces) are missing from this view -- code left untouched. */
287 static ir_node *gen_Const(ir_node *node) {
288 ir_graph *irg = current_ir_graph;
289 ir_node *old_block = get_nodes_block(node);
290 ir_node *block = be_transform_node(old_block);
291 dbg_info *dbgi = get_irn_dbg_info(node);
292 ir_mode *mode = get_irn_mode(node);
294 assert(is_Const(node));
296 if (mode_is_float(mode)) {
298 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
299 ir_node *nomem = new_NoMem();
303 if (ia32_cg_config.use_sse2) {
304 tarval *tv = get_Const_tarval(node);
305 if (tarval_is_null(tv)) {
306 load = new_rd_ia32_xZero(dbgi, irg, block);
307 set_ia32_ls_mode(load, mode);
309 } else if (tarval_is_one(tv)) {
/* build 1.0 without a memory access: all-ones, shift left then right */
310 int cnst = mode == mode_F ? 26 : 55;
311 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
312 ir_node *imm2 = create_Immediate(NULL, 0, 2);
313 ir_node *pslld, *psrld;
315 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
316 set_ia32_ls_mode(load, mode);
317 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
318 set_ia32_ls_mode(pslld, mode);
319 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
320 set_ia32_ls_mode(psrld, mode);
322 } else if (mode == mode_F) {
323 /* we can place any 32bit constant by using a movd gp, sse */
324 unsigned val = get_tarval_sub_bits(tv, 0) |
325 (get_tarval_sub_bits(tv, 1) << 8) |
326 (get_tarval_sub_bits(tv, 2) << 16) |
327 (get_tarval_sub_bits(tv, 3) << 24);
328 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
329 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
330 set_ia32_ls_mode(load, mode);
/* general case: load the constant from a static entity */
333 floatent = create_float_const_entity(node);
335 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
337 set_ia32_op_type(load, ia32_AddrModeS);
338 set_ia32_am_sc(load, floatent);
339 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
340 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz / fld1 for 0.0 and 1.0, otherwise vfld from entity */
343 if (is_Const_null(node)) {
344 load = new_rd_ia32_vfldz(dbgi, irg, block);
346 } else if (is_Const_one(node)) {
347 load = new_rd_ia32_vfld1(dbgi, irg, block);
350 floatent = create_float_const_entity(node);
352 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
353 set_ia32_op_type(load, ia32_AddrModeS);
354 set_ia32_am_sc(load, floatent);
355 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
356 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
358 set_ia32_ls_mode(load, mode);
361 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
363 /* Const Nodes before the initial IncSP are a bad idea, because
364 * they could be spilled and we have no SP ready at that point yet.
365 * So add a dependency to the initial frame pointer calculation to
366 * avoid that situation.
368 if (get_irg_start_block(irg) == block) {
369 add_irn_dep(load, get_irg_frame(irg));
372 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
/* integer constants: normalize the tarval to mode_Iu and emit ia32 Const */
376 tarval *tv = get_Const_tarval(node);
379 tv = tarval_convert_to(tv, mode_Iu);
381 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
383 panic("couldn't convert constant tarval (%+F)", node);
385 val = get_tarval_long(tv);
387 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
388 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
391 if (get_irg_start_block(irg) == block) {
392 add_irn_dep(cnst, get_irg_frame(irg));
400 * Transforms a SymConst.
/* Transform a firm SymConst node.
 *
 * Float-mode SymConsts become a load (SSE xLoad or x87 vfld, both in mode_E)
 * whose address-mode symconst is the referenced entity.  Integer SymConsts
 * must be of kind symconst_addr_ent (anything else panics) and become an ia32
 * Const carrying the entity.  As in gen_Const, nodes created in the start
 * block get a dependency on the frame node so they cannot be scheduled before
 * the initial IncSP.
 * NOTE(review): declarations of cnst/entity, else arms and the final return
 * are missing from this view -- code left untouched. */
402 static ir_node *gen_SymConst(ir_node *node) {
403 ir_graph *irg = current_ir_graph;
404 ir_node *old_block = get_nodes_block(node);
405 ir_node *block = be_transform_node(old_block);
406 dbg_info *dbgi = get_irn_dbg_info(node);
407 ir_mode *mode = get_irn_mode(node);
410 if (mode_is_float(mode)) {
411 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
412 ir_node *nomem = new_NoMem();
414 if (ia32_cg_config.use_sse2)
415 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
417 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
418 set_ia32_am_sc(cnst, get_SymConst_entity(node));
419 set_ia32_use_frame(cnst);
423 if(get_SymConst_kind(node) != symconst_addr_ent) {
424 panic("backend only support symconst_addr_ent (at %+F)", node);
426 entity = get_SymConst_entity(node);
427 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
430 /* Const Nodes before the initial IncSP are a bad idea, because
431 * they could be spilled and we have no SP ready at that point yet
433 if (get_irg_start_block(irg) == block) {
434 add_irn_dep(cnst, get_irg_frame(irg));
437 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
442 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Generates (and caches) an entity for a well-known FP constant used by the
 * FP Neg/Abs lowerings: the sign-bit masks, the abs masks and INT_MAX.
 * The `names` table maps each ia32_known_const_t to a type name, entity name,
 * constant string, a mode selector (0 = mode_Iu, 1 = mode_Lu, other = mode_F)
 * and the required type alignment.  Entities are built once, initialized in
 * the const-code irg, and cached in `ent_cache`.
 * NOTE(review): several lines (tp_name field in the struct, local declarations
 * of tv/tp/ent/cnst/rem/mode, some closing braces) are missing from this view. */
443 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
444 static const struct {
446 const char *ent_name;
447 const char *cnst_str;
450 } names [ia32_known_const_max] = {
451 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
452 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
453 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
454 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
455 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
457 static ir_entity *ent_cache[ia32_known_const_max];
459 const char *tp_name, *ent_name, *cnst_str;
467 ent_name = names[kct].ent_name;
468 if (! ent_cache[kct]) {
469 tp_name = names[kct].tp_name;
470 cnst_str = names[kct].cnst_str;
472 switch (names[kct].mode) {
473 case 0: mode = mode_Iu; break;
474 case 1: mode = mode_Lu; break;
475 default: mode = mode_F; break;
477 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
478 tp = new_type_primitive(new_id_from_str(tp_name), mode);
479 /* set the specified alignment */
480 set_type_alignment_bytes(tp, names[kct].align);
482 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
484 set_entity_ld_ident(ent, get_entity_ident(ent));
485 set_entity_visibility(ent, visibility_local);
486 set_entity_variability(ent, variability_constant);
487 set_entity_allocation(ent, allocation_static);
489 /* we create a new entity here: It's initialization must resist on the
491 rem = current_ir_graph;
492 current_ir_graph = get_const_code_irg();
493 cnst = new_Const(mode, tv);
494 current_ir_graph = rem;
496 set_atomic_ent_value(ent, cnst);
498 /* cache the entry */
499 ent_cache[kct] = ent;
502 return ent_cache[kct];
507 * Prints the old node name on cg obst and returns a pointer to it.
/* Print the textual representation of @p irn onto the code generator's name
 * obstack and return a pointer to the NUL-terminated string.  The returned
 * string lives on isa->name_obst -- callers must not free it individually. */
509 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
510 ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
512 lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
513 obstack_1grow(isa->name_obst, 0);
514 return obstack_finish(isa->name_obst);
519 * return true if the node is a Proj(Load) and could be used in source address
520 * mode for another node. Will return only true if the @p other node is not
521 * dependent on the memory of the Load (for binary operations use the other
522 * input here, for unary operations use NULL).
/* Return true if @p node is a Proj(Load) (or a simple float Const) that can be
 * folded into another node as a source address mode operand.
 *
 * Conditions checked: simple float constants always qualify; otherwise the
 * node must be the Load's result Proj, in the same block, be the Load's only
 * user, not already be transformed, and neither @p other nor @p other2 (the
 * unmatched inputs of the consumer) may depend on the Load through memory
 * within the block (checked via the heights analysis).
 * NOTE(review): the early returns, the is_Proj guard and local declarations of
 * load/pn are missing from this view -- code left untouched. */
524 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
525 ir_node *other, ir_node *other2)
527 ir_mode *mode = get_irn_mode(node);
531 /* float constants are always available */
532 if (is_Const(node) && mode_is_float(mode)) {
533 if (ia32_cg_config.use_sse2) {
534 if (is_simple_sse_Const(node))
537 if (is_simple_x87_Const(node))
540 if (get_irn_n_edges(node) > 1)
547 load = get_Proj_pred(node);
548 pn = get_Proj_proj(node);
549 if(!is_Load(load) || pn != pn_Load_res)
551 if(get_nodes_block(load) != block)
553 /* we only use address mode if we're the only user of the load */
554 if(get_irn_n_edges(node) > 1)
556 /* in some edge cases with address mode we might reach the load normally
557 * and through some AM sequence, if it is already materialized then we
558 * can't create an AM node from it */
559 if(be_is_transformed(node))
562 /* don't do AM if other node inputs depend on the load (via mem-proj) */
563 if(other != NULL && get_nodes_block(other) == block
564 && heights_reachable_in_block(heights, other, load))
566 if(other2 != NULL && get_nodes_block(other2) == block
567 && heights_reachable_in_block(heights, other2, load))
573 typedef struct ia32_address_mode_t ia32_address_mode_t;
574 struct ia32_address_mode_t {
578 ia32_op_type_t op_type;
582 unsigned commutative : 1;
583 unsigned ins_permuted : 1;
/* Fill @p addr with an ia32 address mode decomposed from @p ptr (not forced),
 * transforming base/index into the new graph (NoReg_gp when absent) and
 * transforming @p mem as the address's memory input. */
586 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
588 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
590 /* construct load address */
591 memset(addr, 0, sizeof(addr[0]));
592 ia32_create_address_mode(addr, ptr, /*force=*/0);
594 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
595 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
596 addr->mem = be_transform_node(mem);
/* Fill the address part of @p am for operand @p node.
 *
 * Two cases: a (float) Const is turned into a load address of its static
 * constant entity (NoReg base/index, NoMem, pinned floating); otherwise the
 * node is expected to be a Proj of a Load, whose pointer is decomposed into
 * an address mode and whose pinned state, load mode and memory Proj are
 * recorded in @p am.
 * NOTE(review): local declarations (load/ptr/mem/new_mem), the early return
 * after the Const case and use of new_mem are missing from this view. */
599 static void build_address(ia32_address_mode_t *am, ir_node *node)
601 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
602 ia32_address_t *addr = &am->addr;
608 if (is_Const(node)) {
609 ir_entity *entity = create_float_const_entity(node);
610 addr->base = noreg_gp;
611 addr->index = noreg_gp;
612 addr->mem = new_NoMem();
613 addr->symconst_ent = entity;
615 am->ls_mode = get_irn_mode(node);
616 am->pinned = op_pin_state_floats;
620 load = get_Proj_pred(node);
621 ptr = get_Load_ptr(load);
622 mem = get_Load_mem(load);
623 new_mem = be_transform_node(mem);
624 am->pinned = get_irn_pinned(load);
625 am->ls_mode = get_Load_mode(load);
626 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
628 /* construct load address */
629 ia32_create_address_mode(addr, ptr, /*force=*/0);
631 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
632 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/* Copy the address-mode attributes from @p addr onto the ia32 node @p node:
 * scale, symconst (with optional sign), integer offset, and -- presumably
 * guarded by a condition not visible in this view -- frame entity usage. */
636 static void set_address(ir_node *node, const ia32_address_t *addr)
638 set_ia32_am_scale(node, addr->scale);
639 set_ia32_am_sc(node, addr->symconst_ent);
640 set_ia32_am_offs_int(node, addr->offset);
641 if(addr->symconst_sign)
642 set_ia32_am_sc_sign(node);
644 set_ia32_use_frame(node);
645 set_ia32_frame_ent(node, addr->frame_entity);
/* Apply all matched address-mode attributes of @p am to the ia32 node:
 * the address itself, op type, load/store mode, pinned state (only upgraded
 * to pinned, never relaxed) and -- under a condition not fully visible in
 * this view -- the commutative flag. */
648 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
650 set_address(node, &am->addr);
652 set_ia32_op_type(node, am->op_type);
653 set_ia32_ls_mode(node, am->ls_mode);
654 if(am->pinned == op_pin_state_pinned && get_irn_pinned(node) != op_pin_state_pinned) {
655 set_irn_pinned(node, am->pinned);
658 set_ia32_commutative(node);
662 * Check, if a given node is a Down-Conv, ie. a integer Conv
663 * from a mode with a mode with more bits to a mode with lesser bits.
664 * Moreover, we return only true if the node has not more than 1 user.
666 * @param node the node
667 * @return non-zero if node is a Down-Conv
/* Check if @p node is a Down-Conv: an integer Conv from a mode with more bits
 * to a mode with fewer bits, where both modes fit the GP register class.
 * Only returns true if the Conv has at most one user (we only want to skip
 * the conv when we're the only user).
 * NOTE(review): the is_Conv guard and local declarations of src_mode/dest_mode
 * are missing from this view. */
669 static int is_downconv(const ir_node *node)
677 /* we only want to skip the conv when we're the only user
678 * (not optimal but for now...)
680 if(get_irn_n_edges(node) > 1)
683 src_mode = get_irn_mode(get_Conv_op(node));
684 dest_mode = get_irn_mode(node);
685 return mode_needs_gp_reg(src_mode)
686 && mode_needs_gp_reg(dest_mode)
687 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
690 /* Skip all Down-Conv's on a given node and return the resulting node. */
/* Skip all Down-Conv's on a given node and return the resulting node.
 * Safe for mode-neutral operations, whose upper bits may be arbitrary. */
691 ir_node *ia32_skip_downconv(ir_node *node) {
692 while (is_downconv(node))
693 node = get_Conv_op(node);
/* Widen @p node to a 32-bit mode via an I2I conversion, choosing a signed or
 * unsigned target mode based on the signedness of the node's mode.
 * NOTE(review): the tgt_mode assignments in the signed/unsigned branches and
 * the declarations of tgt_mode/block/dbgi are missing from this view. */
699 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
701 ir_mode *mode = get_irn_mode(node);
706 if(mode_is_signed(mode)) {
711 block = get_nodes_block(node);
712 dbgi = get_irn_dbg_info(node);
714 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
719 * matches operands of a node into ia32 addressing/operand modes. This covers
720 * usage of source address mode, immediates, operations with non 32-bit modes,
722 * The resulting data is filled into the @p am struct. block is the block
723 * of the node whose arguments are matched. op1, op2 are the first and second
724 * input that are matched (op1 may be NULL). other_op is another unrelated
725 * input that is not matched! but which is needed sometimes to check if AM
726 * for op1/op2 is legal.
727 * @p flags describes the supported modes of the operation in detail.
/* Match the operands op1/op2 of a node into ia32 addressing/operand modes,
 * filling @p am: immediates (when match_immediate), source address mode for
 * op2 or -- if commutative -- op1 (when match_am, checked via
 * ia32_use_source_address_mode with @p other_op as the extra dependency to
 * respect), downconv skipping for mode-neutral operations, and the plain
 * ia32_Normal case otherwise.  When AM is taken on op1 the inputs are swapped
 * and ins_permuted is set.  Missing base/index/mem default to NoReg/NoMem.
 * @p flags (match_flags_t) selects which of these are allowed; asserts check
 * flag consistency and that 8/16-bit operand modes are only matched when the
 * corresponding *_am flags permit it.
 * NOTE(review): numerous lines are missing from this view (declarations of
 * new_op1/new_op2/use_am/use_immediate/noreg, several else branches and
 * closing braces) -- code left untouched. */
729 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
730 ir_node *op1, ir_node *op2, ir_node *other_op,
733 ia32_address_t *addr = &am->addr;
734 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
737 ir_mode *mode = get_irn_mode(op2);
739 unsigned commutative;
740 int use_am_and_immediates;
742 int mode_bits = get_mode_size_bits(mode);
744 memset(am, 0, sizeof(am[0]));
746 commutative = (flags & match_commutative) != 0;
747 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
748 use_am = (flags & match_am) != 0;
749 use_immediate = (flags & match_immediate) != 0;
750 assert(!use_am_and_immediates || use_immediate);
753 assert(!commutative || op1 != NULL);
754 assert(use_am || !(flags & match_8bit_am));
755 assert(use_am || !(flags & match_16bit_am));
757 if (mode_bits == 8) {
758 if (!(flags & match_8bit_am))
760 /* we don't automatically add upconvs yet */
761 assert((flags & match_mode_neutral) || (flags & match_8bit));
762 } else if (mode_bits == 16) {
763 if (!(flags & match_16bit_am))
765 /* we don't automatically add upconvs yet */
766 assert((flags & match_mode_neutral) || (flags & match_16bit));
769 /* we can simply skip downconvs for mode neutral nodes: the upper bits
770 * can be random for these operations */
771 if (flags & match_mode_neutral) {
772 op2 = ia32_skip_downconv(op2);
774 op1 = ia32_skip_downconv(op1);
778 /* match immediates. firm nodes are normalized: constants are always on the
781 if (!(flags & match_try_am) && use_immediate) {
782 new_op2 = try_create_Immediate(op2, 0);
785 if (new_op2 == NULL &&
786 use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
787 build_address(am, op2);
788 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
789 if(mode_is_float(mode)) {
790 new_op2 = ia32_new_NoReg_vfp(env_cg);
794 am->op_type = ia32_AddrModeS;
795 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
797 ia32_use_source_address_mode(block, op1, op2, other_op)) {
799 build_address(am, op1);
801 if (mode_is_float(mode)) {
802 noreg = ia32_new_NoReg_vfp(env_cg);
807 if(new_op2 != NULL) {
810 new_op1 = be_transform_node(op2);
/* operands were swapped to put op1 into address mode */
812 am->ins_permuted = 1;
814 am->op_type = ia32_AddrModeS;
816 if(flags & match_try_am) {
819 am->op_type = ia32_Normal;
823 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
825 new_op2 = be_transform_node(op2);
826 am->op_type = ia32_Normal;
827 am->ls_mode = get_irn_mode(op2);
828 if(flags & match_mode_neutral)
829 am->ls_mode = mode_Iu;
831 if(addr->base == NULL)
832 addr->base = noreg_gp;
833 if(addr->index == NULL)
834 addr->index = noreg_gp;
835 if(addr->mem == NULL)
836 addr->mem = new_NoMem();
838 am->new_op1 = new_op1;
839 am->new_op2 = new_op2;
840 am->commutative = commutative;
/* If the matched address mode consumed a Load (am->mem_proj != NULL), turn
 * @p node into a mode_T node so the old memory Proj can attach to it, mark
 * the Load as transformed into @p node, and return a fresh result Proj.
 * If no memory Proj was taken over, @p node is returned unchanged (the early
 * return after the NULL check is not visible in this view). */
843 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
845 ir_graph *irg = current_ir_graph;
849 if(am->mem_proj == NULL)
852 /* we have to create a mode_T so the old MemProj can attach to us */
853 mode = get_irn_mode(node);
854 load = get_Proj_pred(am->mem_proj);
856 mark_irn_visited(load);
857 be_set_transformed_node(load, node);
860 set_irn_mode(node, mode_T);
861 return new_rd_Proj(NULL, irg, get_nodes_block(node), node, mode, pn_ia32_res);
868 * Construct a standard binary operation, set AM and immediate if required.
870 * @param op1 The first operand
871 * @param op2 The second operand
872 * @param func The node constructor function
873 * @return The constructed ia32 node.
/* Construct a standard ia32 binary operation, setting address mode and
 * immediates as permitted by @p flags.
 *
 * @param node   the firm node being transformed (for dbg info / block)
 * @param op1    the first operand
 * @param op2    the second operand
 * @param func   the ia32 node constructor to invoke
 * @param flags  match flags forwarded to match_arguments()
 * @return the constructed ia32 node (with memory Proj fixed up) */
875 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
876 construct_binop_func *func, match_flags_t flags)
878 ir_node *block = get_nodes_block(node);
879 ir_node *new_block = be_transform_node(block);
880 ir_graph *irg = current_ir_graph;
881 dbg_info *dbgi = get_irn_dbg_info(node);
883 ia32_address_mode_t am;
884 ia32_address_t *addr = &am.addr;
886 match_arguments(&am, block, op1, op2, NULL, flags);
888 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
889 am.new_op1, am.new_op2);
890 set_am_attributes(new_node, &am);
891 /* we can't use source address mode anymore when using immediates */
892 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
893 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
894 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
896 new_node = fix_mem_proj(new_node, &am);
903 n_ia32_l_binop_right,
904 n_ia32_l_binop_eflags
906 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
907 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
908 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
909 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_left, n_Sbb_left)
910 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_right, n_Sbb_right)
911 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
914 * Construct a binary operation which also consumes the eflags.
916 * @param node The node to transform
917 * @param func The node constructor function
918 * @param flags The match flags
919 * @return The constructor ia32 node
/* Construct an ia32 binary operation that also consumes the eflags input
 * (e.g. Adc/Sbb).  Operands are taken from the lowered node's
 * n_ia32_l_binop_* inputs (layout verified by the COMPILETIME_ASSERTs above),
 * matched via match_arguments(), and the transformed eflags are passed as the
 * extra input to @p func.
 *
 * @param node   the lowered node to transform
 * @param func   the flags-consuming ia32 node constructor
 * @param flags  the match flags
 * @return the constructed ia32 node (with memory Proj fixed up) */
921 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
924 ir_node *src_block = get_nodes_block(node);
925 ir_node *block = be_transform_node(src_block);
926 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
927 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
928 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
929 ir_node *new_eflags = be_transform_node(eflags);
930 ir_graph *irg = current_ir_graph;
931 dbg_info *dbgi = get_irn_dbg_info(node);
933 ia32_address_mode_t am;
934 ia32_address_t *addr = &am.addr;
936 match_arguments(&am, src_block, op1, op2, NULL, flags);
938 new_node = func(dbgi, irg, block, addr->base, addr->index,
939 addr->mem, am.new_op1, am.new_op2, new_eflags);
940 set_am_attributes(new_node, &am);
941 /* we can't use source address mode anymore when using immediates */
942 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
943 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
944 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
946 new_node = fix_mem_proj(new_node, &am);
/* Return the (lazily transformed, cached) initial x87 FP control word node,
 * obtained from the ABI as the ignore node for REG_FPCW.
 * NOTE(review): the cached-value return and final return are not visible in
 * this view. */
951 static ir_node *get_fpcw(void)
954 if(initial_fpcw != NULL)
957 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
958 &ia32_fp_cw_regs[REG_FPCW]);
959 initial_fpcw = be_transform_node(fpcw);
965 * Construct a standard binary operation, set AM and immediate if required.
967 * @param op1 The first operand
968 * @param op2 The second operand
969 * @param func The node constructor function
970 * @return The constructed ia32 node.
/* Construct an x87 float binary operation; like gen_binop but the constructor
 * additionally receives the FP control word (get_fpcw()).  Address mode is
 * disabled for long double (> 64 bit) operands -- the flag-stripping in that
 * branch is not visible in this view.
 *
 * @param op1   the first operand
 * @param op2   the second operand
 * @param func  the x87 ia32 node constructor
 * @return the constructed ia32 node (with memory Proj fixed up) */
972 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
973 construct_binop_float_func *func,
976 ir_graph *irg = current_ir_graph;
977 dbg_info *dbgi = get_irn_dbg_info(node);
978 ir_node *block = get_nodes_block(node);
979 ir_node *new_block = be_transform_node(block);
980 ir_mode *mode = get_irn_mode(node);
982 ia32_address_mode_t am;
983 ia32_address_t *addr = &am.addr;
985 /* cannot use addresmode with long double on x87 */
986 if (get_mode_size_bits(mode) > 64)
989 match_arguments(&am, block, op1, op2, NULL, flags);
991 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
992 am.new_op1, am.new_op2, get_fpcw());
993 set_am_attributes(new_node, &am);
995 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
997 new_node = fix_mem_proj(new_node, &am);
1003 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1005 * @param op1 The first operand
1006 * @param op2 The second operand
1007 * @param func The node constructor function
1008 * @return The constructed ia32 node.
/* Construct a shift/rotate binary operation.  Only match_mode_neutral and
 * match_immediate are permitted (asserted); the shift amount (op2) may be an
 * immediate, and single-user Convs on it are skipped because only the low 5
 * bits of the count are significant on ia32.  A third input on the lowered
 * node is treated as a scheduling dependency and re-attached.
 *
 * @param op1    the value to shift
 * @param op2    the shift amount
 * @param func   the ia32 shift node constructor
 * @return the constructed ia32 node */
1010 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1011 construct_shift_func *func,
1012 match_flags_t flags)
1014 dbg_info *dbgi = get_irn_dbg_info(node);
1015 ir_graph *irg = current_ir_graph;
1016 ir_node *block = get_nodes_block(node);
1017 ir_node *new_block = be_transform_node(block);
1022 assert(! mode_is_float(get_irn_mode(node)));
1023 assert(flags & match_immediate);
1024 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1026 if(flags & match_mode_neutral) {
1027 op1 = ia32_skip_downconv(op1);
1029 new_op1 = be_transform_node(op1);
1031 /* the shift amount can be any mode that is bigger than 5 bits, since all
1032 * other bits are ignored anyway */
1033 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1034 op2 = get_Conv_op(op2);
1035 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1037 new_op2 = create_immediate_or_transform(op2, 0);
1039 new_node = func(dbgi, irg, new_block, new_op1, new_op2);
1040 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1042 /* lowered shift instruction may have a dependency operand, handle it here */
1043 if (get_irn_arity(node) == 3) {
1044 /* we have a dependency */
1045 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1046 add_irn_dep(new_node, new_dep);
1054 * Construct a standard unary operation, set AM and immediate if required.
1056 * @param op The operand
1057 * @param func The node constructor function
1058 * @return The constructed ia32 node.
/* Construct a standard ia32 unary operation.  Only match_mode_neutral is
 * permitted (asserted), in which case downconvs on the operand are skipped.
 *
 * @param op     the operand
 * @param func   the ia32 unary node constructor
 * @return the constructed ia32 node */
1060 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1061 match_flags_t flags)
1063 ir_graph *irg = current_ir_graph;
1064 dbg_info *dbgi = get_irn_dbg_info(node);
1065 ir_node *block = get_nodes_block(node);
1066 ir_node *new_block = be_transform_node(block);
1070 assert(flags == 0 || flags == match_mode_neutral);
1071 if(flags & match_mode_neutral) {
1072 op = ia32_skip_downconv(op);
1075 new_op = be_transform_node(op);
1076 new_node = func(dbgi, irg, new_block, new_op);
1078 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Build an ia32 Lea node from a decomposed address: missing base/index become
 * NoReg_gp, present ones are transformed into the new graph, and the address
 * attributes are copied onto the Lea via set_address().
 * NOTE(review): the NULL-check conditions guarding the NoReg vs. transform
 * choices and the final return are not visible in this view. */
1083 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1084 ia32_address_t *addr)
1086 ir_graph *irg = current_ir_graph;
1087 ir_node *base = addr->base;
1088 ir_node *index = addr->index;
1092 base = ia32_new_NoReg_gp(env_cg);
1094 base = be_transform_node(base);
1098 index = ia32_new_NoReg_gp(env_cg);
1100 index = be_transform_node(index);
1103 res = new_rd_ia32_Lea(dbgi, irg, block, base, index);
1104 set_address(res, addr);
/* Return non-zero if the address mode carries any immediate component:
 * a non-zero offset, a symconst entity, a frame entity or frame usage. */
1109 static int am_has_immediates(const ia32_address_t *addr)
1111 return addr->offset != 0 || addr->symconst_ent != NULL
1112 || addr->frame_entity || addr->use_frame;
1116 * Creates an ia32 Add.
1118 * @return the created ia32 Add node
/* Transform a firm Add node into ia32 code.
 *
 * Float adds dispatch to gen_binop (SSE xAdd) or gen_binop_x87_float (vfadd).
 * Integer adds follow the priority documented inline: fold pure immediate
 * trees into a Const, use a Lea for add-with-immediate, use an Add with
 * source address mode when match_arguments finds one, otherwise emit a Lea.
 * An Add(x, 0) degenerates to the transformed operand (with a debug warning).
 * NOTE(review): several lines (declaration of new_node, some returns and
 * closing braces) are missing from this view -- code left untouched. */
1120 static ir_node *gen_Add(ir_node *node) {
1121 ir_graph *irg = current_ir_graph;
1122 dbg_info *dbgi = get_irn_dbg_info(node);
1123 ir_node *block = get_nodes_block(node);
1124 ir_node *new_block = be_transform_node(block);
1125 ir_node *op1 = get_Add_left(node);
1126 ir_node *op2 = get_Add_right(node);
1127 ir_mode *mode = get_irn_mode(node);
1129 ir_node *add_immediate_op;
1130 ia32_address_t addr;
1131 ia32_address_mode_t am;
1133 if (mode_is_float(mode)) {
1134 if (ia32_cg_config.use_sse2)
1135 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1136 match_commutative | match_am);
1138 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1139 match_commutative | match_am);
1142 ia32_mark_non_am(node);
1144 op2 = ia32_skip_downconv(op2);
1145 op1 = ia32_skip_downconv(op1);
1149 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1150 * 1. Add with immediate -> Lea
1151 * 2. Add with possible source address mode -> Add
1152 * 3. Otherwise -> Lea
1154 memset(&addr, 0, sizeof(addr));
1155 ia32_create_address_mode(&addr, node, /*force=*/1);
1156 add_immediate_op = NULL;
1158 if(addr.base == NULL && addr.index == NULL) {
1159 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1160 addr.symconst_sign, addr.offset);
1161 add_irn_dep(new_node, get_irg_frame(irg));
1162 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1165 /* add with immediate? */
1166 if(addr.index == NULL) {
1167 add_immediate_op = addr.base;
1168 } else if(addr.base == NULL && addr.scale == 0) {
1169 add_immediate_op = addr.index;
1172 if(add_immediate_op != NULL) {
1173 if(!am_has_immediates(&addr)) {
1174 #ifdef DEBUG_libfirm
1175 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1178 return be_transform_node(add_immediate_op);
1181 new_node = create_lea_from_address(dbgi, new_block, &addr);
1182 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1186 /* test if we can use source address mode */
1187 match_arguments(&am, block, op1, op2, NULL, match_commutative
1188 | match_mode_neutral | match_am | match_immediate | match_try_am);
1190 /* construct an Add with source address mode */
1191 if (am.op_type == ia32_AddrModeS) {
1192 ia32_address_t *am_addr = &am.addr;
1193 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1194 am_addr->index, am_addr->mem, am.new_op1,
1196 set_am_attributes(new_node, &am);
1197 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1199 new_node = fix_mem_proj(new_node, &am);
1204 /* otherwise construct a lea */
1205 new_node = create_lea_from_address(dbgi, new_block, &addr);
1206 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1211 * Creates an ia32 Mul.
1213 * @return the created ia32 Mul node
/* Transforms a firm Mul node: floats become xMul (SSE2) or vfmul (x87),
 * integers become IMul. Integer multiply is commutative, mode-neutral and
 * allows address-mode operands and immediates (NOTE: view is sampled; some
 * original lines/braces are not visible here). */
1215 static ir_node *gen_Mul(ir_node *node) {
1216 ir_node *op1 = get_Mul_left(node);
1217 ir_node *op2 = get_Mul_right(node);
1218 ir_mode *mode = get_irn_mode(node);
1220 if (mode_is_float(mode)) {
1221 if (ia32_cg_config.use_sse2)
1222 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1223 match_commutative | match_am);
1225 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1226 match_commutative | match_am);
/* integer multiply: allow both source address mode and immediates */
1228 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1229 match_commutative | match_am | match_mode_neutral |
1230 match_immediate | match_am_and_immediates);
1234 * Creates an ia32 Mulh.
1235 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1236 * this result while Mul returns the lower 32 bit.
1238 * @return the created ia32 Mulh node
/* Transforms a firm Mulh node (upper 32 bits of a 32x32->64 multiply).
 * Signed Mulh maps to IMul1OP, unsigned to Mul; the result is the
 * res_high Proj of the created multiply. Only 32-bit integer modes
 * are supported (see asserts below). */
1240 static ir_node *gen_Mulh(ir_node *node)
1242 ir_node *block = get_nodes_block(node);
1243 ir_node *new_block = be_transform_node(block);
1244 ir_graph *irg = current_ir_graph;
1245 dbg_info *dbgi = get_irn_dbg_info(node);
1246 ir_mode *mode = get_irn_mode(node);
1247 ir_node *op1 = get_Mulh_left(node);
1248 ir_node *op2 = get_Mulh_right(node);
1249 ir_node *proj_res_high;
1251 ia32_address_mode_t am;
1252 ia32_address_t *addr = &am.addr;
1254 assert(!mode_is_float(mode) && "Mulh with float not supported");
1255 assert(get_mode_size_bits(mode) == 32);
1257 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
1259 if (mode_is_signed(mode)) {
1260 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1261 addr->index, addr->mem, am.new_op1,
1264 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1265 addr->index, addr->mem, am.new_op1,
1269 set_am_attributes(new_node, &am);
1270 /* we can't use source address mode anymore when using immediates */
1271 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1272 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1273 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1275 assert(get_irn_mode(new_node) == mode_T);
1277 fix_mem_proj(new_node, &am);
/* both multiply variants use the same Proj number for the high result */
1279 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1280 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1281 mode_Iu, pn_ia32_IMul1OP_res_high);
1283 return proj_res_high;
1289 * Creates an ia32 And.
1291 * @return The created ia32 And node
/* Transforms a firm And node. Special case: And with constant 0xFF/0xFFFF
 * is a zero extension and is turned into an I2I Conv instead of an And
 * (NOTE: this view is sampled; some lines of the zero-extension branch
 * are not visible). Otherwise a regular ia32 And is created. */
1293 static ir_node *gen_And(ir_node *node) {
1294 ir_node *op1 = get_And_left(node);
1295 ir_node *op2 = get_And_right(node);
1296 assert(! mode_is_float(get_irn_mode(node)));
1298 /* is it a zero extension? */
1299 if (is_Const(op2)) {
1300 tarval *tv = get_Const_tarval(op2);
1301 long v = get_tarval_long(tv);
1303 if (v == 0xFF || v == 0xFFFF) {
1304 dbg_info *dbgi = get_irn_dbg_info(node);
1305 ir_node *block = get_nodes_block(node);
1312 assert(v == 0xFFFF);
1315 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1321 return gen_binop(node, op1, op2, new_rd_ia32_And,
1322 match_commutative | match_mode_neutral | match_am
1329 * Creates an ia32 Or.
1331 * @return The created ia32 Or node
/* Transforms a firm Or node into an ia32 Or (integer only; commutative,
 * mode-neutral, address mode and immediates allowed). */
1333 static ir_node *gen_Or(ir_node *node) {
1334 ir_node *op1 = get_Or_left(node);
1335 ir_node *op2 = get_Or_right(node);
1337 assert (! mode_is_float(get_irn_mode(node)));
1338 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1339 | match_mode_neutral | match_am | match_immediate);
1345 * Creates an ia32 Eor.
1347 * @return The created ia32 Eor node
/* Transforms a firm Eor (exclusive or) node into an ia32 Xor
 * (integer only; commutative, mode-neutral, AM and immediates allowed). */
1349 static ir_node *gen_Eor(ir_node *node) {
1350 ir_node *op1 = get_Eor_left(node);
1351 ir_node *op2 = get_Eor_right(node);
1353 assert(! mode_is_float(get_irn_mode(node)));
1354 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1355 | match_mode_neutral | match_am | match_immediate);
1360 * Creates an ia32 Sub.
1362 * @return The created ia32 Sub node
/* Transforms a firm Sub node: floats become xSub (SSE2) or vfsub (x87),
 * integers become ia32 Sub. A sub with a constant right operand triggers
 * an optimisation warning, since it should have been normalized earlier
 * (the warning's condition is not fully visible in this sampled view). */
1364 static ir_node *gen_Sub(ir_node *node) {
1365 ir_node *op1 = get_Sub_left(node);
1366 ir_node *op2 = get_Sub_right(node);
1367 ir_mode *mode = get_irn_mode(node);
1369 if (mode_is_float(mode)) {
1370 if (ia32_cg_config.use_sse2)
1371 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1373 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
1378 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1382 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1383 | match_am | match_immediate);
1387 * Generates an ia32 DivMod with additional infrastructure for the
1388 * register allocator if needed.
/* Common transformation for firm Div, Mod and DivMod nodes into an
 * ia32 IDiv (signed, with Cltd sign extension of the dividend into
 * edx) or Div (unsigned, edx zeroed via a Const). Handles the memory
 * dependency carefully: a Sync is only needed when the node's own memory
 * predecessor is distinct from the consumed address's memory. */
1390 static ir_node *create_Div(ir_node *node)
1392 ir_graph *irg = current_ir_graph;
1393 dbg_info *dbgi = get_irn_dbg_info(node);
1394 ir_node *block = get_nodes_block(node);
1395 ir_node *new_block = be_transform_node(block);
1402 ir_node *sign_extension;
1403 ia32_address_mode_t am;
1404 ia32_address_t *addr = &am.addr;
1406 /* the upper bits have random contents for smaller modes */
/* dispatch on the concrete opcode to fetch operands/mem/result mode */
1407 switch (get_irn_opcode(node)) {
1409 op1 = get_Div_left(node);
1410 op2 = get_Div_right(node);
1411 mem = get_Div_mem(node);
1412 mode = get_Div_resmode(node);
1415 op1 = get_Mod_left(node);
1416 op2 = get_Mod_right(node);
1417 mem = get_Mod_mem(node);
1418 mode = get_Mod_resmode(node);
1421 op1 = get_DivMod_left(node);
1422 op2 = get_DivMod_right(node);
1423 mem = get_DivMod_mem(node);
1424 mode = get_DivMod_resmode(node);
1427 panic("invalid divmod node %+F", node);
1430 match_arguments(&am, block, op1, op2, NULL, match_am);
1432 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1433 is the memory of the consumed address. We can have only the second op as address
1434 in Div nodes, so check only op2. */
1435 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1436 new_mem = be_transform_node(mem);
1437 if(!is_NoMem(addr->mem)) {
1441 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1444 new_mem = addr->mem;
1447 if (mode_is_signed(mode)) {
/* signed: sign-extend dividend into edx via Cltd (cdq) */
1448 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1449 add_irn_dep(produceval, get_irg_frame(irg));
1450 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1453 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1454 addr->index, new_mem, am.new_op1,
1455 sign_extension, am.new_op2);
/* unsigned: the "sign extension" input is simply the constant 0 */
1457 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1458 add_irn_dep(sign_extension, get_irg_frame(irg));
1460 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1461 addr->index, new_mem, am.new_op1,
1462 sign_extension, am.new_op2);
1465 set_irn_pinned(new_node, get_irn_pinned(node));
1467 set_am_attributes(new_node, &am);
1468 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1470 new_node = fix_mem_proj(new_node, &am);
/* Transforms a firm Mod node; delegates to the common create_Div(). */
1476 static ir_node *gen_Mod(ir_node *node) {
1477 return create_Div(node);
/* Transforms a firm Div node; delegates to the common create_Div(). */
1480 static ir_node *gen_Div(ir_node *node) {
1481 return create_Div(node);
/* Transforms a firm DivMod node; delegates to the common create_Div(). */
1484 static ir_node *gen_DivMod(ir_node *node) {
1485 return create_Div(node);
1491 * Creates an ia32 floating Div.
1493 * @return The created ia32 xDiv node
/* Transforms a firm Quot (floating point division) node into an ia32
 * xDiv (SSE2) or vfdiv (x87); source address mode is allowed. */
1495 static ir_node *gen_Quot(ir_node *node)
1497 ir_node *op1 = get_Quot_left(node);
1498 ir_node *op2 = get_Quot_right(node);
1500 if (ia32_cg_config.use_sse2) {
1501 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1503 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1509 * Creates an ia32 Shl.
1511 * @return The created ia32 Shl node
/* Transforms a firm Shl node into an ia32 Shl (mode-neutral,
 * immediate shift amounts allowed). */
1513 static ir_node *gen_Shl(ir_node *node) {
1514 ir_node *left = get_Shl_left(node);
1515 ir_node *right = get_Shl_right(node);
1517 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1518 match_mode_neutral | match_immediate);
1522 * Creates an ia32 Shr.
1524 * @return The created ia32 Shr node
/* Transforms a firm Shr node into an ia32 Shr (immediate shift amounts
 * allowed; NOT mode-neutral since the upper bits matter for a logical
 * right shift). */
1526 static ir_node *gen_Shr(ir_node *node) {
1527 ir_node *left = get_Shr_left(node);
1528 ir_node *right = get_Shr_right(node);
1530 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1536 * Creates an ia32 Sar.
1538 * @return The created ia32 Shrs node
/* Transforms a firm Shrs (arithmetic shift right) node. Two pattern
 * optimisations before the generic Sar:
 *  1. Shrs(x, 31) on mode_Is is a full sign extension -> Cltd (cdq)
 *     (the check of the shift amount is not visible in this sampled view);
 *  2. Shrs(Shl(x, C), C) with C == 16 or 24 on mode_Is is an 8/16-bit
 *     sign extension -> I2I Conv. */
1540 static ir_node *gen_Shrs(ir_node *node) {
1541 ir_node *left = get_Shrs_left(node);
1542 ir_node *right = get_Shrs_right(node);
1543 ir_mode *mode = get_irn_mode(node);
1545 if(is_Const(right) && mode == mode_Is) {
1546 tarval *tv = get_Const_tarval(right);
1547 long val = get_tarval_long(tv);
1549 /* this is a sign extension */
1550 ir_graph *irg = current_ir_graph;
1551 dbg_info *dbgi = get_irn_dbg_info(node);
1552 ir_node *block = be_transform_node(get_nodes_block(node));
1554 ir_node *new_op = be_transform_node(op);
1555 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1556 add_irn_dep(pval, get_irg_frame(irg));
1558 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1562 /* 8 or 16 bit sign extension? */
1563 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1564 ir_node *shl_left = get_Shl_left(left);
1565 ir_node *shl_right = get_Shl_right(left);
1566 if(is_Const(shl_right)) {
1567 tarval *tv1 = get_Const_tarval(right);
1568 tarval *tv2 = get_Const_tarval(shl_right);
/* both shift amounts must be the same constant (16 or 24) */
1569 if(tv1 == tv2 && tarval_is_long(tv1)) {
1570 long val = get_tarval_long(tv1);
1571 if(val == 16 || val == 24) {
1572 dbg_info *dbgi = get_irn_dbg_info(node);
1573 ir_node *block = get_nodes_block(node);
1583 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* fall back to a plain arithmetic shift right */
1592 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1598 * Creates an ia32 RotL.
1600 * @param op1 The first operator
1601 * @param op2 The second operator
1602 * @return The created ia32 RotL node
/* Creates an ia32 Rol (rotate left) for the given operands;
 * immediate rotate amounts allowed. */
1604 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1605 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1611 * Creates an ia32 RotR.
1612 * NOTE: There is no RotR with immediate because this would always be a RotL
1613 * "imm-mode_size_bits" which can be pre-calculated.
1615 * @param op1 The first operator
1616 * @param op2 The second operator
1617 * @return The created ia32 RotR node
/* Creates an ia32 Ror (rotate right) for the given operands; see the
 * header comment above on why an immediate RotR is never produced. */
1619 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1620 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1626 * Creates an ia32 RotR or RotL (depending on the found pattern).
1628 * @return The created ia32 RotL or RotR node
/* Transforms a firm Rot (rotate left) node. Recognizes the pattern
 * Rot(x, Add(Minus(e), bits)) — i.e. a rotate-left by "bits - e" —
 * and emits a RotR by e instead; otherwise a plain RotL is created. */
1630 static ir_node *gen_Rot(ir_node *node) {
1631 ir_node *rotate = NULL;
1632 ir_node *op1 = get_Rot_left(node);
1633 ir_node *op2 = get_Rot_right(node);
1635 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1636 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1637 that means we can create a RotR instead of an Add and a RotL */
1639 if (get_irn_op(op2) == op_Add) {
1641 ir_node *left = get_Add_left(add);
1642 ir_node *right = get_Add_right(add);
1643 if (is_Const(right)) {
1644 tarval *tv = get_Const_tarval(right);
1645 ir_mode *mode = get_irn_mode(node);
1646 long bits = get_mode_size_bits(mode);
/* the added constant must equal the mode's bit size */
1648 if (get_irn_op(left) == op_Minus &&
1649 tarval_is_long(tv) &&
1650 get_tarval_long(tv) == bits &&
1653 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1654 rotate = gen_RotR(node, op1, get_Minus_op(left));
1659 if (rotate == NULL) {
1660 rotate = gen_RotL(node, op1, op2);
1669 * Transforms a Minus node.
1671 * @return The created ia32 Minus node
/* Transforms a firm Minus node. Floats: SSE2 negates by xXor-ing with a
 * sign-bit constant loaded from memory (SSIGN/DSIGN depending on size),
 * x87 uses vfchs. Integers: plain ia32 Neg via gen_unop. */
1673 static ir_node *gen_Minus(ir_node *node)
1675 ir_node *op = get_Minus_op(node);
1676 ir_node *block = be_transform_node(get_nodes_block(node));
1677 ir_graph *irg = current_ir_graph;
1678 dbg_info *dbgi = get_irn_dbg_info(node);
1679 ir_mode *mode = get_irn_mode(node);
1684 if (mode_is_float(mode)) {
1685 ir_node *new_op = be_transform_node(op);
1686 if (ia32_cg_config.use_sse2) {
1687 /* TODO: non-optimal... if we have many xXors, then we should
1688 * rather create a load for the const and use that instead of
1689 * several AM nodes... */
1690 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1691 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1692 ir_node *nomem = new_rd_NoMem(irg);
1694 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1695 nomem, new_op, noreg_xmm);
/* pick the 32- or 64-bit sign mask entity and attach it via AM */
1697 size = get_mode_size_bits(mode);
1698 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1700 set_ia32_am_sc(new_node, ent);
1701 set_ia32_op_type(new_node, ia32_AddrModeS);
1702 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated change-sign instruction */
1704 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
1707 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1710 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1716 * Transforms a Not node.
1718 * @return The created ia32 Not node
/* Transforms a firm Not node into an ia32 Not (integer only; mode_b
 * Nots must have been lowered before this phase). */
1720 static ir_node *gen_Not(ir_node *node) {
1721 ir_node *op = get_Not_op(node);
1723 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1724 assert (! mode_is_float(get_irn_mode(node)));
1726 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1732 * Transforms an Abs node.
1734 * @return The created ia32 Abs node
/* Transforms a firm Abs node. Floats: SSE2 masks the sign bit away with
 * an xAnd against an SABS/DABS memory constant, x87 uses vfabs.
 * Integers: the classic branch-free sequence — sign-extend via Cltd,
 * Xor with the sign mask, then Sub the mask again. */
1736 static ir_node *gen_Abs(ir_node *node)
1738 ir_node *block = get_nodes_block(node);
1739 ir_node *new_block = be_transform_node(block);
1740 ir_node *op = get_Abs_op(node);
1741 ir_graph *irg = current_ir_graph;
1742 dbg_info *dbgi = get_irn_dbg_info(node);
1743 ir_mode *mode = get_irn_mode(node);
1744 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1745 ir_node *nomem = new_NoMem();
1751 if (mode_is_float(mode)) {
1752 new_op = be_transform_node(op);
1754 if (ia32_cg_config.use_sse2) {
1755 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1756 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1757 nomem, new_op, noreg_fp);
/* and with an all-ones-except-sign-bit constant from memory */
1759 size = get_mode_size_bits(mode);
1760 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1762 set_ia32_am_sc(new_node, ent);
1764 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1766 set_ia32_op_type(new_node, ia32_AddrModeS);
1767 set_ia32_ls_mode(new_node, mode);
1769 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1770 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1773 ir_node *xor, *pval, *sign_extension;
/* smaller modes are widened to 32 bit first */
1775 if (get_mode_size_bits(mode) == 32) {
1776 new_op = be_transform_node(op);
1778 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1781 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1782 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1785 add_irn_dep(pval, get_irg_frame(irg));
1786 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
/* abs(x) = (x ^ (x >> 31)) - (x >> 31) */
1788 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1789 nomem, new_op, sign_extension);
1790 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1792 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1793 nomem, xor, sign_extension);
1794 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Returns a flags-producing node for the given boolean value and stores
 * the comparison relation in *pnc_out. A Proj of a Cmp reuses the
 * transformed Cmp's flags; any other mode_b value is compared against 0
 * with a Test and reported as pn_Cmp_Lg. */
1800 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1802 ir_graph *irg = current_ir_graph;
1810 /* we have a Cmp as input */
1812 ir_node *pred = get_Proj_pred(node);
1814 flags = be_transform_node(pred);
1815 *pnc_out = get_Proj_proj(node);
1820 /* a mode_b value, we have to compare it against 0 */
1821 dbgi = get_irn_dbg_info(node);
1822 new_block = be_transform_node(get_nodes_block(node));
1823 new_op = be_transform_node(node);
1824 noreg = ia32_new_NoReg_gp(env_cg);
1825 nomem = new_NoMem();
1826 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1827 new_op, new_op, 0, 0);
1828 *pnc_out = pn_Cmp_Lg;
1833 * Transforms a Load.
1835 * @return the created ia32 Load node
/* Transforms a firm Load node. Builds an ia32 address mode from the
 * pointer, then creates xLoad (SSE2) / vfld (x87) for floats, a
 * Conv_I2I with source AM for sub-32-bit integer modes, or a plain
 * ia32 Load otherwise. Floating loads are marked rematerializable
 * and loads in the start block get a frame dependency for scheduling. */
1837 static ir_node *gen_Load(ir_node *node) {
1838 ir_node *old_block = get_nodes_block(node);
1839 ir_node *block = be_transform_node(old_block);
1840 ir_node *ptr = get_Load_ptr(node);
1841 ir_node *mem = get_Load_mem(node);
1842 ir_node *new_mem = be_transform_node(mem);
1845 ir_graph *irg = current_ir_graph;
1846 dbg_info *dbgi = get_irn_dbg_info(node);
1847 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1848 ir_mode *mode = get_Load_mode(node);
1851 ia32_address_t addr;
1853 /* construct load address */
1854 memset(&addr, 0, sizeof(addr));
1855 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1862 base = be_transform_node(base);
1868 index = be_transform_node(index);
1871 if (mode_is_float(mode)) {
1872 if (ia32_cg_config.use_sse2) {
1873 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1875 res_mode = mode_xmm;
1877 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1879 res_mode = mode_vfp;
1882 assert(mode != mode_b);
1884 /* create a conv node with address mode for smaller modes */
1885 if(get_mode_size_bits(mode) < 32) {
1886 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1887 new_mem, noreg, mode);
1889 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1894 set_irn_pinned(new_node, get_irn_pinned(node));
1895 set_ia32_op_type(new_node, ia32_AddrModeS);
1896 set_ia32_ls_mode(new_node, mode);
1897 set_address(new_node, &addr);
1899 if(get_irn_pinned(node) == op_pin_state_floats) {
/* an unpinned load may be re-executed instead of spilled */
1900 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1903 /* make sure we are scheduled behind the initial IncSP/Barrier
1904 * to avoid spills being placed before it
1906 if (block == get_irg_start_block(irg)) {
1907 add_irn_dep(new_node, get_irg_frame(irg));
1910 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Checks whether a Load (reached via Proj @p node) can be folded into a
 * destination-address-mode operation for a Store at @p ptr/@p mem:
 * the load must have a single user, live in the same block, feed the
 * store's memory directly, use the same pointer, and the other operand
 * must not depend on the load through memory. Returns non-zero if OK. */
1915 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1916 ir_node *ptr, ir_node *other)
1923 /* we only use address mode if we're the only user of the load */
1924 if(get_irn_n_edges(node) > 1)
1927 load = get_Proj_pred(node);
1930 if(get_nodes_block(load) != block)
1933 /* Store should be attached to the load */
1934 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
1936 /* store should have the same pointer as the load */
1937 if(get_Load_ptr(load) != ptr)
1940 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1941 if(other != NULL && get_nodes_block(other) == block
1942 && heights_reachable_in_block(heights, other, load))
/* Creates a destination-address-mode binary operation (op [mem], reg/imm)
 * for a Store(val) pattern, if one of the operands is a foldable load of
 * the store's address (checked by use_dest_am; both orders are tried when
 * the operation is commutative). @p func8bit is used for 8-bit modes.
 * Returns the new node, or falls through (not visible here) when no
 * destination AM is possible. */
1948 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1949 ir_node *mem, ir_node *ptr, ir_mode *mode,
1950 construct_binop_dest_func *func,
1951 construct_binop_dest_func *func8bit,
1952 match_flags_t flags)
1954 ir_node *src_block = get_nodes_block(node);
1956 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1957 ir_graph *irg = current_ir_graph;
1962 ia32_address_mode_t am;
1963 ia32_address_t *addr = &am.addr;
1964 memset(&am, 0, sizeof(am));
1966 assert(flags & match_dest_am);
1967 assert(flags & match_immediate); /* there is no destam node without... */
1968 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the matching load; the other becomes
 * a register or immediate operand */
1970 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
1971 build_address(&am, op1);
1972 new_op = create_immediate_or_transform(op2, 0);
1973 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
1974 build_address(&am, op2);
1975 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address parts with NoReg/NoMem placeholders */
1980 if(addr->base == NULL)
1981 addr->base = noreg_gp;
1982 if(addr->index == NULL)
1983 addr->index = noreg_gp;
1984 if(addr->mem == NULL)
1985 addr->mem = new_NoMem();
1987 dbgi = get_irn_dbg_info(node);
1988 block = be_transform_node(src_block);
1989 if(get_mode_size_bits(mode) == 8) {
1990 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
1993 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
1996 set_address(new_node, addr);
1997 set_ia32_op_type(new_node, ia32_AddrModeD);
1998 set_ia32_ls_mode(new_node, mode);
1999 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Creates a destination-address-mode unary operation (op [mem]) for a
 * Store(val) pattern when the operand is a foldable load of the store's
 * address; returns the new node (NULL path not visible in this view). */
2004 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2005 ir_node *ptr, ir_mode *mode,
2006 construct_unop_dest_func *func)
2008 ir_graph *irg = current_ir_graph;
2009 ir_node *src_block = get_nodes_block(node);
2013 ia32_address_mode_t am;
2014 ia32_address_t *addr = &am.addr;
2015 memset(&am, 0, sizeof(am));
2017 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2020 build_address(&am, op);
2022 dbgi = get_irn_dbg_info(node);
2023 block = be_transform_node(src_block);
2024 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2025 set_address(new_node, addr);
2026 set_ia32_op_type(new_node, ia32_AddrModeD);
2027 set_ia32_ls_mode(new_node, mode);
2028 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Tries to turn a Store of an 8-bit Psi with constant 0/1 values into an
 * ia32 SetMem (setcc directly to memory). The condition's flags decide
 * the set; "negated" is used when true/false values are 0/1 instead of
 * 1/0. Returns the SetMem node, or (path not visible here) NULL when
 * the pattern does not apply. */
2033 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2034 ir_mode *mode = get_irn_mode(node);
2035 ir_node *psi_true = get_Psi_val(node, 0);
2036 ir_node *psi_default = get_Psi_default(node);
2047 ia32_address_t addr;
/* setcc only produces a byte */
2049 if(get_mode_size_bits(mode) != 8)
2052 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2054 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2060 build_address_ptr(&addr, ptr, mem);
2062 irg = current_ir_graph;
2063 dbgi = get_irn_dbg_info(node);
2064 block = get_nodes_block(node);
2065 new_block = be_transform_node(block);
2066 cond = get_Psi_cond(node, 0);
2067 flags = get_flags_node(cond, &pnc);
2068 new_mem = be_transform_node(mem);
2069 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2070 addr.index, addr.mem, flags, pnc, negated);
2071 set_address(new_node, &addr);
2072 set_ia32_op_type(new_node, ia32_AddrModeD);
2073 set_ia32_ls_mode(new_node, mode);
2074 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Tries to transform a Store into a destination-address-mode operation
 * (e.g. Store(p, Add(Load(p), x)) -> AddMem). Only GP modes; the stored
 * value must have the store as its single user and live in the same
 * block. Dispatches on the value's opcode to the matching *Mem
 * constructor; Add/Sub with +-1 become IncMem/DecMem, Psi becomes
 * SetMem. Returns the new node or NULL (fall-through paths are not
 * fully visible in this sampled view). */
2079 static ir_node *try_create_dest_am(ir_node *node) {
2080 ir_node *val = get_Store_value(node);
2081 ir_node *mem = get_Store_mem(node);
2082 ir_node *ptr = get_Store_ptr(node);
2083 ir_mode *mode = get_irn_mode(val);
2084 unsigned bits = get_mode_size_bits(mode);
2089 /* handle only GP modes for now... */
2090 if(!mode_needs_gp_reg(mode))
2094 /* store must be the only user of the val node */
2095 if(get_irn_n_edges(val) > 1)
2097 /* skip pointless convs */
2099 ir_node *conv_op = get_Conv_op(val);
2100 ir_mode *pred_mode = get_irn_mode(conv_op);
2101 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2109 /* value must be in the same block */
2110 if(get_nodes_block(node) != get_nodes_block(val))
2113 switch(get_irn_opcode(val)) {
2115 op1 = get_Add_left(val);
2116 op2 = get_Add_right(val);
/* Add +1 / -1 map to the shorter inc/dec forms */
2117 if(is_Const_1(op2)) {
2118 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2119 new_rd_ia32_IncMem);
2121 } else if(is_Const_Minus_1(op2)) {
2122 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2123 new_rd_ia32_DecMem);
2126 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2127 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2128 match_dest_am | match_commutative |
2132 op1 = get_Sub_left(val);
2133 op2 = get_Sub_right(val);
2135 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2138 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2139 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2140 match_dest_am | match_immediate |
2144 op1 = get_And_left(val);
2145 op2 = get_And_right(val);
2146 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2147 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2148 match_dest_am | match_commutative |
2152 op1 = get_Or_left(val);
2153 op2 = get_Or_right(val);
2154 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2155 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2156 match_dest_am | match_commutative |
2160 op1 = get_Eor_left(val);
2161 op2 = get_Eor_right(val);
2162 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2163 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2164 match_dest_am | match_commutative |
2168 op1 = get_Shl_left(val);
2169 op2 = get_Shl_right(val);
2170 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2171 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2172 match_dest_am | match_immediate);
2175 op1 = get_Shr_left(val);
2176 op2 = get_Shr_right(val);
2177 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2178 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2179 match_dest_am | match_immediate);
2182 op1 = get_Shrs_left(val);
2183 op2 = get_Shrs_right(val);
2184 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2185 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2186 match_dest_am | match_immediate);
2189 op1 = get_Rot_left(val);
2190 op2 = get_Rot_right(val);
2191 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2192 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2193 match_dest_am | match_immediate);
2195 /* TODO: match ROR patterns... */
2197 new_node = try_create_SetMem(val, ptr, mem);
2200 op1 = get_Minus_op(val);
2201 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2204 /* should be lowered already */
2205 assert(mode != mode_b);
2206 op1 = get_Not_op(val);
2207 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* propagate the pinned state from the original Store */
2213 if(new_node != NULL) {
2214 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2215 get_irn_pinned(node) == op_pin_state_pinned) {
2216 set_irn_pinned(new_node, op_pin_state_pinned);
/* Checks whether @p node is a Conv from a float mode to a 32-bit GP
 * mode (used by gen_Store to emit vfist directly). */
2223 static int is_float_to_int32_conv(const ir_node *node)
2225 ir_mode *mode = get_irn_mode(node);
2229 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2234 conv_op = get_Conv_op(node);
2235 conv_mode = get_irn_mode(conv_op);
2237 if(!mode_is_float(conv_mode))
2244 * Transforms a Store.
2246 * @return the created ia32 Store node
/* Transforms a firm Store node. First tries a destination-address-mode
 * combination (try_create_dest_am). Otherwise builds a store address and
 * emits: xStore (SSE2) / vfst (x87) for floats, vfist for a
 * float-to-int32 conversion feeding the store (saving the intermediate
 * GP value), or Store/Store8Bit for integers. Redundant same-mode Convs
 * before the stored value are skipped. */
2248 static ir_node *gen_Store(ir_node *node)
2250 ir_node *block = get_nodes_block(node);
2251 ir_node *new_block = be_transform_node(block);
2252 ir_node *ptr = get_Store_ptr(node);
2253 ir_node *val = get_Store_value(node);
2254 ir_node *mem = get_Store_mem(node);
2255 ir_graph *irg = current_ir_graph;
2256 dbg_info *dbgi = get_irn_dbg_info(node);
2257 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2258 ir_mode *mode = get_irn_mode(val);
2261 ia32_address_t addr;
2263 /* check for destination address mode */
2264 new_node = try_create_dest_am(node);
2265 if(new_node != NULL)
2268 /* construct store address */
2269 memset(&addr, 0, sizeof(addr));
2270 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2272 if(addr.base == NULL) {
2275 addr.base = be_transform_node(addr.base);
2278 if(addr.index == NULL) {
2281 addr.index = be_transform_node(addr.index);
2283 addr.mem = be_transform_node(mem);
2285 if (mode_is_float(mode)) {
2286 /* convs (and strict-convs) before stores are unnecessary if the mode
2288 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2289 val = get_Conv_op(val);
2291 new_val = be_transform_node(val);
2292 if (ia32_cg_config.use_sse2) {
2293 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2294 addr.index, addr.mem, new_val);
2296 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2297 addr.index, addr.mem, new_val, mode);
/* float->int32 conv + store: combine into a single vfist */
2299 } else if(is_float_to_int32_conv(val)) {
2300 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2301 val = get_Conv_op(val);
2303 /* convs (and strict-convs) before stores are unnecessary if the mode
2305 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2306 val = get_Conv_op(val);
2308 new_val = be_transform_node(val);
2310 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2311 addr.index, addr.mem, new_val, trunc_mode);
2313 new_val = create_immediate_or_transform(val, 0);
2314 assert(mode != mode_b);
2316 if (get_mode_size_bits(mode) == 8) {
2317 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2318 addr.index, addr.mem, new_val);
2320 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2321 addr.index, addr.mem, new_val);
2325 set_irn_pinned(new_node, get_irn_pinned(node));
2326 set_ia32_op_type(new_node, ia32_AddrModeD);
2327 set_ia32_ls_mode(new_node, mode);
2329 set_address(new_node, &addr);
2330 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transforms a switch Cond (non-mode_b selector) into an ia32 SwitchJmp.
 * Scans the Proj numbers for the minimum/maximum case value, panics on
 * tables larger than 256000 entries, and biases the selector with a Lea
 * (-switch_min) when the smallest case is not 0 so the jump table can
 * start at index 0. */
2335 static ir_node *create_Switch(ir_node *node)
2337 ir_graph *irg = current_ir_graph;
2338 dbg_info *dbgi = get_irn_dbg_info(node);
2339 ir_node *block = be_transform_node(get_nodes_block(node));
2340 ir_node *sel = get_Cond_selector(node);
2341 ir_node *new_sel = be_transform_node(sel);
2342 int switch_min = INT_MAX;
2343 int switch_max = INT_MIN;
2344 long default_pn = get_Cond_defaultProj(node);
2346 const ir_edge_t *edge;
2348 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2350 /* determine the smallest switch case value */
2351 foreach_out_edge(node, edge) {
2352 ir_node *proj = get_edge_src_irn(edge);
2353 long pn = get_Proj_proj(proj);
2354 if(pn == default_pn)
2363 if((unsigned) (switch_max - switch_min) > 256000) {
2364 panic("Size of switch %+F bigger than 256000", node);
2367 if (switch_min != 0) {
2368 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2370 /* if smallest switch case is not 0 we need an additional sub */
2371 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2372 add_ia32_am_offs_int(new_sel, -switch_min);
2373 set_ia32_op_type(new_sel, ia32_AddrModeS);
2375 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2378 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2379 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2385 * Transform a Cond node.
/* Transforms a firm Cond node: non-boolean selectors become a switch
 * (create_Switch); boolean conditions get their flags via get_flags_node
 * and become a conditional jump (Jcc). */
2387 static ir_node *gen_Cond(ir_node *node) {
2388 ir_node *block = get_nodes_block(node);
2389 ir_node *new_block = be_transform_node(block);
2390 ir_graph *irg = current_ir_graph;
2391 dbg_info *dbgi = get_irn_dbg_info(node);
2392 ir_node *sel = get_Cond_selector(node);
2393 ir_mode *sel_mode = get_irn_mode(sel);
2394 ir_node *flags = NULL;
2398 if (sel_mode != mode_b) {
2399 return create_Switch(node);
2402 /* we get flags from a cmp */
2403 flags = get_flags_node(sel, &pnc);
2405 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2406 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2414 * Transforms a CopyB node.
2416 * @return The transformed node.
/* Transforms a firm CopyB node. Copies of >= 128 bytes use rep-movs
 * (ia32_CopyB) with the dword count in ECX and the byte remainder
 * encoded in the node; smaller copies use the immediate variant
 * (CopyB_i), with an optimisation warning for sizes below 4. */
2418 static ir_node *gen_CopyB(ir_node *node) {
2419 ir_node *block = be_transform_node(get_nodes_block(node));
2420 ir_node *src = get_CopyB_src(node);
2421 ir_node *new_src = be_transform_node(src);
2422 ir_node *dst = get_CopyB_dst(node);
2423 ir_node *new_dst = be_transform_node(dst);
2424 ir_node *mem = get_CopyB_mem(node);
2425 ir_node *new_mem = be_transform_node(mem);
2426 ir_node *res = NULL;
2427 ir_graph *irg = current_ir_graph;
2428 dbg_info *dbgi = get_irn_dbg_info(node);
2429 int size = get_type_size_bytes(get_CopyB_type(node));
2432 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2433 /* then we need the size explicitly in ECX. */
2434 if (size >= 32 * 4) {
2435 rem = size & 0x3; /* size % 4 */
/* the dword count ends up in a Const feeding ECX */
2438 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
2439 add_irn_dep(res, get_irg_frame(irg));
2441 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
2444 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
2447 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2450 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/* Transforms a backend Copy node: duplicates it and normalizes GP-mode
 * copies to mode_Iu. */
2455 static ir_node *gen_be_Copy(ir_node *node)
2457 ir_node *new_node = be_duplicate_node(node);
2458 ir_mode *mode = get_irn_mode(new_node);
2460 if (mode_needs_gp_reg(mode)) {
2461 set_irn_mode(new_node, mode_Iu);
/* Creates an x87 floating point compare for a Cmp node. Prefers vFucomi
 * (flags directly) when available; otherwise uses vFtstFnstsw for a
 * compare against 0.0 or vFucomFnstsw, followed by a Sahf to move the
 * FPU status word into the CPU flags. */
2467 static ir_node *create_Fucom(ir_node *node)
2469 ir_graph *irg = current_ir_graph;
2470 dbg_info *dbgi = get_irn_dbg_info(node);
2471 ir_node *block = get_nodes_block(node);
2472 ir_node *new_block = be_transform_node(block);
2473 ir_node *left = get_Cmp_left(node);
2474 ir_node *new_left = be_transform_node(left);
2475 ir_node *right = get_Cmp_right(node);
2479 if(ia32_cg_config.use_fucomi) {
2480 new_right = be_transform_node(right);
2481 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2483 set_ia32_commutative(new_node);
2484 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2486 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2487 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2490 new_right = be_transform_node(right);
2491 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2495 set_ia32_commutative(new_node);
2497 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* transfer FPU status word to the eflags register */
2499 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2500 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Creates an SSE Ucomi compare for a Cmp node.
 *
 * The operands are matched with address-mode support (commutative), so one
 * operand may be folded into a memory access; fix_mem_proj repairs memory
 * Projs if address mode was used.
 */
2506 static ir_node *create_Ucomi(ir_node *node)
2508 	ir_graph *irg = current_ir_graph;
2509 	dbg_info *dbgi = get_irn_dbg_info(node);
2510 	ir_node *src_block = get_nodes_block(node);
2511 	ir_node *new_block = be_transform_node(src_block);
2512 	ir_node *left = get_Cmp_left(node);
2513 	ir_node *right = get_Cmp_right(node);
2515 	ia32_address_mode_t am;
2516 	ia32_address_t *addr = &am.addr;
/* allow folding one operand as a memory operand; Ucomi is commutative */
2518 	match_arguments(&am, src_block, left, right, NULL,
2519 	                match_commutative | match_am);
2521 	new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2522 	                             addr->mem, am.new_op1, am.new_op2,
2524 	set_am_attributes(new_node, &am);
2526 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2528 	new_node = fix_mem_proj(new_node, &am);
2534  * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2535  * to fold an and into a test node
/*
 * Walks all users (Projs) of the Cmp and rejects any comparison relation
 * other than Eq/Lg — only those are safe after folding an And into Test.
 * NOTE(review): the return statements are on lines not visible in this
 * excerpt; presumably 0 inside the loop on mismatch, 1 at the end.
 */
2537 static int can_fold_test_and(ir_node *node)
2539 	const ir_edge_t *edge;
2541 	/** we can only have eq and lg projs */
2542 	foreach_out_edge(node, edge) {
2543 		ir_node *proj = get_edge_src_irn(edge);
2544 		pn_Cmp pnc = get_Proj_proj(proj);
2545 		if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/**
 * Transforms a Cmp node.
 *
 * Float compares are dispatched to create_Ucomi (SSE2) or create_Fucom (x87).
 * Integer compares prefer the Test instruction where possible:
 *  - Cmp(And(a,b), 0) with only Eq/Lg users  ->  Test(a, b)
 *  - Cmp(x, 0) with x folded as source-AM    ->  Cmp(mem, 0-immediate)
 *  - Cmp(x, 0) otherwise                     ->  Test(x, x)
 *  - general case                            ->  Cmp(left, right)
 * 8-bit operands use the dedicated 8-bit node variants.
 */
2552 static ir_node *gen_Cmp(ir_node *node)
2554 	ir_graph *irg = current_ir_graph;
2555 	dbg_info *dbgi = get_irn_dbg_info(node);
2556 	ir_node *block = get_nodes_block(node);
2557 	ir_node *new_block = be_transform_node(block);
2558 	ir_node *left = get_Cmp_left(node);
2559 	ir_node *right = get_Cmp_right(node);
2560 	ir_mode *cmp_mode = get_irn_mode(left);
2562 	ia32_address_mode_t am;
2563 	ia32_address_t *addr = &am.addr;
/* float compares get their own lowering */
2566 	if(mode_is_float(cmp_mode)) {
2567 		if (ia32_cg_config.use_sse2) {
2568 			return create_Ucomi(node);
2570 		return create_Fucom(node);
2574 	assert(mode_needs_gp_reg(cmp_mode));
2576 	/* we prefer the Test instruction where possible except cases where
2577 	 * we can use SourceAM */
2578 	cmp_unsigned = !mode_is_signed(cmp_mode);
2579 	if (is_Const_0(right)) {
/* And with a single user and only Eq/Lg projs can be folded into Test */
2581 		    get_irn_n_edges(left) == 1 &&
2582 		    can_fold_test_and(node)) {
2583 			/* Test(and_left, and_right) */
2584 			ir_node *and_left = get_And_left(left);
2585 			ir_node *and_right = get_And_right(left);
2586 			ir_mode *mode = get_irn_mode(and_left);
2588 			match_arguments(&am, block, and_left, and_right, NULL,
2590 			                match_am | match_8bit_am | match_16bit_am |
2591 			                match_am_and_immediates | match_immediate |
2592 			                match_8bit | match_16bit);
2593 			if (get_mode_size_bits(mode) == 8) {
2594 				new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2595 				                                addr->index, addr->mem, am.new_op1,
2596 				                                am.new_op2, am.ins_permuted,
2599 				new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2600 				                            addr->index, addr->mem, am.new_op1,
2601 				                            am.new_op2, am.ins_permuted, cmp_unsigned);
/* try folding the left operand as a memory operand */
2604 			match_arguments(&am, block, NULL, left, NULL,
2605 			                match_am | match_8bit_am | match_16bit_am |
2606 			                match_8bit | match_16bit);
2607 			if (am.op_type == ia32_AddrModeS) {
/* source address mode: compare memory against a 0 immediate */
2609 				ir_node *imm_zero = try_create_Immediate(right, 0);
2610 				if (get_mode_size_bits(cmp_mode) == 8) {
2611 					new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2612 					                               addr->index, addr->mem, am.new_op2,
2613 					                               imm_zero, am.ins_permuted,
2616 					new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2617 					                           addr->index, addr->mem, am.new_op2,
2618 					                           imm_zero, am.ins_permuted, cmp_unsigned);
2621 				/* Test(left, left) */
2622 				if (get_mode_size_bits(cmp_mode) == 8) {
2623 					new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2624 					                                addr->index, addr->mem, am.new_op2,
2625 					                                am.new_op2, am.ins_permuted,
2628 					new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2629 					                            addr->index, addr->mem, am.new_op2,
2630 					                            am.new_op2, am.ins_permuted,
2636 		/* Cmp(left, right) */
2637 		match_arguments(&am, block, left, right, NULL,
2638 		                match_commutative | match_am | match_8bit_am |
2639 		                match_16bit_am | match_am_and_immediates |
2640 		                match_immediate | match_8bit | match_16bit);
2641 		if (get_mode_size_bits(cmp_mode) == 8) {
2642 			new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2643 			                               addr->index, addr->mem, am.new_op1,
2644 			                               am.new_op2, am.ins_permuted,
2647 			new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2648 			                           addr->index, addr->mem, am.new_op1,
2649 			                           am.new_op2, am.ins_permuted, cmp_unsigned);
2652 	set_am_attributes(new_node, &am);
2653 	assert(cmp_mode != NULL);
2654 	set_ia32_ls_mode(new_node, cmp_mode);
2656 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2658 	new_node = fix_mem_proj(new_node, &am);
/**
 * Creates an ia32 CMov out of a Psi's true/false values and a flags node.
 *
 * Note the operand order: val_false is op1 and val_true is op2 — the CMov
 * moves op2 when the condition holds. Requires CMOV support (asserted) and
 * a gp-register mode.
 */
2663 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2666 	ir_graph *irg = current_ir_graph;
2667 	dbg_info *dbgi = get_irn_dbg_info(node);
2668 	ir_node *block = get_nodes_block(node);
2669 	ir_node *new_block = be_transform_node(block);
2670 	ir_node *val_true = get_Psi_val(node, 0);
2671 	ir_node *val_false = get_Psi_default(node);
2673 	match_flags_t match_flags;
2674 	ia32_address_mode_t am;
2675 	ia32_address_t *addr;
2677 	assert(ia32_cg_config.use_cmov);
2678 	assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2682 	match_flags = match_commutative | match_am | match_16bit_am |
/* flags is passed so match_arguments keeps the flags value alive/usable */
2685 	match_arguments(&am, block, val_false, val_true, flags, match_flags);
2687 	new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2688 	                            addr->mem, am.new_op1, am.new_op2, new_flags,
2689 	                            am.ins_permuted, pnc);
2690 	set_am_attributes(new_node, &am);
2692 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2694 	new_node = fix_mem_proj(new_node, &am);
/**
 * Creates a Set (setcc) node reading the given flags and producing a 0/1
 * value, converted up from 8 bit to the mode of orig_node when necessary.
 *
 * @param dbgi       debug info to attach
 * @param new_block  already-transformed target block
 * @param flags      the (transformed) flags value to test
 * @param pnc        the compare relation the Set materialises
 * @param orig_node  pre-transform node, used for mode and debug name
 */
2701 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2702 		ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2705 	ir_graph *irg = current_ir_graph;
2706 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2707 	ir_node *nomem = new_NoMem();
2708 	ir_mode *mode = get_irn_mode(orig_node);
2711 	new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2712 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2714 	/* we might need to conv the result up */
2715 	if(get_mode_size_bits(mode) > 8) {
/* setcc only writes an 8-bit register: zero-extend (mode_Bu) to full width */
2716 		new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2717 		                                    nomem, new_node, mode_Bu);
2718 		SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2725 * Transforms a Psi node into CMov.
2727 * @return The transformed node.
/*
 * Transforms a (single-condition, gp-mode) Psi node.
 * Psi(c, 1, 0) and Psi(c, 0, 1) become a Set (the latter with the
 * condition inverted via the last argument); everything else becomes CMov.
 */
2729 static ir_node *gen_Psi(ir_node *node)
2731 	dbg_info *dbgi = get_irn_dbg_info(node);
2732 	ir_node *block = get_nodes_block(node);
2733 	ir_node *new_block = be_transform_node(block);
2734 	ir_node *psi_true = get_Psi_val(node, 0);
2735 	ir_node *psi_default = get_Psi_default(node);
2736 	ir_node *cond = get_Psi_cond(node, 0);
2737 	ir_node *flags = NULL;
2741 	assert(get_Psi_n_conds(node) == 1);
2742 	assert(get_irn_mode(cond) == mode_b);
2743 	assert(mode_needs_gp_reg(get_irn_mode(node)));
/* lower the mode_b condition to an eflags-producing node + relation */
2745 	flags = get_flags_node(cond, &pnc);
2747 	if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2748 		new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2749 	} else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
/* swapped constants: same Set, with ins_permuted = 1 to invert */
2750 		new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2752 		new_node = create_CMov(node, cond, flags, pnc);
2759 * Create a conversion from x87 state register to general purpose.
/*
 * Implements float->int conversion via the x87 fist instruction: the value
 * is stored to the frame with truncation rounding mode, then reloaded as an
 * integer. fist only stores *signed* integers, so unsigned 32-bit targets
 * store a signed 64-bit value and read back only the low 32 bits.
 */
2761 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2762 	ir_node *block = be_transform_node(get_nodes_block(node));
2763 	ir_node *op = get_Conv_op(node);
2764 	ir_node *new_op = be_transform_node(op);
2765 	ia32_code_gen_t *cg = env_cg;
2766 	ir_graph *irg = current_ir_graph;
2767 	dbg_info *dbgi = get_irn_dbg_info(node);
2768 	ir_node *noreg = ia32_new_NoReg_gp(cg);
/* C semantics require truncation: switch the fpu to truncate rounding */
2769 	ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2770 	ir_mode *mode = get_irn_mode(node);
2771 	ir_node *fist, *load;
2774 	fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2775 	                         new_NoMem(), new_op, trunc_mode);
2777 	set_irn_pinned(fist, op_pin_state_floats);
2778 	set_ia32_use_frame(fist);
2779 	set_ia32_op_type(fist, ia32_AddrModeD);
2781 	assert(get_mode_size_bits(mode) <= 32);
2782 	/* exception we can only store signed 32 bit integers, so for unsigned
2783 	   we store a 64bit (signed) integer and load the lower bits */
2784 	if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2785 		set_ia32_ls_mode(fist, mode_Ls);
2787 		set_ia32_ls_mode(fist, mode_Is);
2789 	SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* reload the just-stored value as a 32-bit integer */
2792 	load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2794 	set_irn_pinned(load, op_pin_state_floats);
2795 	set_ia32_use_frame(load);
2796 	set_ia32_op_type(load, ia32_AddrModeS);
2797 	set_ia32_ls_mode(load, mode_Is);
/* tell the frame allocator how big the spill slot has to be */
2798 	if(get_ia32_ls_mode(fist) == mode_Ls) {
2799 		ia32_attr_t *attr = get_ia32_attr(load);
2800 		attr->data.need_64bit_stackent = 1;
2802 		ia32_attr_t *attr = get_ia32_attr(load);
2803 		attr->data.need_32bit_stackent = 1;
2805 	SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2807 	return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2811  * Creates an x87 strict Conv by placing a Store and a Load
/*
 * Forces rounding to tgt_mode precision on x87 (which always computes in
 * extended precision) by storing the value to the frame and loading it
 * back; the result Proj carries mode_E again.
 */
2813 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2815 	ir_node *block = get_nodes_block(node);
2816 	ir_graph *irg = current_ir_graph;
2817 	dbg_info *dbgi = get_irn_dbg_info(node);
2818 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2819 	ir_node *nomem = new_NoMem();
2820 	ir_node *frame = get_irg_frame(irg);
2821 	ir_node *store, *load;
/* store rounds to the (narrower) target precision */
2824 	store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
2826 	set_ia32_use_frame(store);
2827 	set_ia32_op_type(store, ia32_AddrModeD);
2828 	SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
/* the load is serialized after the store via its memory input */
2830 	load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
2832 	set_ia32_use_frame(load);
2833 	set_ia32_op_type(load, ia32_AddrModeS);
2834 	SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
2836 	new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
2841 * Create a conversion from general purpose to x87 register
/*
 * int -> x87 float conversion via fild.
 * Fast path: a signed 32-bit operand may be filded directly from memory
 * (source address mode). Otherwise the integer is widened to 32 bit,
 * spilled to the frame and filded from there; 32-bit *unsigned* values get
 * a zero high word appended and are loaded as a 64-bit signed integer.
 */
2843 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
2844 	ir_node *src_block = get_nodes_block(node);
2845 	ir_node *block = be_transform_node(src_block);
2846 	ir_graph *irg = current_ir_graph;
2847 	dbg_info *dbgi = get_irn_dbg_info(node);
2848 	ir_node *op = get_Conv_op(node);
2849 	ir_node *new_op = NULL;
2853 	ir_mode *store_mode;
2859 	/* fild can use source AM if the operand is a signed 32bit integer */
2860 	if (src_mode == mode_Is) {
2861 		ia32_address_mode_t am;
2863 		match_arguments(&am, src_block, NULL, op, NULL,
2864 		                match_am | match_try_am);
2865 		if (am.op_type == ia32_AddrModeS) {
2866 			ia32_address_t *addr = &am.addr;
2868 			fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
2869 			                          addr->index, addr->mem);
2870 			new_node = new_r_Proj(irg, block, fild, mode_vfp,
2873 			set_am_attributes(fild, &am);
2874 			SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
2876 			fix_mem_proj(fild, &am);
/* slow path: no source AM possible, go through a frame slot */
2881 	if(new_op == NULL) {
2882 		new_op = be_transform_node(op);
2885 	noreg = ia32_new_NoReg_gp(env_cg);
2886 	nomem = new_NoMem();
2887 	mode = get_irn_mode(op);
2889 	/* first convert to 32 bit signed if necessary */
2890 	src_bits = get_mode_size_bits(src_mode);
2891 	if (src_bits == 8) {
2892 		new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
2894 		SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2896 	} else if (src_bits < 32) {
2897 		new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
2899 		SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2903 		assert(get_mode_size_bits(mode) == 32);
/* spill the (now 32-bit) integer to the frame */
2906 	store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
2909 	set_ia32_use_frame(store);
2910 	set_ia32_op_type(store, ia32_AddrModeD);
2911 	set_ia32_ls_mode(store, mode_Iu);
2913 	/* exception for 32bit unsigned, do a 64bit spill+load */
2914 	if(!mode_is_signed(mode)) {
/* write a zero high word at offset 4 so the 64-bit value is non-negative */
2917 		ir_node *zero_const = create_Immediate(NULL, 0, 0);
2919 		ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
2920 		                                        get_irg_frame(irg), noreg, nomem,
2923 		set_ia32_use_frame(zero_store);
2924 		set_ia32_op_type(zero_store, ia32_AddrModeD);
2925 		add_ia32_am_offs_int(zero_store, 4);
2926 		set_ia32_ls_mode(zero_store, mode_Iu);
/* fild must see both stores: join them with a Sync */
2931 		store = new_rd_Sync(dbgi, irg, block, 2, in);
2932 		store_mode = mode_Ls;
2934 		store_mode = mode_Is;
2938 	fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
2940 	set_ia32_use_frame(fild);
2941 	set_ia32_op_type(fild, ia32_AddrModeS);
2942 	set_ia32_ls_mode(fild, store_mode);
2944 	new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
2950 * Create a conversion from one integer mode into another one
/*
 * int -> int conversion. Only the smaller of the two modes matters: the
 * value is sign-/zero-extended from the smaller mode, so the generated
 * Conv_I2I(8Bit) gets the smaller mode as its load/store mode.
 */
2952 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
2953                                 dbg_info *dbgi, ir_node *block, ir_node *op,
2956 	ir_graph *irg = current_ir_graph;
2957 	int src_bits = get_mode_size_bits(src_mode);
2958 	int tgt_bits = get_mode_size_bits(tgt_mode);
2959 	ir_node *new_block = be_transform_node(block);
2961 	ir_mode *smaller_mode;
2963 	ia32_address_mode_t am;
2964 	ia32_address_t *addr = &am.addr;
/* pick the mode that actually constrains the value range */
2967 	if (src_bits < tgt_bits) {
2968 		smaller_mode = src_mode;
2969 		smaller_bits = src_bits;
2971 		smaller_mode = tgt_mode;
2972 		smaller_bits = tgt_bits;
2975 #ifdef DEBUG_libfirm
/* a Conv after a Const should have been folded earlier — warn only */
2977 		ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
2982 	match_arguments(&am, block, NULL, op, NULL,
2983 	                match_8bit | match_16bit |
2984 	                match_am | match_8bit_am | match_16bit_am);
2985 	if (smaller_bits == 8) {
2986 		new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
2987 		                                    addr->index, addr->mem, am.new_op2,
2990 		new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
2991 		                                addr->index, addr->mem, am.new_op2,
2994 	set_am_attributes(new_node, &am);
2995 	/* match_arguments assume that out-mode = in-mode, this isn't true here
2997 	set_ia32_ls_mode(new_node, smaller_mode);
2998 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2999 	new_node = fix_mem_proj(new_node, &am);
3004 * Transforms a Conv node.
3006 * @return The created ia32 Conv node
/*
 * Transforms a Conv node by dispatching on (src_mode, tgt_mode):
 *  - mode_b source: no-op, bools are already 0/1 ints
 *  - identical modes: dropped (strict no-ops only killable with SSE2)
 *  - float->float: SSE Conv_FP2FP, or x87 store/load for strict convs
 *  - float->int:   SSE Conv_FP2I or x87 fist path
 *  - int->float:   SSE Conv_I2FP or x87 fild path
 *  - int->int:     create_I2I_Conv, dropped when the bit sizes match
 */
3008 static ir_node *gen_Conv(ir_node *node) {
3009 	ir_node *block = get_nodes_block(node);
3010 	ir_node *new_block = be_transform_node(block);
3011 	ir_node *op = get_Conv_op(node);
3012 	ir_node *new_op = NULL;
3013 	ir_graph *irg = current_ir_graph;
3014 	dbg_info *dbgi = get_irn_dbg_info(node);
3015 	ir_mode *src_mode = get_irn_mode(op);
3016 	ir_mode *tgt_mode = get_irn_mode(node);
3017 	int src_bits = get_mode_size_bits(src_mode);
3018 	int tgt_bits = get_mode_size_bits(tgt_mode);
3019 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3020 	ir_node *nomem = new_rd_NoMem(irg);
3021 	ir_node *res = NULL;
3023 	if (src_mode == mode_b) {
3024 		assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3025 		/* nothing to do, we already model bools as 0/1 ints */
3026 		return be_transform_node(op);
3029 	if (src_mode == tgt_mode) {
3030 		if (get_Conv_strict(node)) {
3031 			if (ia32_cg_config.use_sse2) {
3032 				/* when we are in SSE mode, we can kill all strict no-op conversion */
3033 				return be_transform_node(op);
3036 			/* this should be optimized already, but who knows... */
3037 			DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3038 			DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3039 			return be_transform_node(op);
3043 	if (mode_is_float(src_mode)) {
3044 		new_op = be_transform_node(op);
3045 		/* we convert from float ... */
3046 		if (mode_is_float(tgt_mode)) {
/* E->D narrowing without strict semantics needs no code on x87 */
3047 			if(src_mode == mode_E && tgt_mode == mode_D
3048 					&& !get_Conv_strict(node)) {
3049 				DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3054 			if (ia32_cg_config.use_sse2) {
3055 				DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3056 				res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3058 				set_ia32_ls_mode(res, tgt_mode);
3060 				if(get_Conv_strict(node)) {
/* x87: force rounding through a store/load round-trip */
3061 					res = gen_x87_strict_conv(tgt_mode, new_op);
3062 					SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3065 				DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3070 			DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3071 			if (ia32_cg_config.use_sse2) {
3072 				res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3074 				set_ia32_ls_mode(res, src_mode);
3076 				return gen_x87_fp_to_gp(node);
3080 		/* we convert from int ... */
3081 		if (mode_is_float(tgt_mode)) {
3083 			DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3084 			if (ia32_cg_config.use_sse2) {
3085 				new_op = be_transform_node(op);
3086 				res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3088 				set_ia32_ls_mode(res, tgt_mode);
3090 				res = gen_x87_gp_to_fp(node, src_mode);
3091 				if(get_Conv_strict(node)) {
3092 					res = gen_x87_strict_conv(tgt_mode, res);
3093 					SET_IA32_ORIG_NODE(get_Proj_pred(res),
3094 					                   ia32_get_old_node_name(env_cg, node));
3098 		} else if(tgt_mode == mode_b) {
3099 			/* mode_b lowering already took care that we only have 0/1 values */
3100 			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3101 			    src_mode, tgt_mode));
3102 			return be_transform_node(op);
3105 			if (src_bits == tgt_bits) {
3106 				DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3107 				    src_mode, tgt_mode));
3108 				return be_transform_node(op);
3111 			res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/*
 * Returns non-zero if val satisfies the given gcc x86 immediate constraint
 * letter (I..O ranges, see gcc "Machine Constraints" for ia32).
 * NOTE(review): the case labels are on lines not visible in this excerpt;
 * the ranges below match the documented meaning of I (0..31/32 shift
 * counts), J (0..63), K (signed 8 bit), L (0xff/0xffff), M (0..3),
 * N (0..255), O (0..127) — TODO confirm against the full file.
 */
3119 static int check_immediate_constraint(long val, char immediate_constraint_type)
3121 	switch (immediate_constraint_type) {
3125 		return val >= 0 && val <= 32;
3127 		return val >= 0 && val <= 63;
3129 		return val >= -128 && val <= 127;
3131 		return val == 0xff || val == 0xffff;
3133 		return val >= 0 && val <= 3;
3135 		return val >= 0 && val <= 255;
3137 		return val >= 0 && val <= 127;
3141 	panic("Invalid immediate constraint found");
/**
 * Tries to express node as an ia32 Immediate (Const, SymConst, or
 * Const +/- SymConst, possibly under a Minus). Returns NULL when the node
 * cannot be encoded as an immediate for the given constraint letter.
 *
 * @param node                       the firm node to encode
 * @param immediate_constraint_type  gcc constraint letter, or 0 for
 *                                   an unconstrained 32-bit immediate
 */
3145 static ir_node *try_create_Immediate(ir_node *node,
3146                                      char immediate_constraint_type)
3149 	tarval *offset = NULL;
3150 	int offset_sign = 0;
3152 	ir_entity *symconst_ent = NULL;
3153 	int symconst_sign = 0;
3155 	ir_node *cnst = NULL;
3156 	ir_node *symconst = NULL;
3159 	mode = get_irn_mode(node);
3160 	if(!mode_is_int(mode) && !mode_is_reference(mode)) {
/* a top-level Minus flips the sign of both constant and symconst parts */
3164 	if(is_Minus(node)) {
3166 		node = get_Minus_op(node);
3169 	if(is_Const(node)) {
3172 		offset_sign = minus;
3173 	} else if(is_SymConst(node)) {
3176 		symconst_sign = minus;
3177 	} else if(is_Add(node)) {
3178 		ir_node *left = get_Add_left(node);
3179 		ir_node *right = get_Add_right(node);
3180 		if(is_Const(left) && is_SymConst(right)) {
3183 			symconst_sign = minus;
3184 			offset_sign = minus;
3185 		} else if(is_SymConst(left) && is_Const(right)) {
3188 			symconst_sign = minus;
3189 			offset_sign = minus;
3191 	} else if(is_Sub(node)) {
3192 		ir_node *left = get_Sub_left(node);
3193 		ir_node *right = get_Sub_right(node);
/* Sub negates the sign of whichever operand is subtracted */
3194 		if(is_Const(left) && is_SymConst(right)) {
3197 			symconst_sign = !minus;
3198 			offset_sign = minus;
3199 		} else if(is_SymConst(left) && is_Const(right)) {
3202 			symconst_sign = minus;
3203 			offset_sign = !minus;
3210 		offset = get_Const_tarval(cnst);
3211 		if(tarval_is_long(offset)) {
3212 			val = get_tarval_long(offset);
3214 			ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
3219 		if(!check_immediate_constraint(val, immediate_constraint_type))
3222 	if(symconst != NULL) {
3223 		if(immediate_constraint_type != 0) {
3224 			/* we need full 32bits for symconsts */
3228 		/* unfortunately the assembler/linker doesn't support -symconst */
3232 		if(get_SymConst_kind(symconst) != symconst_addr_ent)
3234 		symconst_ent = get_SymConst_entity(symconst);
3236 	if(cnst == NULL && symconst == NULL)
3239 	if(offset_sign && offset != NULL) {
3240 		offset = tarval_neg(offset);
3243 	new_node = create_Immediate(symconst_ent, symconst_sign, val);
3248 static ir_node *create_immediate_or_transform(ir_node *node,
3249 char immediate_constraint_type)
3251 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3252 if (new_node == NULL) {
3253 new_node = be_transform_node(node);
/* Shared "no register needed" requirement, used for memory-only asm
 * operands and the mem-proj slot of Asm nodes. */
3258 static const arch_register_req_t no_register_req = {
3259 	arch_register_req_type_none,
3260 	NULL, /* regclass */
3261 	NULL, /* limit bitset */
3263 	0 /* different pos */
3267 * An assembler constraint.
3269 typedef struct constraint_t constraint_t;
3270 struct constraint_t {
3273 	const arch_register_req_t **out_reqs;      /* output requirement array (for "same as" rewiring) */
3275 	const arch_register_req_t *req;            /* parsed requirement for the current operand */
3276 	unsigned immediate_possible;               /* non-zero if the operand may be an immediate */
3277 	char immediate_type;                       /* gcc constraint letter for the immediate, 0 = any */
/**
 * Parses one gcc inline-asm constraint string into an arch_register_req_t
 * (stored in constraint->req) plus immediate information.
 *
 * Handles single-register letters (a,b,c,d,D,S), register-set letters
 * (q,Q,A,...), class letters (r,t,u,f,x,...), immediates (i,n,g,...),
 * "same as" digits (rewires the corresponding output requirement) and
 * memory constraints (no_register_req). Unsupported gcc constraints panic.
 *
 * @param pos        operand position (used for the other_same bit)
 * @param constraint in/out: n_outs/out_reqs/is_in must be set by caller;
 *                   req/immediate_* are filled in here
 * @param c          the constraint string to parse
 */
3281 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3282 	int immediate_possible = 0;
3283 	char immediate_type = 0;
3284 	unsigned limited = 0;
3285 	const arch_register_class_t *cls = NULL;
3286 	ir_graph *irg = current_ir_graph;
3287 	struct obstack *obst = get_irg_obstack(irg);
3288 	arch_register_req_t *req;
3289 	unsigned *limited_ptr = NULL;
3293 	/* TODO: replace all the asserts with nice error messages */
3296 		/* a memory constraint: no need to do anything in backend about it
3297 		 * (the dependencies are already respected by the memory edge of
3299 		constraint->req = &no_register_req;
/* single-register constraints: gp class limited to exactly one register */
3311 		assert(cls == NULL ||
3312 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3313 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3314 		limited |= 1 << REG_EAX;
3317 		assert(cls == NULL ||
3318 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3319 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3320 		limited |= 1 << REG_EBX;
3323 		assert(cls == NULL ||
3324 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3325 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3326 		limited |= 1 << REG_ECX;
3329 		assert(cls == NULL ||
3330 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3331 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3332 		limited |= 1 << REG_EDX;
3335 		assert(cls == NULL ||
3336 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3337 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3338 		limited |= 1 << REG_EDI;
3341 		assert(cls == NULL ||
3342 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3343 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3344 		limited |= 1 << REG_ESI;
3347 	case 'q': /* q means lower part of the regs only, this makes no
3348 			   * difference to Q for us (we only assign whole registers) */
3349 		assert(cls == NULL ||
3350 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3351 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3352 		limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3356 		assert(cls == NULL ||
3357 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3358 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3359 		limited |= 1 << REG_EAX | 1 << REG_EDX;
3362 		assert(cls == NULL ||
3363 				(cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3364 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3365 		limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3366 		           1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
/* whole-class constraints (no limited bitmask) */
3373 		assert(cls == NULL);
3374 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3380 		/* TODO: mark values so the x87 simulator knows about t and u */
3381 		assert(cls == NULL);
3382 		cls = &ia32_reg_classes[CLASS_ia32_vfp];
3387 		assert(cls == NULL);
3388 		/* TODO: check that sse2 is supported */
3389 		cls = &ia32_reg_classes[CLASS_ia32_xmm];
/* immediate constraints; the letter is kept for range checking later */
3399 		assert(!immediate_possible);
3400 		immediate_possible = 1;
3401 		immediate_type = *c;
3405 		assert(!immediate_possible);
3406 		immediate_possible = 1;
3410 		assert(!immediate_possible && cls == NULL);
3411 		immediate_possible = 1;
3412 		cls = &ia32_reg_classes[CLASS_ia32_gp];
/* digit: "same as output n" constraint (only valid on inputs) */
3425 		assert(constraint->is_in && "can only specify same constraint "
3428 		sscanf(c, "%d%n", &same_as, &p);
3436 		/* memory constraint no need to do anything in backend about it
3437 		 * (the dependencies are already respected by the memory edge of
3439 		constraint->req = &no_register_req;
3442 	case 'E': /* no float consts yet */
3443 	case 'F': /* no float consts yet */
3444 	case 's': /* makes no sense on x86 */
3445 	case 'X': /* we can't support that in firm */
3448 	case '<': /* no autodecrement on x86 */
3449 	case '>': /* no autoincrement on x86 */
3450 	case 'C': /* sse constant not supported yet */
3451 	case 'G': /* 80387 constant not supported yet */
3452 	case 'y': /* we don't support mmx registers yet */
3453 	case 'Z': /* not available in 32 bit mode */
3454 	case 'e': /* not available in 32 bit mode */
3455 		panic("unsupported asm constraint '%c' found in (%+F)",
3456 		      *c, current_ir_graph);
3459 		panic("unknown asm constraint '%c' found in (%+F)", *c,
/* resolve a "same as" constraint */
3467 		const arch_register_req_t *other_constr;
3469 		assert(cls == NULL && "same as and register constraint not supported");
3470 		assert(!immediate_possible && "same as and immediate constraint not "
3472 		assert(same_as < constraint->n_outs && "wrong constraint number in "
3473 		       "same_as constraint");
3475 		other_constr = constraint->out_reqs[same_as];
3477 		req = obstack_alloc(obst, sizeof(req[0]));
3478 		req->cls = other_constr->cls;
3479 		req->type = arch_register_req_type_should_be_same;
3480 		req->limited = NULL;
3481 		req->other_same = 1U << pos;
3482 		req->other_different = 0;
3484 		/* switch constraints. This is because in firm we have same_as
3485 		 * constraints on the output constraints while in the gcc asm syntax
3486 		 * they are specified on the input constraints */
3487 		constraint->req = other_constr;
3488 		constraint->out_reqs[same_as] = req;
3489 		constraint->immediate_possible = 0;
3493 	if(immediate_possible && cls == NULL) {
3494 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3496 	assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3497 	assert(cls != NULL);
3499 	if(immediate_possible) {
3500 		assert(constraint->is_in
3501 		       && "immediate make no sense for output constraints");
3503 	/* todo: check types (no float input on 'r' constrained in and such... */
/* build the final requirement; a limited bitmask is appended in-place */
3506 		req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3507 		limited_ptr = (unsigned*) (req+1);
3509 		req = obstack_alloc(obst, sizeof(req[0]));
3511 	memset(req, 0, sizeof(req[0]));
3514 		req->type = arch_register_req_type_limited;
3515 		*limited_ptr = limited;
3516 		req->limited = limited_ptr;
3518 		req->type = arch_register_req_type_normal;
3522 	constraint->req = req;
3523 	constraint->immediate_possible = immediate_possible;
3524 	constraint->immediate_type = immediate_type;
/**
 * Parses one asm clobber string into a register requirement limited to the
 * named register. Matches the plain name and, for gp registers, the name
 * without the leading 'e' (so both "eax" and "ax" work). Panics on unknown
 * register names.
 */
3527 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3528                           const char *clobber)
3530 	ir_graph *irg = get_irn_irg(node);
3531 	struct obstack *obst = get_irg_obstack(irg);
3532 	const arch_register_t *reg = NULL;
3535 	arch_register_req_t *req;
3536 	const arch_register_class_t *cls;
3541 	/* TODO: construct a hashmap instead of doing linear search for clobber
3543 	for(c = 0; c < N_CLASSES; ++c) {
3544 		cls = & ia32_reg_classes[c];
3545 		for(r = 0; r < cls->n_regs; ++r) {
3546 			const arch_register_t *temp_reg = arch_register_for_index(cls, r);
3547 			if(strcmp(temp_reg->name, clobber) == 0
3548 					|| (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3557 		panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* the limited bitmask is one unsigned, so the index must fit in 32 bits */
3561 	assert(reg->index < 32);
3563 	limited = obstack_alloc(obst, sizeof(limited[0]));
3564 	*limited = 1 << reg->index;
3566 	req = obstack_alloc(obst, sizeof(req[0]));
3567 	memset(req, 0, sizeof(req[0]));
3568 	req->type = arch_register_req_type_limited;
3570 	req->limited = limited;
3572 	constraint->req = req;
3573 	constraint->immediate_possible = 0;
3574 	constraint->immediate_type = 0;
/*
 * Returns non-zero if the asm constraint describes a memory operand.
 * NOTE(review): the loop body and return statements are on lines not
 * visible in this excerpt; presumably it scans for an 'm' character —
 * TODO confirm against the full file.
 */
3577 static int is_memory_op(const ir_asm_constraint *constraint)
3579 	ident *id = constraint->constraint;
3580 	const char *str = get_id_str(id);
3583 	for(c = str; *c != '\0'; ++c) {
3592 * generates code for a ASM node
/**
 * Transforms an ASM node into an ia32_Asm node.
 *
 * Parses all output constraints and clobbers into out requirements
 * (reserving one extra slot for the memory Proj), parses all input
 * constraints into in requirements (converting operands to Immediates
 * where the constraint permits), builds the position -> operand register
 * map, and transforms the remaining inputs.
 */
3594 static ir_node *gen_ASM(ir_node *node)
3597 	ir_graph *irg = current_ir_graph;
3598 	ir_node *block = get_nodes_block(node);
3599 	ir_node *new_block = be_transform_node(block);
3600 	dbg_info *dbgi = get_irn_dbg_info(node);
3604 	int n_out_constraints;
3606 	const arch_register_req_t **out_reg_reqs;
3607 	const arch_register_req_t **in_reg_reqs;
3608 	ia32_asm_reg_t *register_map;
3609 	unsigned reg_map_size = 0;
3610 	struct obstack *obst;
3611 	const ir_asm_constraint *in_constraints;
3612 	const ir_asm_constraint *out_constraints;
3614 	constraint_t parsed_constraint;
3616 	arity = get_irn_arity(node);
3617 	in = alloca(arity * sizeof(in[0]));
3618 	memset(in, 0, arity * sizeof(in[0]));
3620 	n_out_constraints = get_ASM_n_output_constraints(node);
3621 	n_clobbers = get_ASM_n_clobbers(node);
3622 	out_arity = n_out_constraints + n_clobbers;
3623 	/* hack to keep space for mem proj */
3627 	in_constraints = get_ASM_input_constraints(node);
3628 	out_constraints = get_ASM_output_constraints(node);
3629 	clobbers = get_ASM_clobbers(node);
3631 	/* construct output constraints */
3632 	obst = get_irg_obstack(irg);
3633 	out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3634 	parsed_constraint.out_reqs = out_reg_reqs;
3635 	parsed_constraint.n_outs = n_out_constraints;
3636 	parsed_constraint.is_in = 0;
3638 	for(i = 0; i < out_arity; ++i) {
3641 		if(i < n_out_constraints) {
3642 			const ir_asm_constraint *constraint = &out_constraints[i];
3643 			c = get_id_str(constraint->constraint);
3644 			parse_asm_constraint(i, &parsed_constraint, c);
/* track the highest operand position to size the register map */
3646 			if(constraint->pos > reg_map_size)
3647 				reg_map_size = constraint->pos;
3649 			out_reg_reqs[i] = parsed_constraint.req;
3650 		} else if(i < out_arity - 1) {
3651 			ident *glob_id = clobbers [i - n_out_constraints];
3652 			assert(glob_id != NULL);
3653 			c = get_id_str(glob_id);
3654 			parse_clobber(node, i, &parsed_constraint, c);
/* clobber reqs go after the reserved mem-proj slot, hence i+1 */
3656 			out_reg_reqs[i+1] = parsed_constraint.req;
3660 	out_reg_reqs[n_out_constraints] = &no_register_req;
3662 	/* construct input constraints */
3663 	in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3664 	parsed_constraint.is_in = 1;
3665 	for(i = 0; i < arity; ++i) {
3666 		const ir_asm_constraint *constraint = &in_constraints[i];
3667 		ident *constr_id = constraint->constraint;
3668 		const char *c = get_id_str(constr_id);
3670 		parse_asm_constraint(i, &parsed_constraint, c);
3671 		in_reg_reqs[i] = parsed_constraint.req;
3673 		if(constraint->pos > reg_map_size)
3674 			reg_map_size = constraint->pos;
3676 		if(parsed_constraint.immediate_possible) {
3677 			ir_node *pred = get_irn_n(node, i);
3678 			char imm_type = parsed_constraint.immediate_type;
3679 			ir_node *immediate = try_create_Immediate(pred, imm_type);
3681 			if(immediate != NULL) {
3688 	register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3689 	memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
3691 	for(i = 0; i < n_out_constraints; ++i) {
3692 		const ir_asm_constraint *constraint = &out_constraints[i];
3693 		unsigned pos = constraint->pos;
3695 		assert(pos < reg_map_size);
3696 		register_map[pos].use_input = 0;
3697 		register_map[pos].valid = 1;
3698 		register_map[pos].memory = is_memory_op(constraint);
3699 		register_map[pos].inout_pos = i;
3700 		register_map[pos].mode = constraint->mode;
3703 	/* transform inputs */
3704 	for(i = 0; i < arity; ++i) {
3705 		const ir_asm_constraint *constraint = &in_constraints[i];
3706 		unsigned pos = constraint->pos;
3707 		ir_node *pred = get_irn_n(node, i);
3708 		ir_node *transformed;
3710 		assert(pos < reg_map_size);
3711 		register_map[pos].use_input = 1;
3712 		register_map[pos].valid = 1;
3713 		register_map[pos].memory = is_memory_op(constraint);
3714 		register_map[pos].inout_pos = i;
3715 		register_map[pos].mode = constraint->mode;
3720 		transformed = be_transform_node(pred);
3721 		in[i] = transformed;
3724 	new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3725 	                           get_ASM_text(node), register_map);
3727 	set_ia32_out_req_all(new_node, out_reg_reqs);
3728 	set_ia32_in_req_all(new_node, in_reg_reqs);
3730 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3736 * Transforms a FrameAddr into an ia32 Add.
/*
 * Transforms a be_FrameAddr into an ia32 Lea based on the transformed frame
 * pointer; the frame entity is attached and resolved to a real offset later
 * by the stack-frame phase.
 */
3738 static ir_node *gen_be_FrameAddr(ir_node *node) {
3739 	ir_node *block = be_transform_node(get_nodes_block(node));
3740 	ir_node *op = be_get_FrameAddr_frame(node);
3741 	ir_node *new_op = be_transform_node(op);
3742 	ir_graph *irg = current_ir_graph;
3743 	dbg_info *dbgi = get_irn_dbg_info(node);
3744 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3747 	new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3748 	set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3749 	set_ia32_use_frame(new_node);
3751 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3757 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* Transform a be_Return. When SSE2 is used and a float is returned, the
 * value sitting in an xmm register must be moved to the x87 TOS (calling
 * convention returns floats in st(0)): store xmm result to the frame, reload
 * it with vfld, and rebuild the Barrier with the new value/memory inputs.
 * All other cases are just duplicated. */
3759 static ir_node *gen_be_Return(ir_node *node) {
3760 ir_graph *irg = current_ir_graph;
3761 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3762 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3763 ir_entity *ent = get_irg_entity(irg);
3764 ir_type *tp = get_entity_type(ent);
3769 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3770 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3773 int pn_ret_val, pn_ret_mem, arity, i;
3775 assert(ret_val != NULL);
/* fast path: nothing to do unless SSE2 is active and there is a result */
3776 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3777 return be_duplicate_node(node);
3780 res_type = get_method_res_type(tp, 0);
3782 if (! is_Primitive_type(res_type)) {
3783 return be_duplicate_node(node);
3786 mode = get_type_mode(res_type);
3787 if (! mode_is_float(mode)) {
3788 return be_duplicate_node(node);
3791 assert(get_method_n_ress(tp) == 1);
3793 pn_ret_val = get_Proj_proj(ret_val);
3794 pn_ret_mem = get_Proj_proj(ret_mem);
3796 /* get the Barrier */
3797 barrier = get_Proj_pred(ret_val);
3799 /* get result input of the Barrier */
3800 ret_val = get_irn_n(barrier, pn_ret_val);
3801 new_ret_val = be_transform_node(ret_val);
3803 /* get memory input of the Barrier */
3804 ret_mem = get_irn_n(barrier, pn_ret_mem);
3805 new_ret_mem = be_transform_node(ret_mem);
3807 frame = get_irg_frame(irg);
3809 dbgi = get_irn_dbg_info(barrier);
3810 block = be_transform_node(get_nodes_block(barrier));
3812 noreg = ia32_new_NoReg_gp(env_cg);
3814 /* store xmm0 onto stack */
3815 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3816 new_ret_mem, new_ret_val);
3817 set_ia32_ls_mode(sse_store, mode);
3818 set_ia32_op_type(sse_store, ia32_AddrModeD);
3819 set_ia32_use_frame(sse_store);
3821 /* load into x87 register */
3822 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3823 set_ia32_op_type(fld, ia32_AddrModeS);
3824 set_ia32_use_frame(fld);
3826 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3827 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3829 /* create a new barrier */
3830 arity = get_irn_arity(barrier);
/* NOTE(review): alloca for the in[] array; lifetime ends with this call */
3831 in = alloca(arity * sizeof(in[0]));
/* wire the Barrier inputs: reuse fld/mproj for value and memory, transform
 * the rest. NOTE(review): extraction gap -- the assignments inside the
 * branches are not visible here. */
3832 for (i = 0; i < arity; ++i) {
3835 if (i == pn_ret_val) {
3837 } else if (i == pn_ret_mem) {
3840 ir_node *in = get_irn_n(barrier, i);
3841 new_in = be_transform_node(in);
3846 new_barrier = new_ir_node(dbgi, irg, block,
3847 get_irn_op(barrier), get_irn_mode(barrier),
3849 copy_node_attr(barrier, new_barrier);
3850 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so later lookups find the new Barrier */
3851 be_set_transformed_node(barrier, new_barrier);
3852 mark_irn_visited(barrier);
3854 /* transform normally */
3855 return be_duplicate_node(node);
3859 * Transform a be_AddSP into an ia32_SubSP.
/* Transform a be_AddSP into an ia32 SubSP (allocating stack space means
 * subtracting from esp on ia32). */
3861 static ir_node *gen_be_AddSP(ir_node *node)
3863 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3864 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3866 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
3870 * Transform a be_SubSP into an ia32_AddSP
/* Transform a be_SubSP into an ia32 AddSP (freeing stack space means adding
 * to esp on ia32). */
3872 static ir_node *gen_be_SubSP(ir_node *node)
3874 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3875 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3877 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
3881 * This function just sets the register for the Unknown node
3882 * as this is not done during register allocation because Unknown
3883 * is an "ignore" node.
/* Produce a backend Unknown value for the node's register class.
 * Floats: SSE2 gets a dedicated xmm Unknown; the x87 path loads 0.0 instead
 * (see comment below). GP modes get the shared gp Unknown. */
3885 static ir_node *gen_Unknown(ir_node *node) {
3886 ir_mode *mode = get_irn_mode(node);
3888 if (mode_is_float(mode)) {
3889 if (ia32_cg_config.use_sse2) {
3890 return ia32_new_Unknown_xmm(env_cg);
3892 /* Unknown nodes are buggy in x87 sim, use zero for now... */
3893 ir_graph *irg = current_ir_graph;
3894 dbg_info *dbgi = get_irn_dbg_info(node);
3895 ir_node *block = get_irg_start_block(irg);
3896 return new_rd_ia32_vfldz(dbgi, irg, block);
3898 } else if (mode_needs_gp_reg(mode)) {
3899 return ia32_new_Unknown_gp(env_cg);
3901 panic("unsupported Unknown-Mode");
3907 * Change some phi modes
/* Transform a Phi: rewrite its mode to the backend register-class mode and
 * duplicate it with the OLD (untransformed) predecessors; Phi loops are
 * resolved later via the enqueued preds.
 * NOTE(review): extraction gap -- the mode assignments inside the branches
 * are not visible here. */
3909 static ir_node *gen_Phi(ir_node *node) {
3910 ir_node *block = be_transform_node(get_nodes_block(node));
3911 ir_graph *irg = current_ir_graph;
3912 dbg_info *dbgi = get_irn_dbg_info(node);
3913 ir_mode *mode = get_irn_mode(node);
3916 if(mode_needs_gp_reg(mode)) {
3917 /* we shouldn't have any 64bit stuff around anymore */
3918 assert(get_mode_size_bits(mode) <= 32);
3919 /* all integer operations are on 32bit registers now */
3921 } else if(mode_is_float(mode)) {
3922 if (ia32_cg_config.use_sse2) {
3929 /* phi nodes allow loops, so we use the old arguments for now
3930 * and fix this later */
3931 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3932 get_irn_in(node) + 1);
3933 copy_node_attr(node, phi);
3934 be_duplicate_deps(node, phi);
3936 be_set_transformed_node(node, phi);
/* make sure the untransformed predecessors get processed eventually */
3937 be_enqueue_preds(node);
/* Transform an indirect jump (IJmp): the target may be folded into an
 * address mode or an immediate via match_arguments. */
3945 static ir_node *gen_IJmp(ir_node *node)
3947 ir_node *block = get_nodes_block(node);
3948 ir_node *new_block = be_transform_node(block);
3949 ir_graph *irg = current_ir_graph;
3950 dbg_info *dbgi = get_irn_dbg_info(node);
3951 ir_node *op = get_IJmp_target(node);
3953 ia32_address_mode_t am;
3954 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
3956 assert(get_irn_mode(op) == mode_P);
3958 match_arguments(&am, block, NULL, op, NULL,
3959 match_am | match_8bit_am | match_16bit_am |
3960 match_immediate | match_8bit | match_16bit);
3962 new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
3963 addr->mem, am.new_op2);
3964 set_am_attributes(new_node, &am);
3965 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* if source address-mode consumed a load, fix up its mem Proj */
3967 new_node = fix_mem_proj(new_node, &am);
/* Constructor callback types used by gen_lowered_Load/gen_lowered_Store to
 * build the concrete ia32 load/store node. */
3972 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3975 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3976 ir_node *val, ir_node *mem);
3979 * Transforms a lowered Load into a "real" one.
/* Transform a lowered Load into a "real" ia32 load built by func, copying
 * over all address-mode attributes (offset, scale, symconst, frame). */
3981 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
3983 ir_node *block = be_transform_node(get_nodes_block(node));
3984 ir_node *ptr = get_irn_n(node, 0);
3985 ir_node *new_ptr = be_transform_node(ptr);
3986 ir_node *mem = get_irn_n(node, 1);
3987 ir_node *new_mem = be_transform_node(mem);
3988 ir_graph *irg = current_ir_graph;
3989 dbg_info *dbgi = get_irn_dbg_info(node);
3990 ir_mode *mode = get_ia32_ls_mode(node);
3991 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3994 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
/* copy the address-mode attributes from the lowered node */
3996 set_ia32_op_type(new_op, ia32_AddrModeS);
3997 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
3998 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
3999 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
4000 if (is_ia32_am_sc_sign(node))
4001 set_ia32_am_sc_sign(new_op);
4002 set_ia32_ls_mode(new_op, mode);
4003 if (is_ia32_use_frame(node)) {
4004 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4005 set_ia32_use_frame(new_op);
4008 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4014 * Transforms a lowered Store into a "real" one.
/* Transform a lowered Store into a "real" ia32 store built by func,
 * carrying over the address-mode offset and the frame entity. */
4016 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
4018 ir_node *block = be_transform_node(get_nodes_block(node));
4019 ir_node *ptr = get_irn_n(node, 0);
4020 ir_node *new_ptr = be_transform_node(ptr);
4021 ir_node *val = get_irn_n(node, 1);
4022 ir_node *new_val = be_transform_node(val);
4023 ir_node *mem = get_irn_n(node, 2);
4024 ir_node *new_mem = be_transform_node(mem);
4025 ir_graph *irg = current_ir_graph;
4026 dbg_info *dbgi = get_irn_dbg_info(node);
4027 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4028 ir_mode *mode = get_ia32_ls_mode(node);
4032 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
4034 am_offs = get_ia32_am_offs_int(node);
4035 add_ia32_am_offs_int(new_op, am_offs);
4037 set_ia32_op_type(new_op, ia32_AddrModeD);
4038 set_ia32_ls_mode(new_op, mode);
4039 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4040 set_ia32_use_frame(new_op);
4042 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/* Transform an ia32_l_ShlDep (shift-left with an extra scheduling
 * dependency) into a real ia32 Shl. */
4047 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4049 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4050 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4052 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
4053 match_immediate | match_mode_neutral);
/* Transform an ia32_l_ShrDep into a real ia32 Shr.
 * NOTE(review): extraction gap -- the match-flags argument is truncated. */
4056 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4058 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4059 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4060 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/* Transform an ia32_l_SarDep into a real ia32 Sar (arithmetic shift right).
 * NOTE(review): extraction gap -- the match-flags argument is truncated. */
4064 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4066 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4067 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4068 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/* Transform an ia32_l_Add (lower word of a 64bit add) into an ia32 Add.
 * The result must be mode_T because the carry flag output is consumed by
 * the matching Adc. */
4072 static ir_node *gen_ia32_l_Add(ir_node *node) {
4073 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4074 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4075 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
4076 match_commutative | match_am | match_immediate |
4077 match_mode_neutral);
/* gen_binop may have wrapped the Add in a result Proj; unwrap it */
4079 if(is_Proj(lowered)) {
4080 lowered = get_Proj_pred(lowered);
4082 assert(is_ia32_Add(lowered));
4083 set_irn_mode(lowered, mode_T);
/* Transform an ia32_l_Adc (add with carry, upper word of a 64bit add)
 * into an ia32 Adc consuming the flags produced by the matching Add. */
4089 static ir_node *gen_ia32_l_Adc(ir_node *node)
4091 return gen_binop_flags(node, new_rd_ia32_Adc,
4092 match_commutative | match_am | match_immediate |
4093 match_mode_neutral);
4097 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
4099 * @param node The node to transform
4100 * @return the created ia32 vfild node
/* ia32_l_vfild -> real ia32 vfild (x87 integer load) via the generic
 * lowered-Load path. */
4102 static ir_node *gen_ia32_l_vfild(ir_node *node) {
4103 return gen_lowered_Load(node, new_rd_ia32_vfild);
4107 * Transforms an ia32_l_Load into a "real" ia32_Load node
4109 * @param node The node to transform
4110 * @return the created ia32 Load node
/* ia32_l_Load -> real ia32 Load via the generic lowered-Load path. */
4112 static ir_node *gen_ia32_l_Load(ir_node *node) {
4113 return gen_lowered_Load(node, new_rd_ia32_Load);
4117 * Transforms an ia32_l_Store into a "real" ia32_Store node
4119 * @param node The node to transform
4120 * @return the created ia32 Store node
/* ia32_l_Store -> real ia32 Store via the generic lowered-Store path. */
4122 static ir_node *gen_ia32_l_Store(ir_node *node) {
4123 return gen_lowered_Store(node, new_rd_ia32_Store);
4127 * Transforms a l_vfist into a "real" vfist node.
4129 * @param node The node to transform
4130 * @return the created ia32 vfist node
/* Transform an ia32_l_vfist into a real ia32 vfist (x87 store-integer).
 * Handled separately from gen_lowered_Store because vfist needs the extra
 * truncation-rounding fpcw mode input. */
4132 static ir_node *gen_ia32_l_vfist(ir_node *node) {
4133 ir_node *block = be_transform_node(get_nodes_block(node));
4134 ir_node *ptr = get_irn_n(node, 0);
4135 ir_node *new_ptr = be_transform_node(ptr);
4136 ir_node *val = get_irn_n(node, 1);
4137 ir_node *new_val = be_transform_node(val);
4138 ir_node *mem = get_irn_n(node, 2);
4139 ir_node *new_mem = be_transform_node(mem);
4140 ir_graph *irg = current_ir_graph;
4141 dbg_info *dbgi = get_irn_dbg_info(node);
4142 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4143 ir_mode *mode = get_ia32_ls_mode(node);
/* fpu control word set to truncation rounding, as C casts require */
4144 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4148 new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
4149 new_val, trunc_mode);
4151 am_offs = get_ia32_am_offs_int(node);
4152 add_ia32_am_offs_int(new_op, am_offs);
4154 set_ia32_op_type(new_op, ia32_AddrModeD);
4155 set_ia32_ls_mode(new_op, mode);
4156 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4157 set_ia32_use_frame(new_op);
4159 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4165 * Transforms a l_MulS into a "real" MulS node.
4167 * @return the created ia32 Mul node
/* Transform an ia32_l_Mul into a real ia32 Mul (unsigned widening mul). */
4169 static ir_node *gen_ia32_l_Mul(ir_node *node) {
4170 ir_node *left = get_binop_left(node);
4171 ir_node *right = get_binop_right(node);
4173 return gen_binop(node, left, right, new_rd_ia32_Mul,
4174 match_commutative | match_am | match_mode_neutral);
4178 * Transforms a l_IMulS into a "real" IMul1OPS node.
4180 * @return the created ia32 IMul1OP node
/* Transform an ia32_l_IMul into a real ia32 IMul1OP (one-operand signed
 * widening mul). */
4182 static ir_node *gen_ia32_l_IMul(ir_node *node) {
4183 ir_node *left = get_binop_left(node);
4184 ir_node *right = get_binop_right(node);
4186 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
4187 match_commutative | match_am | match_mode_neutral);
/* Transform an ia32_l_Sub (lower word of a 64bit sub) into an ia32 Sub.
 * Forced to mode_T so the borrow flag can be consumed by the matching Sbb. */
4190 static ir_node *gen_ia32_l_Sub(ir_node *node) {
4191 ir_node *left = get_irn_n(node, n_ia32_l_Sub_left);
4192 ir_node *right = get_irn_n(node, n_ia32_l_Sub_right);
4193 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
4194 match_am | match_immediate | match_mode_neutral);
/* gen_binop may have wrapped the Sub in a result Proj; unwrap it */
4196 if(is_Proj(lowered)) {
4197 lowered = get_Proj_pred(lowered);
4199 assert(is_ia32_Sub(lowered));
4200 set_irn_mode(lowered, mode_T);
/* Transform an ia32_l_Sbb (subtract with borrow, upper word of a 64bit sub)
 * into an ia32 Sbb consuming the flags of the matching Sub. */
4206 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
4207 return gen_binop_flags(node, new_rd_ia32_Sbb,
4208 match_am | match_immediate | match_mode_neutral);
4212 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4213 * op1 - target to be shifted
4214 * op2 - contains bits to be shifted into target
4216 * Only op3 can be an immediate.
/* Common transform for l_ShlD/l_ShrD (double-precision 64bit shifts).
 * NOTE(review): extraction gap -- the new_count argument of the ShlD/ShrD
 * constructor calls is not visible here. */
4218 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4219 ir_node *low, ir_node *count)
4221 ir_node *block = get_nodes_block(node);
4222 ir_node *new_block = be_transform_node(block);
4223 ir_graph *irg = current_ir_graph;
4224 dbg_info *dbgi = get_irn_dbg_info(node);
4225 ir_node *new_high = be_transform_node(high);
4226 ir_node *new_low = be_transform_node(low);
4230 /* the shift amount can be any mode that is bigger than 5 bits, since all
4231 * other bits are ignored anyway */
4232 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
4233 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4234 count = get_Conv_op(count);
4236 new_count = create_immediate_or_transform(count, 0);
4238 if (is_ia32_l_ShlD(node)) {
4239 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
4242 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
4245 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* ia32_l_ShlD -> ia32 ShlD via the shared 64bit-shift transform. */
4250 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4252 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4253 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4254 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4255 return gen_lowered_64bit_shifts(node, high, low, count);
/* ia32_l_ShrD -> ia32 ShrD via the shared 64bit-shift transform. */
4258 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4260 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4261 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4262 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4263 return gen_lowered_64bit_shifts(node, high, low, count);
/* Convert a signed 64bit integer (as low/high 32bit words) to float:
 * store both words to a 64bit frame slot, then fild (x87 integer load)
 * the whole 64bit value. Unsigned inputs are not supported. */
4266 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
4267 ir_node *src_block = get_nodes_block(node);
4268 ir_node *block = be_transform_node(src_block);
4269 ir_graph *irg = current_ir_graph;
4270 dbg_info *dbgi = get_irn_dbg_info(node);
4271 ir_node *frame = get_irg_frame(irg);
4272 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4273 ir_node *nomem = new_NoMem();
4274 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4275 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4276 ir_node *new_val_low = be_transform_node(val_low);
4277 ir_node *new_val_high = be_transform_node(val_high);
4282 ir_node *store_high;
4284 if(!mode_is_signed(get_irn_mode(val_high))) {
4285 panic("unsigned long long -> float not supported yet (%+F)", node);
/* spill low word at offset 0 and high word at offset 4 of the same slot */
4289 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4291 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4293 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
4294 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
4296 set_ia32_use_frame(store_low);
4297 set_ia32_use_frame(store_high);
4298 set_ia32_op_type(store_low, ia32_AddrModeD);
4299 set_ia32_op_type(store_high, ia32_AddrModeD);
4300 set_ia32_ls_mode(store_low, mode_Iu);
4301 set_ia32_ls_mode(store_high, mode_Is);
4302 add_ia32_am_offs_int(store_high, 4);
/* serialize both stores before the fild reads the slot */
4306 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4309 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
4311 set_ia32_use_frame(fild);
4312 set_ia32_op_type(fild, ia32_AddrModeS);
4313 set_ia32_ls_mode(fild, mode_Ls);
4315 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
4317 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/* Convert a float to a 64bit integer: fist (with truncation rounding) the
 * x87 value into a 64bit frame slot; the two 32bit halves are read back by
 * gen_Proj_l_FloattoLL. NOTE(review): extraction gap -- the trailing
 * argument of the vfist call and the return are not visible here. */
4320 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
4321 ir_node *src_block = get_nodes_block(node);
4322 ir_node *block = be_transform_node(src_block);
4323 ir_graph *irg = current_ir_graph;
4324 dbg_info *dbgi = get_irn_dbg_info(node);
4325 ir_node *frame = get_irg_frame(irg);
4326 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4327 ir_node *nomem = new_NoMem();
4328 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4329 ir_node *new_val = be_transform_node(val);
/* truncation rounding mode, as required by C float->int casts */
4330 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4335 fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
4337 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
4338 set_ia32_use_frame(fist);
4339 set_ia32_op_type(fist, ia32_AddrModeD);
4340 set_ia32_ls_mode(fist, mode_Ls);
4346 * the BAD transformer.
/* Transformer for opcodes that must never reach this phase: aborts. */
4348 static ir_node *bad_transform(ir_node *node) {
4349 panic("No transform function for %+F available.\n", node);
/* Read one 32bit half of the 64bit fist result back from the frame slot:
 * res_low loads at offset 0, res_high at offset 4. */
4353 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
4354 ir_graph *irg = current_ir_graph;
4355 ir_node *block = be_transform_node(get_nodes_block(node));
4356 ir_node *pred = get_Proj_pred(node);
4357 ir_node *new_pred = be_transform_node(pred);
4358 ir_node *frame = get_irg_frame(irg);
4359 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4360 dbg_info *dbgi = get_irn_dbg_info(node);
4361 long pn = get_Proj_proj(node);
4366 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
4367 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
4368 set_ia32_use_frame(load);
4369 set_ia32_op_type(load, ia32_AddrModeS);
4370 set_ia32_ls_mode(load, mode_Iu);
4371 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4372 * 32 bit from it with this particular load */
4373 attr = get_ia32_attr(load);
4374 attr->data.need_64bit_stackent = 1;
4376 if (pn == pn_ia32_l_FloattoLL_res_high) {
4377 add_ia32_am_offs_int(load, 4);
4379 assert(pn == pn_ia32_l_FloattoLL_res_low);
4382 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4388 * Transform the Projs of an AddSP.
/* Renumber the Projs of a be_AddSP onto the ia32 SubSP it was lowered to
 * (sp result, address result, memory). */
4390 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4391 ir_node *block = be_transform_node(get_nodes_block(node));
4392 ir_node *pred = get_Proj_pred(node);
4393 ir_node *new_pred = be_transform_node(pred);
4394 ir_graph *irg = current_ir_graph;
4395 dbg_info *dbgi = get_irn_dbg_info(node);
4396 long proj = get_Proj_proj(node);
4398 if (proj == pn_be_AddSP_sp) {
4399 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4400 pn_ia32_SubSP_stack);
/* the new stack pointer value is pinned to esp */
4401 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4403 } else if(proj == pn_be_AddSP_res) {
4404 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4405 pn_ia32_SubSP_addr);
4406 } else if (proj == pn_be_AddSP_M) {
4407 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4411 return new_rd_Unknown(irg, get_irn_mode(node));
4415 * Transform the Projs of a SubSP.
/* Renumber the Projs of a be_SubSP onto the ia32 AddSP it was lowered to
 * (sp result and memory). */
4417 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4418 ir_node *block = be_transform_node(get_nodes_block(node));
4419 ir_node *pred = get_Proj_pred(node);
4420 ir_node *new_pred = be_transform_node(pred);
4421 ir_graph *irg = current_ir_graph;
4422 dbg_info *dbgi = get_irn_dbg_info(node);
4423 long proj = get_Proj_proj(node);
4425 if (proj == pn_be_SubSP_sp) {
4426 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4427 pn_ia32_AddSP_stack);
/* the new stack pointer value is pinned to esp */
4428 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4430 } else if (proj == pn_be_SubSP_M) {
4431 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4435 return new_rd_Unknown(irg, get_irn_mode(node));
4439 * Transform and renumber the Projs from a Load.
/* Transform and renumber the Projs from a Load onto whatever ia32 node the
 * Load became: ia32_Load, a Conv that absorbed it, xLoad (SSE), or vfld
 * (x87). ProjMs of multi-user loads are handled specially so source
 * address-mode folding can still take place. */
4441 static ir_node *gen_Proj_Load(ir_node *node) {
4443 ir_node *block = be_transform_node(get_nodes_block(node));
4444 ir_node *pred = get_Proj_pred(node);
4445 ir_graph *irg = current_ir_graph;
4446 dbg_info *dbgi = get_irn_dbg_info(node);
4447 long proj = get_Proj_proj(node);
4450 /* loads might be part of source address mode matches, so we don't
4451 transform the ProjMs yet (with the exception of loads whose result is
4454 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4457 assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
4459 /* this is needed, because sometimes we have loops that are only
4460 reachable through the ProjM */
4461 be_enqueue_preds(node);
4462 /* do it in 2 steps, to silence firm verifier */
4463 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4464 set_Proj_proj(res, pn_ia32_Load_M);
4468 /* renumber the proj */
4469 new_pred = be_transform_node(pred);
4470 if (is_ia32_Load(new_pred)) {
4473 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4475 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4476 case pn_Load_X_regular:
4477 return new_rd_Jmp(dbgi, irg, block);
4478 case pn_Load_X_except:
4479 /* This Load might raise an exception. Mark it. */
4480 set_ia32_exc_label(new_pred, 1);
4481 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4485 } else if (is_ia32_Conv_I2I(new_pred) ||
4486 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the load was folded into a Conv; its Projs now point at the Conv */
4487 set_irn_mode(new_pred, mode_T);
4488 if (proj == pn_Load_res) {
4489 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4490 } else if (proj == pn_Load_M) {
4491 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4493 } else if (is_ia32_xLoad(new_pred)) {
4496 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4498 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4499 case pn_Load_X_regular:
4500 return new_rd_Jmp(dbgi, irg, block);
4501 case pn_Load_X_except:
4502 /* This Load might raise an exception. Mark it. */
4503 set_ia32_exc_label(new_pred, 1);
4504 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4508 } else if (is_ia32_vfld(new_pred)) {
4511 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4513 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4514 case pn_Load_X_regular:
4515 return new_rd_Jmp(dbgi, irg, block);
4516 case pn_Load_X_except:
4517 /* This Load might raise an exception. Mark it. */
4518 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): pn_ia32_xLoad_X_exc in the vfld branch looks like a
 * copy-paste from the xLoad case above -- verify whether this should be
 * pn_ia32_vfld_X_exc */
4519 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4524 /* can happen for ProJMs when source address mode happened for the
4527 /* however it should not be the result proj, as that would mean the
4528 load had multiple users and should not have been used for
4530 if (proj != pn_Load_M) {
4531 panic("internal error: transformed node not a Load");
4533 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4537 return new_rd_Unknown(irg, get_irn_mode(node));
4541 * Transform and renumber the Projs from a DivMod like instruction.
/* Renumber the Projs of Div/Mod/DivMod onto the single ia32 Div/IDiv node
 * (which produces both quotient and remainder). */
4543 static ir_node *gen_Proj_DivMod(ir_node *node) {
4544 ir_node *block = be_transform_node(get_nodes_block(node));
4545 ir_node *pred = get_Proj_pred(node);
4546 ir_node *new_pred = be_transform_node(pred);
4547 ir_graph *irg = current_ir_graph;
4548 dbg_info *dbgi = get_irn_dbg_info(node);
4549 ir_mode *mode = get_irn_mode(node);
4550 long proj = get_Proj_proj(node);
4552 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on the original opcode since Div/Mod/DivMod number their Projs
 * differently; all map onto the pn_ia32_Div_* outputs */
4554 switch (get_irn_opcode(pred)) {
4558 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4560 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4561 case pn_Div_X_regular:
4562 return new_rd_Jmp(dbgi, irg, block);
4563 case pn_Div_X_except:
4564 set_ia32_exc_label(new_pred, 1);
4565 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4573 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4575 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4576 case pn_Mod_X_except:
4577 set_ia32_exc_label(new_pred, 1);
4578 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4586 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4587 case pn_DivMod_res_div:
4588 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4589 case pn_DivMod_res_mod:
4590 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4591 case pn_DivMod_X_regular:
4592 return new_rd_Jmp(dbgi, irg, block);
4593 case pn_DivMod_X_except:
4594 set_ia32_exc_label(new_pred, 1);
4595 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4605 return new_rd_Unknown(irg, mode);
4609 * Transform and renumber the Projs from a CopyB.
/* Renumber the memory Proj of a CopyB onto the ia32 CopyB/CopyB_i node. */
4611 static ir_node *gen_Proj_CopyB(ir_node *node) {
4612 ir_node *block = be_transform_node(get_nodes_block(node));
4613 ir_node *pred = get_Proj_pred(node);
4614 ir_node *new_pred = be_transform_node(pred);
4615 ir_graph *irg = current_ir_graph;
4616 dbg_info *dbgi = get_irn_dbg_info(node);
4617 ir_mode *mode = get_irn_mode(node);
4618 long proj = get_Proj_proj(node);
4621 case pn_CopyB_M_regular:
4622 if (is_ia32_CopyB_i(new_pred)) {
4623 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4624 } else if (is_ia32_CopyB(new_pred)) {
4625 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4633 return new_rd_Unknown(irg, mode);
4637 * Transform and renumber the Projs from a Quot.
/* Renumber the Projs of a float Quot onto the SSE xDiv or x87 vfdiv the
 * Quot was lowered to. */
4639 static ir_node *gen_Proj_Quot(ir_node *node) {
4640 ir_node *block = be_transform_node(get_nodes_block(node));
4641 ir_node *pred = get_Proj_pred(node);
4642 ir_node *new_pred = be_transform_node(pred);
4643 ir_graph *irg = current_ir_graph;
4644 dbg_info *dbgi = get_irn_dbg_info(node);
4645 ir_mode *mode = get_irn_mode(node);
4646 long proj = get_Proj_proj(node);
4650 if (is_ia32_xDiv(new_pred)) {
4651 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4652 } else if (is_ia32_vfdiv(new_pred)) {
4653 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4657 if (is_ia32_xDiv(new_pred)) {
4658 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4659 } else if (is_ia32_vfdiv(new_pred)) {
4660 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4668 return new_rd_Unknown(irg, mode);
4672 * Transform the Thread Local Storage Proj.
/* Transform the thread-local-storage base Proj into an ia32 LdTls node.
 * NOTE(review): extraction gap -- the return of res is not visible here. */
4674 static ir_node *gen_Proj_tls(ir_node *node) {
4675 ir_node *block = be_transform_node(get_nodes_block(node));
4676 ir_graph *irg = current_ir_graph;
4677 dbg_info *dbgi = NULL;
4678 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
/* Duplicate a be_Call, marking all its outputs as flag-clobbering (a call
 * may modify the eflags register). */
4683 static ir_node *gen_be_Call(ir_node *node) {
4684 ir_node *res = be_duplicate_node(node);
4685 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/* Duplicate a be_IncSP, marking it flag-clobbering (add/sub on esp). */
4690 static ir_node *gen_be_IncSP(ir_node *node) {
4691 ir_node *res = be_duplicate_node(node);
4692 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4698 * Transform the Projs from a be_Call.
/* Transform the Projs of a be_Call. With SSE2 and a float result, the value
 * returned in st(0) has to be moved into an xmm register via a
 * vfst (store st(0)) / xLoad pair; both the memory Proj and the result Proj
 * must then be rerouted through those two nodes. */
4700 static ir_node *gen_Proj_be_Call(ir_node *node) {
4701 ir_node *block = be_transform_node(get_nodes_block(node));
4702 ir_node *call = get_Proj_pred(node);
4703 ir_node *new_call = be_transform_node(call);
4704 ir_graph *irg = current_ir_graph;
4705 dbg_info *dbgi = get_irn_dbg_info(node);
4706 ir_type *method_type = be_Call_get_type(call);
4707 int n_res = get_method_n_ress(method_type);
4708 long proj = get_Proj_proj(node);
4709 ir_mode *mode = get_irn_mode(node);
4711 const arch_register_class_t *cls;
4713 /* The following is kinda tricky: If we're using SSE, then we have to
4714 * move the result value of the call in floating point registers to an
4715 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4716 * after the call, we have to make sure to correctly make the
4717 * MemProj and the result Proj use these 2 nodes
4719 if (proj == pn_be_Call_M_regular) {
4720 // get new node for result, are we doing the sse load/store hack?
4721 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4722 ir_node *call_res_new;
4723 ir_node *call_res_pred = NULL;
4725 if (call_res != NULL) {
4726 call_res_new = be_transform_node(call_res);
4727 call_res_pred = get_Proj_pred(call_res_new);
/* no sse hack happened: memory comes straight from the call */
4730 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4731 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4732 pn_be_Call_M_regular);
/* sse hack happened: memory must come from the xLoad */
4734 assert(is_ia32_xLoad(call_res_pred));
4735 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4739 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4740 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4742 ir_node *frame = get_irg_frame(irg);
4743 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4745 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4748 /* in case there is no memory output: create one to serialize the copy
4750 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4751 pn_be_Call_M_regular);
4752 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4753 pn_be_Call_first_res);
4755 /* store st(0) onto stack */
4756 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4758 set_ia32_op_type(fstp, ia32_AddrModeD);
4759 set_ia32_use_frame(fstp);
4761 /* load into SSE register */
4762 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4764 set_ia32_op_type(sse_load, ia32_AddrModeS);
4765 set_ia32_use_frame(sse_load);
4767 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4773 /* transform call modes */
4774 if (mode_is_data(mode)) {
4775 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4779 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4783 * Transform the Projs from a Cmp.
/* Proj(Cmp) must not appear here: mode_b values are expected to have been
 * lowered earlier, so this always panics. */
4785 static ir_node *gen_Proj_Cmp(ir_node *node)
4787 /* this probably means not all mode_b nodes were lowered... */
4788 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4793 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: route each Proj to the specific transformer for
 * its predecessor's opcode; fall back to duplication with mode fixup. */
4795 static ir_node *gen_Proj(ir_node *node) {
4796 ir_graph *irg = current_ir_graph;
4797 dbg_info *dbgi = get_irn_dbg_info(node);
4798 ir_node *pred = get_Proj_pred(node);
4799 long proj = get_Proj_proj(node);
4801 if (is_Store(pred)) {
4802 if (proj == pn_Store_M) {
4803 return be_transform_node(pred);
4806 return new_r_Bad(irg);
4808 } else if (is_Load(pred)) {
4809 return gen_Proj_Load(node);
4810 } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
4811 return gen_Proj_DivMod(node);
4812 } else if (is_CopyB(pred)) {
4813 return gen_Proj_CopyB(node);
4814 } else if (is_Quot(pred)) {
4815 return gen_Proj_Quot(node);
4816 } else if (be_is_SubSP(pred)) {
4817 return gen_Proj_be_SubSP(node);
4818 } else if (be_is_AddSP(pred)) {
4819 return gen_Proj_be_AddSP(node);
4820 } else if (be_is_Call(pred)) {
4821 return gen_Proj_be_Call(node);
4822 } else if (is_Cmp(pred)) {
4823 return gen_Proj_Cmp(node);
4824 } else if (get_irn_op(pred) == op_Start) {
4825 if (proj == pn_Start_X_initial_exec) {
4826 ir_node *block = get_nodes_block(pred);
4829 /* we exchange the ProjX with a jump */
4830 block = be_transform_node(block);
4831 jump = new_rd_Jmp(dbgi, irg, block);
4834 if (node == be_get_old_anchor(anchor_tls)) {
4835 return gen_Proj_tls(node);
4837 } else if (is_ia32_l_FloattoLL(pred)) {
4838 return gen_Proj_l_FloattoLL(node);
4840 } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4844 ir_node *new_pred = be_transform_node(pred);
4845 ir_node *block = be_transform_node(get_nodes_block(node));
4846 ir_mode *mode = get_irn_mode(node);
/* gp-register Projs are renumbered into mode_Iu */
4847 if (mode_needs_gp_reg(mode)) {
4848 ir_node *new_proj = new_r_Proj(irg, block, new_pred, mode_Iu,
4849 get_Proj_proj(node));
4850 #ifdef DEBUG_libfirm
/* keep the original node number for debugging */
4851 new_proj->node_nr = node->node_nr;
4857 return be_duplicate_node(node);
4861  * Enters all transform functions into the generic pointer
 *
 * For each Firm opcode, installs the matching gen_* transformer into the
 * op's generic function pointer; ops that must never reach the backend get
 * bad_transform instead (via BAD). Called once before transforming a graph.
4863 static void register_transformers(void)
4867 	/* first clear the generic function pointer for all ops */
4868 	clear_irp_opcodes_generic_func();
	/* GEN(a): install gen_a as the transform callback for op_a.
	 * BAD(a): mark op_a as an op that must not appear at this stage. */
4870 #define GEN(a)   { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4871 #define BAD(a)   op_##a->ops.generic = (op_func)bad_transform
4909 	/* transform ops from intrinsic lowering */
4925 	GEN(ia32_l_LLtoFloat);
4926 	GEN(ia32_l_FloattoLL);
4932 	/* we should never see these nodes */
4947 	/* handle generic backend nodes */
	/* NOTE(review): Mulh is looked up dynamically here — presumably because
	 * it is an optional op that may not exist; confirm against libFirm ir_op
	 * docs. */
4956 	op_Mulh = get_op_Mulh();
4965  * Pre-transform all unknown and noreg nodes.
 *
 * Runs before the main transformation and replaces the code generator's
 * cached unknown/noreg placeholder nodes (one per register class: gp, vfp,
 * xmm) with their pre-transformed counterparts, so later transformers can
 * use them directly.
4967 static void ia32_pretransform_node(void *arch_cg) {
4968 	ia32_code_gen_t *cg = arch_cg;
4970 	cg->unknown_gp  = be_pre_transform_node(cg->unknown_gp);
4971 	cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4972 	cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4973 	cg->noreg_gp    = be_pre_transform_node(cg->noreg_gp);
4974 	cg->noreg_vfp   = be_pre_transform_node(cg->noreg_vfp);
4975 	cg->noreg_xmm   = be_pre_transform_node(cg->noreg_xmm);
4980  * Walker, checks if all ia32 nodes producing more than one result have
4981  * its Projs, other wise creates new projs and keep them using a be_Keep node.
 *
 * For every ia32 node, collects the set of result Projs that already exist
 * (as a bitmask over out positions), then creates a Proj + be_Keep for each
 * unused output so the register allocator sees all results as live. Outputs
 * in the flags register class are skipped; SwitchJmp is exempt entirely.
4983 static void add_missing_keep_walker(ir_node *node, void *data)
4986 	unsigned         found_projs = 0;
4987 	const ir_edge_t *edge;
4988 	ir_mode         *mode = get_irn_mode(node);
	/* only ia32 nodes are of interest */
4993 	if(!is_ia32_irn(node))
4996 	n_outs = get_ia32_n_res(node);
	/* SwitchJmp results are control flow, not register values */
4999 	if(is_ia32_SwitchJmp(node))
	/* found_projs is a bitmask, so the out count must fit in an unsigned */
5002 	assert(n_outs < (int) sizeof(unsigned) * 8);
5003 	foreach_out_edge(node, edge) {
5004 		ir_node *proj = get_edge_src_irn(edge);
5005 		int      pn   = get_Proj_proj(proj);
5007 		assert(get_irn_mode(proj) == mode_M || pn < n_outs);
5008 		found_projs |= 1 << pn;
5012 	/* are keeps missing? */
5014 	for(i = 0; i < n_outs; ++i) {
5017 		const arch_register_req_t           *req;
5018 		const arch_register_class_t         *class;
		/* this output already has a Proj -> nothing to keep alive */
5020 		if(found_projs & (1 << i)) {
5024 		req = get_ia32_out_req(node, i);
		/* flag results need no keep */
5029 		if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
5033 		block = get_nodes_block(node);
5034 		in[0] = new_r_Proj(current_ir_graph, block, node,
5035 		                   arch_register_class_mode(class), i);
		/* reuse one Keep per node where possible */
5036 		if(last_keep != NULL) {
5037 			be_Keep_add_node(last_keep, class, in[0]);
5039 			last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
5040 			if(sched_is_scheduled(node)) {
5041 				sched_add_after(node, last_keep);
5048  * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
 *
 * Thin driver: walks the whole graph with add_missing_keep_walker so every
 * unused ia32 result gets a Proj kept alive by a be_Keep node.
5051 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5053 	ir_graph *irg = be_get_birg_irg(cg->birg);
5054 	irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5057 /* do the transformation */
/*
 * Transform the whole graph from Firm IR into ia32 nodes.
 *
 * Sets up the transformers and auxiliary analyses (heights, the set of
 * nodes unusable in address modes), runs be_transform_graph with
 * ia32_pretransform_node as pre-hook, then tears the analyses down again.
 */
5058 void ia32_transform_graph(ia32_code_gen_t *cg) {
5060 	ir_graph *irg = cg->irg;
5062 	register_transformers();
5064 	initial_fpcw = NULL;
5066 	BE_TIMER_PUSH(t_heights);
5067 	heights      = heights_new(irg);
5068 	BE_TIMER_POP(t_heights);
5069 	ia32_calculate_non_address_mode_nodes(cg->birg);
5071 	/* the transform phase is not safe for CSE (yet) because several nodes get
5072 	 * attributes set after their creation */
5073 	cse_last = get_opt_cse();
5076 	be_transform_graph(cg->birg, ia32_pretransform_node, cg);
	/* restore the caller's CSE setting */
5078 	set_opt_cse(cse_last);
5080 	ia32_free_non_address_mode_nodes();
5081 	heights_free(heights);
5085 void ia32_init_transform(void)
5087 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");