2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
98 static ir_node *initial_fpcw = NULL;
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
139 * Return true if a mode can be stored in the GP register set
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
142 if(mode == mode_fpcw)
144 if(get_mode_size_bits(mode) > 32)
146 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
150 * creates a unique ident by adding a number to a tag
152 * @param tag the tag string, must contain a %d if a number
155 static ident *unique_id(const char *tag)
157 static unsigned id = 0;
160 snprintf(str, sizeof(str), tag, ++id);
161 return new_id_from_str(str);
165 * Get a primitive type for a mode.
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 pmap_entry *e = pmap_find(types, mode);
174 snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 res = new_type_primitive(new_id_from_str(buf), mode);
176 set_type_alignment_bytes(res, 16);
177 pmap_insert(types, mode, res);
185 * Creates an immediate.
187 * @param symconst if set, create a SymConst immediate
188 * @param symconst_sign sign for the symconst
189 * @param val integer value for the immediate
191 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
193 ir_graph *irg = current_ir_graph;
194 ir_node *start_block = get_irg_start_block(irg);
195 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block,
196 symconst, symconst_sign, val);
197 arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
203 * Get an atomic entity that is initialized with a tarval
205 static ir_entity *create_float_const_entity(ir_node *cnst)
207 ia32_isa_t *isa = env_cg->isa;
208 tarval *tv = get_Const_tarval(cnst);
209 pmap_entry *e = pmap_find(isa->tv_ent, tv);
214 ir_mode *mode = get_irn_mode(cnst);
215 ir_type *tp = get_Const_type(cnst);
216 if (tp == firm_unknown_type)
217 tp = get_prim_type(isa->types, mode);
219 res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
221 set_entity_ld_ident(res, get_entity_ident(res));
222 set_entity_visibility(res, visibility_local);
223 set_entity_variability(res, variability_constant);
224 set_entity_allocation(res, allocation_static);
226 /* we create a new entity here: It's initialization must resist on the
228 rem = current_ir_graph;
229 current_ir_graph = get_const_code_irg();
230 set_atomic_ent_value(res, new_Const_type(tv, tp));
231 current_ir_graph = rem;
233 pmap_insert(isa->tv_ent, tv, res);
241 static int is_Const_0(ir_node *node) {
242 return is_Const(node) && is_Const_null(node);
245 static int is_Const_1(ir_node *node) {
246 return is_Const(node) && is_Const_one(node);
249 static int is_Const_Minus_1(ir_node *node) {
250 return is_Const(node) && is_Const_all_one(node);
254 * returns true if constant can be created with a simple float command
256 static int is_simple_x87_Const(ir_node *node)
258 tarval *tv = get_Const_tarval(node);
260 if (tarval_is_null(tv) || tarval_is_one(tv))
263 /* TODO: match all the other float constants */
268 * returns true if constant can be created with a simple float command
270 static int is_simple_sse_Const(ir_node *node)
272 tarval *tv = get_Const_tarval(node);
274 if (get_tarval_mode(tv) == mode_F)
277 if (tarval_is_null(tv) || tarval_is_one(tv))
280 /* TODO: match all the other float constants */
285 * Transforms a Const.
287 static ir_node *gen_Const(ir_node *node) {
288 ir_graph *irg = current_ir_graph;
289 ir_node *old_block = get_nodes_block(node);
290 ir_node *block = be_transform_node(old_block);
291 dbg_info *dbgi = get_irn_dbg_info(node);
292 ir_mode *mode = get_irn_mode(node);
294 assert(is_Const(node));
296 if (mode_is_float(mode)) {
298 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
299 ir_node *nomem = new_NoMem();
303 if (ia32_cg_config.use_sse2) {
304 tarval *tv = get_Const_tarval(node);
305 if (tarval_is_null(tv)) {
306 load = new_rd_ia32_xZero(dbgi, irg, block);
307 set_ia32_ls_mode(load, mode);
309 } else if (tarval_is_one(tv)) {
310 int cnst = mode == mode_F ? 26 : 55;
311 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
312 ir_node *imm2 = create_Immediate(NULL, 0, 2);
313 ir_node *pslld, *psrld;
315 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
316 set_ia32_ls_mode(load, mode);
317 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
318 set_ia32_ls_mode(pslld, mode);
319 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
320 set_ia32_ls_mode(psrld, mode);
322 } else if (mode == mode_F) {
323 /* we can place any 32bit constant by using a movd gp, sse */
324 unsigned val = get_tarval_sub_bits(tv, 0) |
325 (get_tarval_sub_bits(tv, 1) << 8) |
326 (get_tarval_sub_bits(tv, 2) << 16) |
327 (get_tarval_sub_bits(tv, 3) << 24);
328 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
329 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
330 set_ia32_ls_mode(load, mode);
333 floatent = create_float_const_entity(node);
335 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
337 set_ia32_op_type(load, ia32_AddrModeS);
338 set_ia32_am_sc(load, floatent);
339 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
340 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
343 if (is_Const_null(node)) {
344 load = new_rd_ia32_vfldz(dbgi, irg, block);
346 } else if (is_Const_one(node)) {
347 load = new_rd_ia32_vfld1(dbgi, irg, block);
350 floatent = create_float_const_entity(node);
352 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
353 set_ia32_op_type(load, ia32_AddrModeS);
354 set_ia32_am_sc(load, floatent);
355 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
356 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
358 set_ia32_ls_mode(load, mode);
361 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
363 /* Const Nodes before the initial IncSP are a bad idea, because
364 * they could be spilled and we have no SP ready at that point yet.
365 * So add a dependency to the initial frame pointer calculation to
366 * avoid that situation.
368 if (get_irg_start_block(irg) == block) {
369 add_irn_dep(load, get_irg_frame(irg));
372 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
376 tarval *tv = get_Const_tarval(node);
379 tv = tarval_convert_to(tv, mode_Iu);
381 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
383 panic("couldn't convert constant tarval (%+F)", node);
385 val = get_tarval_long(tv);
387 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
388 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
391 if (get_irg_start_block(irg) == block) {
392 add_irn_dep(cnst, get_irg_frame(irg));
400 * Transforms a SymConst.
402 static ir_node *gen_SymConst(ir_node *node) {
403 ir_graph *irg = current_ir_graph;
404 ir_node *old_block = get_nodes_block(node);
405 ir_node *block = be_transform_node(old_block);
406 dbg_info *dbgi = get_irn_dbg_info(node);
407 ir_mode *mode = get_irn_mode(node);
410 if (mode_is_float(mode)) {
411 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
412 ir_node *nomem = new_NoMem();
414 if (ia32_cg_config.use_sse2)
415 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
417 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
418 set_ia32_am_sc(cnst, get_SymConst_entity(node));
419 set_ia32_use_frame(cnst);
423 if(get_SymConst_kind(node) != symconst_addr_ent) {
424 panic("backend only support symconst_addr_ent (at %+F)", node);
426 entity = get_SymConst_entity(node);
427 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
430 /* Const Nodes before the initial IncSP are a bad idea, because
431 * they could be spilled and we have no SP ready at that point yet
433 if (get_irg_start_block(irg) == block) {
434 add_irn_dep(cnst, get_irg_frame(irg));
437 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
442 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
443 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
444 static const struct {
446 const char *ent_name;
447 const char *cnst_str;
450 } names [ia32_known_const_max] = {
451 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
452 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
453 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
454 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
455 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
457 static ir_entity *ent_cache[ia32_known_const_max];
459 const char *tp_name, *ent_name, *cnst_str;
467 ent_name = names[kct].ent_name;
468 if (! ent_cache[kct]) {
469 tp_name = names[kct].tp_name;
470 cnst_str = names[kct].cnst_str;
472 switch (names[kct].mode) {
473 case 0: mode = mode_Iu; break;
474 case 1: mode = mode_Lu; break;
475 default: mode = mode_F; break;
477 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
478 tp = new_type_primitive(new_id_from_str(tp_name), mode);
479 /* set the specified alignment */
480 set_type_alignment_bytes(tp, names[kct].align);
482 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
484 set_entity_ld_ident(ent, get_entity_ident(ent));
485 set_entity_visibility(ent, visibility_local);
486 set_entity_variability(ent, variability_constant);
487 set_entity_allocation(ent, allocation_static);
489 /* we create a new entity here: It's initialization must resist on the
491 rem = current_ir_graph;
492 current_ir_graph = get_const_code_irg();
493 cnst = new_Const(mode, tv);
494 current_ir_graph = rem;
496 set_atomic_ent_value(ent, cnst);
498 /* cache the entry */
499 ent_cache[kct] = ent;
502 return ent_cache[kct];
507 * Prints the old node name on cg obst and returns a pointer to it.
509 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
510 ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
512 lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
513 obstack_1grow(isa->name_obst, 0);
514 return obstack_finish(isa->name_obst);
519 * return true if the node is a Proj(Load) and could be used in source address
520 * mode for another node. Will return only true if the @p other node is not
521 * dependent on the memory of the Load (for binary operations use the other
522 * input here, for unary operations use NULL).
524 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
525 ir_node *other, ir_node *other2)
527 ir_mode *mode = get_irn_mode(node);
531 /* float constants are always available */
532 if (is_Const(node) && mode_is_float(mode)) {
533 if (ia32_cg_config.use_sse2) {
534 if (is_simple_sse_Const(node))
537 if (is_simple_x87_Const(node))
540 if (get_irn_n_edges(node) > 1)
547 load = get_Proj_pred(node);
548 pn = get_Proj_proj(node);
549 if(!is_Load(load) || pn != pn_Load_res)
551 if(get_nodes_block(load) != block)
553 /* we only use address mode if we're the only user of the load */
554 if(get_irn_n_edges(node) > 1)
556 /* in some edge cases with address mode we might reach the load normally
557 * and through some AM sequence, if it is already materialized then we
558 * can't create an AM node from it */
559 if(be_is_transformed(node))
562 /* don't do AM if other node inputs depend on the load (via mem-proj) */
563 if(other != NULL && get_nodes_block(other) == block
564 && heights_reachable_in_block(heights, other, load))
566 if(other2 != NULL && get_nodes_block(other2) == block
567 && heights_reachable_in_block(heights, other2, load))
573 typedef struct ia32_address_mode_t ia32_address_mode_t;
574 struct ia32_address_mode_t {
578 ia32_op_type_t op_type;
582 unsigned commutative : 1;
583 unsigned ins_permuted : 1;
586 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
588 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
590 /* construct load address */
591 memset(addr, 0, sizeof(addr[0]));
592 ia32_create_address_mode(addr, ptr, /*force=*/0);
594 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
595 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
596 addr->mem = be_transform_node(mem);
599 static void build_address(ia32_address_mode_t *am, ir_node *node)
601 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
602 ia32_address_t *addr = &am->addr;
608 if (is_Const(node)) {
609 ir_entity *entity = create_float_const_entity(node);
610 addr->base = noreg_gp;
611 addr->index = noreg_gp;
612 addr->mem = new_NoMem();
613 addr->symconst_ent = entity;
615 am->ls_mode = get_irn_mode(node);
616 am->pinned = op_pin_state_floats;
620 load = get_Proj_pred(node);
621 ptr = get_Load_ptr(load);
622 mem = get_Load_mem(load);
623 new_mem = be_transform_node(mem);
624 am->pinned = get_irn_pinned(load);
625 am->ls_mode = get_Load_mode(load);
626 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
628 /* construct load address */
629 ia32_create_address_mode(addr, ptr, /*force=*/0);
631 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
632 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
636 static void set_address(ir_node *node, const ia32_address_t *addr)
638 set_ia32_am_scale(node, addr->scale);
639 set_ia32_am_sc(node, addr->symconst_ent);
640 set_ia32_am_offs_int(node, addr->offset);
641 if(addr->symconst_sign)
642 set_ia32_am_sc_sign(node);
644 set_ia32_use_frame(node);
645 set_ia32_frame_ent(node, addr->frame_entity);
648 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
650 set_address(node, &am->addr);
652 set_ia32_op_type(node, am->op_type);
653 set_ia32_ls_mode(node, am->ls_mode);
654 if(am->pinned == op_pin_state_pinned && get_irn_pinned(node) != op_pin_state_pinned) {
655 set_irn_pinned(node, am->pinned);
658 set_ia32_commutative(node);
662 * Check, if a given node is a Down-Conv, ie. a integer Conv
663 * from a mode with a mode with more bits to a mode with lesser bits.
664 * Moreover, we return only true if the node has not more than 1 user.
666 * @param node the node
667 * @return non-zero if node is a Down-Conv
669 static int is_downconv(const ir_node *node)
677 /* we only want to skip the conv when we're the only user
678 * (not optimal but for now...)
680 if(get_irn_n_edges(node) > 1)
683 src_mode = get_irn_mode(get_Conv_op(node));
684 dest_mode = get_irn_mode(node);
685 return mode_needs_gp_reg(src_mode)
686 && mode_needs_gp_reg(dest_mode)
687 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
690 /* Skip all Down-Conv's on a given node and return the resulting node. */
691 ir_node *ia32_skip_downconv(ir_node *node) {
692 while (is_downconv(node))
693 node = get_Conv_op(node);
699 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
701 ir_mode *mode = get_irn_mode(node);
706 if(mode_is_signed(mode)) {
711 block = get_nodes_block(node);
712 dbgi = get_irn_dbg_info(node);
714 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
719 * matches operands of a node into ia32 addressing/operand modes. This covers
720 * usage of source address mode, immediates, operations with non 32-bit modes,
722 * The resulting data is filled into the @p am struct. block is the block
723 * of the node whose arguments are matched. op1, op2 are the first and second
724 * input that are matched (op1 may be NULL). other_op is another unrelated
725 * input that is not matched! but which is needed sometimes to check if AM
726 * for op1/op2 is legal.
727 * @p flags describes the supported modes of the operation in detail.
729 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
730 ir_node *op1, ir_node *op2, ir_node *other_op,
733 ia32_address_t *addr = &am->addr;
734 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
737 ir_mode *mode = get_irn_mode(op2);
739 unsigned commutative;
740 int use_am_and_immediates;
742 int mode_bits = get_mode_size_bits(mode);
744 memset(am, 0, sizeof(am[0]));
746 commutative = (flags & match_commutative) != 0;
747 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
748 use_am = (flags & match_am) != 0;
749 use_immediate = (flags & match_immediate) != 0;
750 assert(!use_am_and_immediates || use_immediate);
753 assert(!commutative || op1 != NULL);
754 assert(use_am || !(flags & match_8bit_am));
755 assert(use_am || !(flags & match_16bit_am));
757 if (mode_bits == 8) {
758 if (!(flags & match_8bit_am))
760 /* we don't automatically add upconvs yet */
761 assert((flags & match_mode_neutral) || (flags & match_8bit));
762 } else if (mode_bits == 16) {
763 if (!(flags & match_16bit_am))
765 /* we don't automatically add upconvs yet */
766 assert((flags & match_mode_neutral) || (flags & match_16bit));
769 /* we can simply skip downconvs for mode neutral nodes: the upper bits
770 * can be random for these operations */
771 if (flags & match_mode_neutral) {
772 op2 = ia32_skip_downconv(op2);
774 op1 = ia32_skip_downconv(op1);
778 /* match immediates. firm nodes are normalized: constants are always on the
781 if (!(flags & match_try_am) && use_immediate) {
782 new_op2 = try_create_Immediate(op2, 0);
785 if (new_op2 == NULL &&
786 use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
787 build_address(am, op2);
788 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
789 if(mode_is_float(mode)) {
790 new_op2 = ia32_new_NoReg_vfp(env_cg);
794 am->op_type = ia32_AddrModeS;
795 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
797 ia32_use_source_address_mode(block, op1, op2, other_op)) {
799 build_address(am, op1);
801 if (mode_is_float(mode)) {
802 noreg = ia32_new_NoReg_vfp(env_cg);
807 if(new_op2 != NULL) {
810 new_op1 = be_transform_node(op2);
812 am->ins_permuted = 1;
814 am->op_type = ia32_AddrModeS;
816 if(flags & match_try_am) {
819 am->op_type = ia32_Normal;
823 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
825 new_op2 = be_transform_node(op2);
826 am->op_type = ia32_Normal;
827 am->ls_mode = get_irn_mode(op2);
828 if(flags & match_mode_neutral)
829 am->ls_mode = mode_Iu;
831 if(addr->base == NULL)
832 addr->base = noreg_gp;
833 if(addr->index == NULL)
834 addr->index = noreg_gp;
835 if(addr->mem == NULL)
836 addr->mem = new_NoMem();
838 am->new_op1 = new_op1;
839 am->new_op2 = new_op2;
840 am->commutative = commutative;
843 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
845 ir_graph *irg = current_ir_graph;
849 if(am->mem_proj == NULL)
852 /* we have to create a mode_T so the old MemProj can attach to us */
853 mode = get_irn_mode(node);
854 load = get_Proj_pred(am->mem_proj);
856 mark_irn_visited(load);
857 be_set_transformed_node(load, node);
860 set_irn_mode(node, mode_T);
861 return new_rd_Proj(NULL, irg, get_nodes_block(node), node, mode, pn_ia32_res);
868 * Construct a standard binary operation, set AM and immediate if required.
870 * @param op1 The first operand
871 * @param op2 The second operand
872 * @param func The node constructor function
873 * @return The constructed ia32 node.
875 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
876 construct_binop_func *func, match_flags_t flags)
878 ir_node *block = get_nodes_block(node);
879 ir_node *new_block = be_transform_node(block);
880 ir_graph *irg = current_ir_graph;
881 dbg_info *dbgi = get_irn_dbg_info(node);
883 ia32_address_mode_t am;
884 ia32_address_t *addr = &am.addr;
886 match_arguments(&am, block, op1, op2, NULL, flags);
888 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
889 am.new_op1, am.new_op2);
890 set_am_attributes(new_node, &am);
891 /* we can't use source address mode anymore when using immediates */
892 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
893 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
894 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
896 new_node = fix_mem_proj(new_node, &am);
903 n_ia32_l_binop_right,
904 n_ia32_l_binop_eflags
906 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
907 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
908 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
909 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_left, n_Sbb_left)
910 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_right, n_Sbb_right)
911 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
914 * Construct a binary operation which also consumes the eflags.
916 * @param node The node to transform
917 * @param func The node constructor function
918 * @param flags The match flags
919 * @return The constructor ia32 node
921 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
924 ir_node *src_block = get_nodes_block(node);
925 ir_node *block = be_transform_node(src_block);
926 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
927 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
928 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
929 ir_node *new_eflags = be_transform_node(eflags);
930 ir_graph *irg = current_ir_graph;
931 dbg_info *dbgi = get_irn_dbg_info(node);
933 ia32_address_mode_t am;
934 ia32_address_t *addr = &am.addr;
936 match_arguments(&am, src_block, op1, op2, NULL, flags);
938 new_node = func(dbgi, irg, block, addr->base, addr->index,
939 addr->mem, am.new_op1, am.new_op2, new_eflags);
940 set_am_attributes(new_node, &am);
941 /* we can't use source address mode anymore when using immediates */
942 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
943 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
944 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
946 new_node = fix_mem_proj(new_node, &am);
951 static ir_node *get_fpcw(void)
954 if(initial_fpcw != NULL)
957 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
958 &ia32_fp_cw_regs[REG_FPCW]);
959 initial_fpcw = be_transform_node(fpcw);
965 * Construct a standard binary operation, set AM and immediate if required.
967 * @param op1 The first operand
968 * @param op2 The second operand
969 * @param func The node constructor function
970 * @return The constructed ia32 node.
972 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
973 construct_binop_float_func *func,
976 ir_graph *irg = current_ir_graph;
977 dbg_info *dbgi = get_irn_dbg_info(node);
978 ir_node *block = get_nodes_block(node);
979 ir_node *new_block = be_transform_node(block);
980 ir_mode *mode = get_irn_mode(node);
982 ia32_address_mode_t am;
983 ia32_address_t *addr = &am.addr;
985 /* cannot use addresmode with long double on x87 */
986 if (get_mode_size_bits(mode) > 64)
989 match_arguments(&am, block, op1, op2, NULL, flags);
991 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
992 am.new_op1, am.new_op2, get_fpcw());
993 set_am_attributes(new_node, &am);
995 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
997 new_node = fix_mem_proj(new_node, &am);
1003 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1005 * @param op1 The first operand
1006 * @param op2 The second operand
1007 * @param func The node constructor function
1008 * @return The constructed ia32 node.
1010 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1011 construct_shift_func *func,
1012 match_flags_t flags)
1014 dbg_info *dbgi = get_irn_dbg_info(node);
1015 ir_graph *irg = current_ir_graph;
1016 ir_node *block = get_nodes_block(node);
1017 ir_node *new_block = be_transform_node(block);
1022 assert(! mode_is_float(get_irn_mode(node)));
1023 assert(flags & match_immediate);
1024 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1026 if(flags & match_mode_neutral) {
1027 op1 = ia32_skip_downconv(op1);
1029 new_op1 = be_transform_node(op1);
1031 /* the shift amount can be any mode that is bigger than 5 bits, since all
1032 * other bits are ignored anyway */
1033 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1034 op2 = get_Conv_op(op2);
1035 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1037 new_op2 = create_immediate_or_transform(op2, 0);
1039 new_node = func(dbgi, irg, new_block, new_op1, new_op2);
1040 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1042 /* lowered shift instruction may have a dependency operand, handle it here */
1043 if (get_irn_arity(node) == 3) {
1044 /* we have a dependency */
1045 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1046 add_irn_dep(new_node, new_dep);
1054 * Construct a standard unary operation, set AM and immediate if required.
1056 * @param op The operand
1057 * @param func The node constructor function
1058 * @return The constructed ia32 node.
1060 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1061 match_flags_t flags)
1063 ir_graph *irg = current_ir_graph;
1064 dbg_info *dbgi = get_irn_dbg_info(node);
1065 ir_node *block = get_nodes_block(node);
1066 ir_node *new_block = be_transform_node(block);
1070 assert(flags == 0 || flags == match_mode_neutral);
1071 if(flags & match_mode_neutral) {
1072 op = ia32_skip_downconv(op);
1075 new_op = be_transform_node(op);
1076 new_node = func(dbgi, irg, new_block, new_op);
1078 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1083 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1084 ia32_address_t *addr)
1086 ir_graph *irg = current_ir_graph;
1087 ir_node *base = addr->base;
1088 ir_node *index = addr->index;
1092 base = ia32_new_NoReg_gp(env_cg);
1094 base = be_transform_node(base);
1098 index = ia32_new_NoReg_gp(env_cg);
1100 index = be_transform_node(index);
1103 res = new_rd_ia32_Lea(dbgi, irg, block, base, index);
1104 set_address(res, addr);
1109 static int am_has_immediates(const ia32_address_t *addr)
1111 return addr->offset != 0 || addr->symconst_ent != NULL
1112 || addr->frame_entity || addr->use_frame;
1116 * Creates an ia32 Add.
1118 * @return the created ia32 Add node
1120 static ir_node *gen_Add(ir_node *node) {
1121 ir_graph *irg = current_ir_graph;
1122 dbg_info *dbgi = get_irn_dbg_info(node);
1123 ir_node *block = get_nodes_block(node);
1124 ir_node *new_block = be_transform_node(block);
1125 ir_node *op1 = get_Add_left(node);
1126 ir_node *op2 = get_Add_right(node);
1127 ir_mode *mode = get_irn_mode(node);
1129 ir_node *add_immediate_op;
1130 ia32_address_t addr;
1131 ia32_address_mode_t am;
1133 if (mode_is_float(mode)) {
1134 if (ia32_cg_config.use_sse2)
1135 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1136 match_commutative | match_am);
1138 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1139 match_commutative | match_am);
1142 ia32_mark_non_am(node);
1144 op2 = ia32_skip_downconv(op2);
1145 op1 = ia32_skip_downconv(op1);
1149 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1150 * 1. Add with immediate -> Lea
1151 * 2. Add with possible source address mode -> Add
1152 * 3. Otherwise -> Lea
1154 memset(&addr, 0, sizeof(addr));
1155 ia32_create_address_mode(&addr, node, /*force=*/1);
1156 add_immediate_op = NULL;
1158 if(addr.base == NULL && addr.index == NULL) {
1159 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1160 addr.symconst_sign, addr.offset);
1161 add_irn_dep(new_node, get_irg_frame(irg));
1162 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1165 /* add with immediate? */
1166 if(addr.index == NULL) {
1167 add_immediate_op = addr.base;
1168 } else if(addr.base == NULL && addr.scale == 0) {
1169 add_immediate_op = addr.index;
1172 if(add_immediate_op != NULL) {
1173 if(!am_has_immediates(&addr)) {
1174 #ifdef DEBUG_libfirm
1175 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1178 return be_transform_node(add_immediate_op);
1181 new_node = create_lea_from_address(dbgi, new_block, &addr);
1182 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1186 /* test if we can use source address mode */
1187 match_arguments(&am, block, op1, op2, NULL, match_commutative
1188 | match_mode_neutral | match_am | match_immediate | match_try_am);
1190 /* construct an Add with source address mode */
1191 if (am.op_type == ia32_AddrModeS) {
1192 ia32_address_t *am_addr = &am.addr;
1193 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1194 am_addr->index, am_addr->mem, am.new_op1,
1196 set_am_attributes(new_node, &am);
1197 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1199 new_node = fix_mem_proj(new_node, &am);
1204 /* otherwise construct a lea */
1205 new_node = create_lea_from_address(dbgi, new_block, &addr);
1206 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1211 * Creates an ia32 Mul.
1213 * @return the created ia32 Mul node
/* Transform a firm Mul node.
 * Floats: SSE2 xMul when available, otherwise x87 vfmul.
 * Integers: IMul — for the lower 32 bits of the result signed and
 * unsigned multiplication agree, so one opcode suffices. */
1215 static ir_node *gen_Mul(ir_node *node) {
1216 ir_node *op1 = get_Mul_left(node);
1217 ir_node *op2 = get_Mul_right(node);
1218 ir_mode *mode = get_irn_mode(node);
1221 if (mode_is_float(mode)) {
1222 if (ia32_cg_config.use_sse2)
1223 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1224 match_commutative | match_am);
1226 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1227 match_commutative | match_am);
1230 /* for the lower 32bit of the result it doesn't matter whether we use
1231 * signed or unsigned multiplication so we use IMul as it has fewer
/* Allow address-mode + immediate combination only when the target
 * supports imul with 32bit immediate and memory operand. */
1233 flags = match_commutative | match_am | match_mode_neutral | match_immediate;
1234 if (ia32_cg_config.use_imul_mem_imm32)
1235 flags |= match_am_and_immediates;
1236 return gen_binop(node, op1, op2, new_rd_ia32_IMul, flags);
1240 * Creates an ia32 Mulh.
1241 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1242 * this result while Mul returns the lower 32 bit.
1244 * @return the created ia32 Mulh node
/* Transform a firm Mulh (upper 32 bits of a 32x32->64 multiplication).
 * Signed operands use IMul1OP, unsigned ones use Mul; both place the
 * high half in the same result Proj number (asserted below). */
1246 static ir_node *gen_Mulh(ir_node *node)
1248 ir_node *block = get_nodes_block(node);
1249 ir_node *new_block = be_transform_node(block);
1250 ir_graph *irg = current_ir_graph;
1251 dbg_info *dbgi = get_irn_dbg_info(node);
1252 ir_mode *mode = get_irn_mode(node);
1253 ir_node *op1 = get_Mulh_left(node);
1254 ir_node *op2 = get_Mulh_right(node);
1255 ir_node *proj_res_high;
1257 ia32_address_mode_t am;
1258 ia32_address_t *addr = &am.addr;
/* Only plain 32bit integer Mulh is handled here. */
1260 assert(!mode_is_float(mode) && "Mulh with float not supported");
1261 assert(get_mode_size_bits(mode) == 32);
1263 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
1265 if (mode_is_signed(mode)) {
1266 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1267 addr->index, addr->mem, am.new_op1,
1270 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1271 addr->index, addr->mem, am.new_op1,
1275 set_am_attributes(new_node, &am);
1276 /* we can't use source address mode anymore when using immediates */
1277 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1278 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1279 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1281 assert(get_irn_mode(new_node) == mode_T);
1283 fix_mem_proj(new_node, &am);
/* Both opcodes expose the high half under the same Proj number. */
1285 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
/* NOTE(review): the Proj is created in the untransformed 'block'
 * rather than 'new_block' — verify this is intended. */
1286 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1287 mode_Iu, pn_ia32_IMul1OP_res_high);
1289 return proj_res_high;
1295 * Creates an ia32 And.
1297 * @return The created ia32 And node
/* Transform a firm And node (integer only).
 * And with constant 0xFF/0xFFFF is recognized as a zero extension and
 * lowered to an I2I conversion; everything else becomes an ia32 And. */
1299 static ir_node *gen_And(ir_node *node) {
1300 ir_node *op1 = get_And_left(node);
1301 ir_node *op2 = get_And_right(node);
1302 assert(! mode_is_float(get_irn_mode(node)));
1304 /* is it a zero extension? */
1305 if (is_Const(op2)) {
1306 tarval *tv = get_Const_tarval(op2);
1307 long v = get_tarval_long(tv);
1309 if (v == 0xFF || v == 0xFFFF) {
1310 dbg_info *dbgi = get_irn_dbg_info(node);
1311 ir_node *block = get_nodes_block(node);
1318 assert(v == 0xFFFF);
/* Zero extension implemented as a conversion to mode_Iu. */
1321 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1327 return gen_binop(node, op1, op2, new_rd_ia32_And,
1328 match_commutative | match_mode_neutral | match_am
1335 * Creates an ia32 Or.
1337 * @return The created ia32 Or node
/* Transform a firm Or node (integer only) into an ia32 Or. */
1339 static ir_node *gen_Or(ir_node *node) {
1340 ir_node *op1 = get_Or_left(node);
1341 ir_node *op2 = get_Or_right(node);
1343 assert (! mode_is_float(get_irn_mode(node)));
1344 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1345 | match_mode_neutral | match_am | match_immediate);
1351 * Creates an ia32 Eor.
1353 * @return The created ia32 Eor node
/* Transform a firm Eor (xor) node (integer only) into an ia32 Xor. */
1355 static ir_node *gen_Eor(ir_node *node) {
1356 ir_node *op1 = get_Eor_left(node);
1357 ir_node *op2 = get_Eor_right(node);
1359 assert(! mode_is_float(get_irn_mode(node)));
1360 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1361 | match_mode_neutral | match_am | match_immediate);
1366 * Creates an ia32 Sub.
1368 * @return The created ia32 Sub node
/* Transform a firm Sub node.
 * Floats: SSE2 xSub or x87 vfsub. Integers: ia32 Sub; a warning is
 * printed for Sub with a constant operand, which normalization should
 * have turned into an Add. */
1370 static ir_node *gen_Sub(ir_node *node) {
1371 ir_node *op1 = get_Sub_left(node);
1372 ir_node *op2 = get_Sub_right(node);
1373 ir_mode *mode = get_irn_mode(node);
1375 if (mode_is_float(mode)) {
1376 if (ia32_cg_config.use_sse2)
1377 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1379 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
1384 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1388 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1389 | match_am | match_immediate);
1393 * Generates an ia32 DivMod with additional infrastructure for the
1394 * register allocator if needed.
/* Common transformation for firm Div, Mod and DivMod nodes into an
 * ia32 IDiv (signed) or Div (unsigned).
 * Signed division sign-extends edx via Cltd; unsigned division feeds a
 * zero constant as the high word. Memory dependencies are merged with
 * a Sync when both the node's memory and the address mode carry one. */
1396 static ir_node *create_Div(ir_node *node)
1398 ir_graph *irg = current_ir_graph;
1399 dbg_info *dbgi = get_irn_dbg_info(node);
1400 ir_node *block = get_nodes_block(node);
1401 ir_node *new_block = be_transform_node(block);
1408 ir_node *sign_extension;
1409 ia32_address_mode_t am;
1410 ia32_address_t *addr = &am.addr;
1412 /* the upper bits have random contents for smaller modes */
/* Extract operands/memory/result mode depending on the opcode. */
1413 switch (get_irn_opcode(node)) {
1415 op1 = get_Div_left(node);
1416 op2 = get_Div_right(node);
1417 mem = get_Div_mem(node);
1418 mode = get_Div_resmode(node);
1421 op1 = get_Mod_left(node);
1422 op2 = get_Mod_right(node);
1423 mem = get_Mod_mem(node);
1424 mode = get_Mod_resmode(node);
1427 op1 = get_DivMod_left(node);
1428 op2 = get_DivMod_right(node);
1429 mem = get_DivMod_mem(node);
1430 mode = get_DivMod_resmode(node);
1433 panic("invalid divmod node %+F", node);
1436 match_arguments(&am, block, op1, op2, NULL, match_am);
1438 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1439 is the memory of the consumed address. We can have only the second op as address
1440 in Div nodes, so check only op2. */
1441 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1442 new_mem = be_transform_node(mem);
1443 if(!is_NoMem(addr->mem)) {
1447 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1450 new_mem = addr->mem;
1453 if (mode_is_signed(mode)) {
/* ProduceVal + Cltd materialize the sign-extended high word. */
1454 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1455 add_irn_dep(produceval, get_irg_frame(irg));
1456 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1459 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1460 addr->index, new_mem, am.new_op1,
1461 sign_extension, am.new_op2);
/* Unsigned: high word is simply zero. */
1463 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1464 add_irn_dep(sign_extension, get_irg_frame(irg));
1466 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1467 addr->index, new_mem, am.new_op1,
1468 sign_extension, am.new_op2);
/* Keep the original pinned state (division may trap). */
1471 set_irn_pinned(new_node, get_irn_pinned(node));
1473 set_am_attributes(new_node, &am);
1474 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1476 new_node = fix_mem_proj(new_node, &am);
/* Transform a firm Mod node; shares the Div/IDiv lowering. */
1482 static ir_node *gen_Mod(ir_node *node) {
1483 return create_Div(node);
/* Transform a firm Div node; shares the Div/IDiv lowering. */
1486 static ir_node *gen_Div(ir_node *node) {
1487 return create_Div(node);
/* Transform a firm DivMod node; shares the Div/IDiv lowering. */
1490 static ir_node *gen_DivMod(ir_node *node) {
1491 return create_Div(node);
1497 * Creates an ia32 floating Div.
1499 * @return The created ia32 xDiv node
/* Transform a firm Quot (floating point division) node:
 * SSE2 xDiv when available, otherwise x87 vfdiv. */
1501 static ir_node *gen_Quot(ir_node *node)
1503 ir_node *op1 = get_Quot_left(node);
1504 ir_node *op2 = get_Quot_right(node);
1506 if (ia32_cg_config.use_sse2) {
1507 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1509 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1515 * Creates an ia32 Shl.
1517 * @return The created ia32 Shl node
/* Transform a firm Shl node into an ia32 Shl. */
1519 static ir_node *gen_Shl(ir_node *node) {
1520 ir_node *left = get_Shl_left(node);
1521 ir_node *right = get_Shl_right(node);
1523 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1524 match_mode_neutral | match_immediate);
1528 * Creates an ia32 Shr.
1530 * @return The created ia32 Shr node
/* Transform a firm Shr (logical shift right) node into an ia32 Shr. */
1532 static ir_node *gen_Shr(ir_node *node) {
1533 ir_node *left = get_Shr_left(node);
1534 ir_node *right = get_Shr_right(node);
1536 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1542 * Creates an ia32 Sar.
1544 * @return The created ia32 Sar node
/* Transform a firm Shrs (arithmetic shift right) node.
 * Special cases on mode_Is:
 *  - Shrs by a constant that fills the word with the sign bit is
 *    lowered to Cltd (sign extension into the high word);
 *  - Shrs(Shl(x, C), C) with C == 16 or 24 is an 8/16 bit sign
 *    extension and becomes an I2I conversion.
 * Everything else becomes an ia32 Sar. */
1546 static ir_node *gen_Shrs(ir_node *node) {
1547 ir_node *left = get_Shrs_left(node);
1548 ir_node *right = get_Shrs_right(node);
1549 ir_mode *mode = get_irn_mode(node);
1551 if(is_Const(right) && mode == mode_Is) {
1552 tarval *tv = get_Const_tarval(right);
1553 long val = get_tarval_long(tv);
1555 /* this is a sign extension */
1556 ir_graph *irg = current_ir_graph;
1557 dbg_info *dbgi = get_irn_dbg_info(node);
1558 ir_node *block = be_transform_node(get_nodes_block(node));
1560 ir_node *new_op = be_transform_node(op);
1561 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1562 add_irn_dep(pval, get_irg_frame(irg));
1564 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1568 /* 8 or 16 bit sign extension? */
1569 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1570 ir_node *shl_left = get_Shl_left(left);
1571 ir_node *shl_right = get_Shl_right(left);
1572 if(is_Const(shl_right)) {
1573 tarval *tv1 = get_Const_tarval(right);
1574 tarval *tv2 = get_Const_tarval(shl_right);
/* Both shift amounts must be the same constant (16 or 24). */
1575 if(tv1 == tv2 && tarval_is_long(tv1)) {
1576 long val = get_tarval_long(tv1);
1577 if(val == 16 || val == 24) {
1578 dbg_info *dbgi = get_irn_dbg_info(node);
1579 ir_node *block = get_nodes_block(node);
1589 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1598 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1604 * Creates an ia32 RotL.
1606 * @param op1 The first operator
1607 * @param op2 The second operator
1608 * @return The created ia32 RotL node
/* Create an ia32 Rol for a left rotation. */
1610 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1611 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1617 * Creates an ia32 RotR.
1618 * NOTE: There is no RotR with immediate because this would always be a RotL
1619 * "imm-mode_size_bits" which can be pre-calculated.
1621 * @param op1 The first operator
1622 * @param op2 The second operator
1623 * @return The created ia32 RotR node
/* Create an ia32 Ror for a right rotation (see note above about
 * immediates being pre-calculated into RotL). */
1625 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1626 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1632 * Creates an ia32 RotR or RotL (depending on the found pattern).
1634 * @return The created ia32 RotL or RotR node
/* Transform a firm Rot (left rotation) node.
 * Pattern-matches Rot(x, Add(Minus(e), bits)) — i.e. a rotation by
 * "bits - e" — and emits a RotR by e instead; otherwise emits RotL. */
1636 static ir_node *gen_Rot(ir_node *node) {
1637 ir_node *rotate = NULL;
1638 ir_node *op1 = get_Rot_left(node);
1639 ir_node *op2 = get_Rot_right(node);
1641 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1642 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1643 that means we can create a RotR instead of an Add and a RotL */
1645 if (get_irn_op(op2) == op_Add) {
1647 ir_node *left = get_Add_left(add);
1648 ir_node *right = get_Add_right(add);
1649 if (is_Const(right)) {
1650 tarval *tv = get_Const_tarval(right);
1651 ir_mode *mode = get_irn_mode(node);
1652 long bits = get_mode_size_bits(mode);
/* The constant must equal the mode's bit size and the other
 * operand must be a Minus for the RotR rewrite to be valid. */
1654 if (get_irn_op(left) == op_Minus &&
1655 tarval_is_long(tv) &&
1656 get_tarval_long(tv) == bits &&
1659 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1660 rotate = gen_RotR(node, op1, get_Minus_op(left));
1665 if (rotate == NULL) {
1666 rotate = gen_RotL(node, op1, op2);
1675 * Transforms a Minus node.
1677 * @return The created ia32 Minus node
/* Transform a firm Minus node.
 * SSE2 floats: xor with a sign-bit mask constant fetched via address
 * mode. x87 floats: vfchs (change sign). Integers: Neg. */
1679 static ir_node *gen_Minus(ir_node *node)
1681 ir_node *op = get_Minus_op(node);
1682 ir_node *block = be_transform_node(get_nodes_block(node));
1683 ir_graph *irg = current_ir_graph;
1684 dbg_info *dbgi = get_irn_dbg_info(node);
1685 ir_mode *mode = get_irn_mode(node);
1690 if (mode_is_float(mode)) {
1691 ir_node *new_op = be_transform_node(op);
1692 if (ia32_cg_config.use_sse2) {
1693 /* TODO: non-optimal... if we have many xXors, then we should
1694 * rather create a load for the const and use that instead of
1695 * several AM nodes... */
1696 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1697 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1698 ir_node *nomem = new_rd_NoMem(irg);
1700 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1701 nomem, new_op, noreg_xmm);
/* Pick the single/double precision sign-mask constant. */
1703 size = get_mode_size_bits(mode);
1704 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1706 set_ia32_am_sc(new_node, ent);
1707 set_ia32_op_type(new_node, ia32_AddrModeS);
1708 set_ia32_ls_mode(new_node, mode);
1710 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
1713 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1716 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1722 * Transforms a Not node.
1724 * @return The created ia32 Not node
/* Transform a firm Not node (integer only, mode_b already lowered). */
1726 static ir_node *gen_Not(ir_node *node) {
1727 ir_node *op = get_Not_op(node);
1729 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1730 assert (! mode_is_float(get_irn_mode(node)));
1732 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1738 * Transforms an Abs node.
1740 * @return The created ia32 Abs node
/* Transform a firm Abs node.
 * SSE2 floats: And with a sign-clearing mask constant via address mode.
 * x87 floats: vfabs.
 * Integers: classic branch-free sequence — sign := Cltd(x);
 * result := (x ^ sign) - sign. Sub-32bit operands are first sign
 * extended to mode_Is via an I2I conversion. */
1742 static ir_node *gen_Abs(ir_node *node)
1744 ir_node *block = get_nodes_block(node);
1745 ir_node *new_block = be_transform_node(block);
1746 ir_node *op = get_Abs_op(node);
1747 ir_graph *irg = current_ir_graph;
1748 dbg_info *dbgi = get_irn_dbg_info(node);
1749 ir_mode *mode = get_irn_mode(node);
1750 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1751 ir_node *nomem = new_NoMem();
1757 if (mode_is_float(mode)) {
1758 new_op = be_transform_node(op);
1760 if (ia32_cg_config.use_sse2) {
1761 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1762 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1763 nomem, new_op, noreg_fp);
/* Pick the single/double precision abs-mask constant. */
1765 size = get_mode_size_bits(mode);
1766 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1768 set_ia32_am_sc(new_node, ent);
1770 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1772 set_ia32_op_type(new_node, ia32_AddrModeS);
1773 set_ia32_ls_mode(new_node, mode);
1775 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1776 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1779 ir_node *xor, *pval, *sign_extension;
1781 if (get_mode_size_bits(mode) == 32) {
1782 new_op = be_transform_node(op);
1784 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
/* sign_extension is all-ones when x < 0, all-zeros otherwise. */
1787 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1788 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1791 add_irn_dep(pval, get_irg_frame(irg));
1792 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1794 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1795 nomem, new_op, sign_extension);
1796 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1798 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1799 nomem, xor, sign_extension);
1800 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Produce a flags-producing node for 'node' and report the pn_Cmp
 * relation in *pnc_out.
 * If the node is a Proj of a Cmp, the transformed Cmp supplies the
 * flags directly; otherwise the mode_b value is compared against zero
 * with a Test and the relation is pn_Cmp_Lg. */
1806 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1808 ir_graph *irg = current_ir_graph;
1816 /* we have a Cmp as input */
1818 ir_node *pred = get_Proj_pred(node);
1820 flags = be_transform_node(pred);
1821 *pnc_out = get_Proj_proj(node);
1826 /* a mode_b value, we have to compare it against 0 */
1827 dbgi = get_irn_dbg_info(node);
1828 new_block = be_transform_node(get_nodes_block(node));
1829 new_op = be_transform_node(node);
1830 noreg = ia32_new_NoReg_gp(env_cg);
1831 nomem = new_NoMem();
/* Test(v, v) sets ZF iff v == 0; "v != 0" is pn_Cmp_Lg. */
1832 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1833 new_op, new_op, 0, 0);
1834 *pnc_out = pn_Cmp_Lg;
1839 * Transforms a Load.
1841 * @return the created ia32 Load node
/* Transform a firm Load node.
 * Builds an ia32 address mode for the pointer, then selects xLoad
 * (SSE2) / vfld (x87) for floats, a combined Conv_I2I load for
 * sub-32bit integer modes, or a plain Load otherwise. */
1843 static ir_node *gen_Load(ir_node *node) {
1844 ir_node *old_block = get_nodes_block(node);
1845 ir_node *block = be_transform_node(old_block);
1846 ir_node *ptr = get_Load_ptr(node);
1847 ir_node *mem = get_Load_mem(node);
1848 ir_node *new_mem = be_transform_node(mem);
1851 ir_graph *irg = current_ir_graph;
1852 dbg_info *dbgi = get_irn_dbg_info(node);
1853 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1854 ir_mode *mode = get_Load_mode(node);
1857 ia32_address_t addr;
1859 /* construct load address */
1860 memset(&addr, 0, sizeof(addr));
1861 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1868 base = be_transform_node(base);
1874 index = be_transform_node(index);
1877 if (mode_is_float(mode)) {
1878 if (ia32_cg_config.use_sse2) {
1879 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1881 res_mode = mode_xmm;
1883 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1885 res_mode = mode_vfp;
1888 assert(mode != mode_b);
1890 /* create a conv node with address mode for smaller modes */
1891 if(get_mode_size_bits(mode) < 32) {
1892 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1893 new_mem, noreg, mode);
1895 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1900 set_irn_pinned(new_node, get_irn_pinned(node));
1901 set_ia32_op_type(new_node, ia32_AddrModeS);
1902 set_ia32_ls_mode(new_node, mode);
1903 set_address(new_node, &addr);
/* Floating (unpinned) loads may be rematerialized by the spiller. */
1905 if(get_irn_pinned(node) == op_pin_state_floats) {
1906 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1909 /* make sure we are scheduled behind the initial IncSP/Barrier
1910 * to avoid spills being placed before it
1912 if (block == get_irg_start_block(irg)) {
1913 add_irn_dep(new_node, get_irg_frame(irg));
1916 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Decide whether a value 'node' (a Proj of a Load) can be folded into
 * destination address mode for a Store at 'ptr' with memory 'mem'.
 * Requires: the load is the value's only user, lives in the same
 * block, is the store's memory predecessor, uses the same pointer, and
 * the 'other' operand does not depend on the load via a memory Proj. */
1921 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1922 ir_node *ptr, ir_node *other)
1929 /* we only use address mode if we're the only user of the load */
1930 if(get_irn_n_edges(node) > 1)
1933 load = get_Proj_pred(node);
1936 if(get_nodes_block(load) != block)
1939 /* Store should be attached to the load */
1940 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
1942 /* store should have the same pointer as the load */
1943 if(get_Load_ptr(load) != ptr)
1946 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1947 if(other != NULL && get_nodes_block(other) == block
1948 && heights_reachable_in_block(heights, other, load))
/* Build a destination-address-mode binary operation (e.g. AddMem) for
 * a Store of 'node' = op1 <op> op2 to 'ptr'.
 * Folds whichever operand is a suitable load (trying op2 first only
 * when the operation is commutative); the other operand becomes an
 * immediate or a register value. Uses func8bit for 8-bit modes.
 * Returns NULL-path behavior is handled by the (elided) else branch. */
1954 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1955 ir_node *mem, ir_node *ptr, ir_mode *mode,
1956 construct_binop_dest_func *func,
1957 construct_binop_dest_func *func8bit,
1958 match_flags_t flags)
1960 ir_node *src_block = get_nodes_block(node);
1962 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1963 ir_graph *irg = current_ir_graph;
1968 ia32_address_mode_t am;
1969 ia32_address_t *addr = &am.addr;
1970 memset(&am, 0, sizeof(am));
1972 assert(flags & match_dest_am);
1973 assert(flags & match_immediate); /* there is no destam node without... */
1974 commutative = (flags & match_commutative) != 0;
1976 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
1977 build_address(&am, op1);
1978 new_op = create_immediate_or_transform(op2, 0);
1979 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
1980 build_address(&am, op2);
1981 new_op = create_immediate_or_transform(op1, 0);
/* Fill in NoReg/NoMem defaults for unused address components. */
1986 if(addr->base == NULL)
1987 addr->base = noreg_gp;
1988 if(addr->index == NULL)
1989 addr->index = noreg_gp;
1990 if(addr->mem == NULL)
1991 addr->mem = new_NoMem();
1993 dbgi = get_irn_dbg_info(node);
1994 block = be_transform_node(src_block);
1995 if(get_mode_size_bits(mode) == 8) {
1996 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
1999 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
2002 set_address(new_node, addr);
2003 set_ia32_op_type(new_node, ia32_AddrModeD);
2004 set_ia32_ls_mode(new_node, mode);
2005 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Build a destination-address-mode unary operation (e.g. NotMem,
 * NegMem) for a Store of 'node' = <op>(op) to 'ptr'. Bails out when
 * the operand cannot be folded (use_dest_am fails). */
2010 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2011 ir_node *ptr, ir_mode *mode,
2012 construct_unop_dest_func *func)
2014 ir_graph *irg = current_ir_graph;
2015 ir_node *src_block = get_nodes_block(node);
2019 ia32_address_mode_t am;
2020 ia32_address_t *addr = &am.addr;
2021 memset(&am, 0, sizeof(am));
2023 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2026 build_address(&am, op);
2028 dbgi = get_irn_dbg_info(node);
2029 block = be_transform_node(src_block);
2030 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2031 set_address(new_node, addr);
2032 set_ia32_op_type(new_node, ia32_AddrModeD);
2033 set_ia32_ls_mode(new_node, mode);
2034 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to turn a Store of an 8-bit Psi with constant 0/1 arms into an
 * ia32 SetMem (setcc to memory). 'negated' reflects which arm carries
 * the 1. Only applies to 8-bit modes; otherwise (elided paths) no
 * SetMem is created. */
2039 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2040 ir_mode *mode = get_irn_mode(node);
2041 ir_node *psi_true = get_Psi_val(node, 0);
2042 ir_node *psi_default = get_Psi_default(node);
2053 ia32_address_t addr;
/* setcc writes a single byte, so only 8-bit stores qualify. */
2055 if(get_mode_size_bits(mode) != 8)
2058 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2060 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2066 build_address_ptr(&addr, ptr, mem);
2068 irg = current_ir_graph;
2069 dbgi = get_irn_dbg_info(node);
2070 block = get_nodes_block(node);
2071 new_block = be_transform_node(block);
2072 cond = get_Psi_cond(node, 0);
2073 flags = get_flags_node(cond, &pnc);
2074 new_mem = be_transform_node(mem);
2075 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2076 addr.index, addr.mem, flags, pnc, negated);
2077 set_address(new_node, &addr);
2078 set_ia32_op_type(new_node, ia32_AddrModeD);
2079 set_ia32_ls_mode(new_node, mode);
2080 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to convert a Store into a destination-address-mode operation
 * (AddMem, SubMem, AndMem, ..., IncMem/DecMem, SetMem, NegMem,
 * NotMem). Returns the new node or NULL (elided paths) when no
 * folding applies. The stored value must be GP-mode, have the store
 * as its only user, and live in the store's block. */
2085 static ir_node *try_create_dest_am(ir_node *node) {
2086 ir_node *val = get_Store_value(node);
2087 ir_node *mem = get_Store_mem(node);
2088 ir_node *ptr = get_Store_ptr(node);
2089 ir_mode *mode = get_irn_mode(val);
2090 unsigned bits = get_mode_size_bits(mode);
2095 /* handle only GP modes for now... */
2096 if(!mode_needs_gp_reg(mode))
2100 /* store must be the only user of the val node */
2101 if(get_irn_n_edges(val) > 1)
2103 /* skip pointless convs */
2105 ir_node *conv_op = get_Conv_op(val);
2106 ir_mode *pred_mode = get_irn_mode(conv_op);
2107 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2115 /* value must be in the same block */
2116 if(get_nodes_block(node) != get_nodes_block(val))
/* Dispatch on the stored value's operation. */
2119 switch(get_irn_opcode(val)) {
2121 op1 = get_Add_left(val);
2122 op2 = get_Add_right(val);
/* Add/Sub by +-1 become IncMem/DecMem. */
2123 if(is_Const_1(op2)) {
2124 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2125 new_rd_ia32_IncMem);
2127 } else if(is_Const_Minus_1(op2)) {
2128 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2129 new_rd_ia32_DecMem);
2132 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2133 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2134 match_dest_am | match_commutative |
2138 op1 = get_Sub_left(val);
2139 op2 = get_Sub_right(val);
2141 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2144 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2145 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2146 match_dest_am | match_immediate |
2150 op1 = get_And_left(val);
2151 op2 = get_And_right(val);
2152 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2153 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2154 match_dest_am | match_commutative |
2158 op1 = get_Or_left(val);
2159 op2 = get_Or_right(val);
2160 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2161 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2162 match_dest_am | match_commutative |
2166 op1 = get_Eor_left(val);
2167 op2 = get_Eor_right(val);
2168 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2169 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2170 match_dest_am | match_commutative |
2174 op1 = get_Shl_left(val);
2175 op2 = get_Shl_right(val);
2176 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2177 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2178 match_dest_am | match_immediate);
2181 op1 = get_Shr_left(val);
2182 op2 = get_Shr_right(val);
2183 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2184 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2185 match_dest_am | match_immediate);
2188 op1 = get_Shrs_left(val);
2189 op2 = get_Shrs_right(val);
2190 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2191 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2192 match_dest_am | match_immediate);
2195 op1 = get_Rot_left(val);
2196 op2 = get_Rot_right(val);
2197 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2198 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2199 match_dest_am | match_immediate);
2201 /* TODO: match ROR patterns... */
2203 new_node = try_create_SetMem(val, ptr, mem);
2206 op1 = get_Minus_op(val);
2207 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2210 /* should be lowered already */
2211 assert(mode != mode_b);
2212 op1 = get_Not_op(val);
2213 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* Inherit pinned state: a pinned Store must stay pinned. */
2219 if(new_node != NULL) {
2220 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2221 get_irn_pinned(node) == op_pin_state_pinned) {
2222 set_irn_pinned(new_node, op_pin_state_pinned);
/* Check whether 'node' is a Conv from a float mode to a 32-bit
 * GP-register integer mode (candidate for vfist store folding). */
2229 static int is_float_to_int32_conv(const ir_node *node)
2231 ir_mode *mode = get_irn_mode(node);
2235 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2240 conv_op = get_Conv_op(node);
2241 conv_mode = get_irn_mode(conv_op);
2243 if(!mode_is_float(conv_mode))
2250 * Transforms a Store.
2252 * @return the created ia32 Store node
/* Transform a firm Store node.
 * Order of attempts:
 *  1. destination address mode (try_create_dest_am),
 *  2. float store: xStore (SSE2) or vfst (x87), skipping redundant
 *     Convs whose operand already has the store mode,
 *  3. float->int32 conversion feeding the store: fold into vfist with
 *     a truncating FPU control word,
 *  4. plain integer Store / Store8Bit, value possibly an immediate. */
2254 static ir_node *gen_Store(ir_node *node)
2256 ir_node *block = get_nodes_block(node);
2257 ir_node *new_block = be_transform_node(block);
2258 ir_node *ptr = get_Store_ptr(node);
2259 ir_node *val = get_Store_value(node);
2260 ir_node *mem = get_Store_mem(node);
2261 ir_graph *irg = current_ir_graph;
2262 dbg_info *dbgi = get_irn_dbg_info(node);
2263 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2264 ir_mode *mode = get_irn_mode(val);
2267 ia32_address_t addr;
2269 /* check for destination address mode */
2270 new_node = try_create_dest_am(node);
2271 if(new_node != NULL)
2274 /* construct store address */
2275 memset(&addr, 0, sizeof(addr));
2276 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2278 if(addr.base == NULL) {
2281 addr.base = be_transform_node(addr.base);
2284 if(addr.index == NULL) {
2287 addr.index = be_transform_node(addr.index);
2289 addr.mem = be_transform_node(mem);
2291 if (mode_is_float(mode)) {
2292 /* convs (and strict-convs) before stores are unnecessary if the mode
2294 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2295 val = get_Conv_op(val);
2297 new_val = be_transform_node(val);
2298 if (ia32_cg_config.use_sse2) {
2299 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2300 addr.index, addr.mem, new_val);
2302 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2303 addr.index, addr.mem, new_val, mode);
2305 } else if(is_float_to_int32_conv(val)) {
/* Fold float->int conversion into a truncating vfist store. */
2306 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2307 val = get_Conv_op(val);
2309 /* convs (and strict-convs) before stores are unnecessary if the mode
2311 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2312 val = get_Conv_op(val);
2314 new_val = be_transform_node(val);
2316 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2317 addr.index, addr.mem, new_val, trunc_mode);
2319 new_val = create_immediate_or_transform(val, 0);
2320 assert(mode != mode_b);
2322 if (get_mode_size_bits(mode) == 8) {
2323 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2324 addr.index, addr.mem, new_val);
2326 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2327 addr.index, addr.mem, new_val);
2331 set_irn_pinned(new_node, get_irn_pinned(node));
2332 set_ia32_op_type(new_node, ia32_AddrModeD);
2333 set_ia32_ls_mode(new_node, mode);
2335 set_address(new_node, &addr);
2336 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transform a Cond with a 32-bit integer selector into a SwitchJmp.
 * Determines the case-value range (ignoring the default Proj), rejects
 * tables larger than 256000 entries, and rebases the selector with a
 * Lea when the smallest case is not 0. */
2341 static ir_node *create_Switch(ir_node *node)
2343 ir_graph *irg = current_ir_graph;
2344 dbg_info *dbgi = get_irn_dbg_info(node);
2345 ir_node *block = be_transform_node(get_nodes_block(node));
2346 ir_node *sel = get_Cond_selector(node);
2347 ir_node *new_sel = be_transform_node(sel);
2348 int switch_min = INT_MAX;
2349 int switch_max = INT_MIN;
2350 long default_pn = get_Cond_defaultProj(node);
2352 const ir_edge_t *edge;
2354 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2356 /* determine the smallest switch case value */
2357 foreach_out_edge(node, edge) {
2358 ir_node *proj = get_edge_src_irn(edge);
2359 long pn = get_Proj_proj(proj);
2360 if(pn == default_pn)
/* Guard against absurdly large jump tables. */
2369 if((unsigned) (switch_max - switch_min) > 256000) {
2370 panic("Size of switch %+F bigger than 256000", node);
2373 if (switch_min != 0) {
2374 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2376 /* if smallest switch case is not 0 we need an additional sub */
2377 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2378 add_ia32_am_offs_int(new_sel, -switch_min);
2379 set_ia32_op_type(new_sel, ia32_AddrModeS);
2381 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2384 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2385 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2391 * Transform a Cond node.
/* Transform a firm Cond node: non-mode_b selectors become a switch
 * jump, boolean selectors become a Jcc driven by a flags node. */
2393 static ir_node *gen_Cond(ir_node *node) {
2394 ir_node *block = get_nodes_block(node);
2395 ir_node *new_block = be_transform_node(block);
2396 ir_graph *irg = current_ir_graph;
2397 dbg_info *dbgi = get_irn_dbg_info(node);
2398 ir_node *sel = get_Cond_selector(node);
2399 ir_mode *sel_mode = get_irn_mode(sel);
2400 ir_node *flags = NULL;
2404 if (sel_mode != mode_b) {
2405 return create_Switch(node);
2408 /* we get flags from a cmp */
2409 flags = get_flags_node(sel, &pnc);
2411 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2412 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2420 * Transforms a CopyB node.
2422 * @return The transformed node.
/* Transform a firm CopyB (block copy) node.
 * Copies of >= 128 bytes use REP-based CopyB with the count in a
 * Const (size/4 words plus remainder); smaller copies use the
 * immediate-size CopyB_i variant. */
2424 static ir_node *gen_CopyB(ir_node *node) {
2425 ir_node *block = be_transform_node(get_nodes_block(node));
2426 ir_node *src = get_CopyB_src(node);
2427 ir_node *new_src = be_transform_node(src);
2428 ir_node *dst = get_CopyB_dst(node);
2429 ir_node *new_dst = be_transform_node(dst);
2430 ir_node *mem = get_CopyB_mem(node);
2431 ir_node *new_mem = be_transform_node(mem);
2432 ir_node *res = NULL;
2433 ir_graph *irg = current_ir_graph;
2434 dbg_info *dbgi = get_irn_dbg_info(node);
2435 int size = get_type_size_bytes(get_CopyB_type(node));
2438 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2439 /* then we need the size explicitly in ECX. */
2440 if (size >= 32 * 4) {
2441 rem = size & 0x3; /* size % 4 */
2444 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
2445 add_irn_dep(res, get_irg_frame(irg));
2447 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
2450 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
2453 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2456 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/* Transform a backend Copy node: duplicate it and normalize GP-mode
 * copies to mode_Iu. */
2461 static ir_node *gen_be_Copy(ir_node *node)
2463 ir_node *new_node = be_duplicate_node(node);
2464 ir_mode *mode = get_irn_mode(new_node);
2466 if (mode_needs_gp_reg(mode)) {
2467 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare for a Cmp node.
 * With fucomi support: vFucomi delivers flags directly. Otherwise
 * use vFtstFnstsw (compare against 0 when the right operand is the
 * constant 0) or vFucomFnstsw, and transfer the FPU status word into
 * the CPU flags with Sahf. */
2473 static ir_node *create_Fucom(ir_node *node)
2475 ir_graph *irg = current_ir_graph;
2476 dbg_info *dbgi = get_irn_dbg_info(node);
2477 ir_node *block = get_nodes_block(node);
2478 ir_node *new_block = be_transform_node(block);
2479 ir_node *left = get_Cmp_left(node);
2480 ir_node *new_left = be_transform_node(left);
2481 ir_node *right = get_Cmp_right(node);
2485 if(ia32_cg_config.use_fucomi) {
2486 new_right = be_transform_node(right);
2487 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2489 set_ia32_commutative(new_node);
2490 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2492 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2493 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2496 new_right = be_transform_node(right);
2497 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2501 set_ia32_commutative(new_node);
2503 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Copy AH (FPU status) into the flags register. */
2505 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2506 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Create an SSE Ucomi(ss/sd) compare for a Cmp node.
 * Uses match_arguments to allow a memory operand (source address mode)
 * and commutation of the operands; fix_mem_proj repairs memory Projs
 * when an address mode with memory was folded in. */
2512 static ir_node *create_Ucomi(ir_node *node)
2514 ir_graph *irg = current_ir_graph;
2515 dbg_info *dbgi = get_irn_dbg_info(node);
2516 ir_node *src_block = get_nodes_block(node);
2517 ir_node *new_block = be_transform_node(src_block);
2518 ir_node *left = get_Cmp_left(node);
2519 ir_node *right = get_Cmp_right(node);
2521 ia32_address_mode_t am;
2522 ia32_address_t *addr = &am.addr;
2524 match_arguments(&am, src_block, left, right, NULL,
2525 match_commutative | match_am);
2527 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2528 addr->mem, am.new_op1, am.new_op2,
2530 set_am_attributes(new_node, &am);
2532 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2534 new_node = fix_mem_proj(new_node, &am);
2540 * helper function: checks wether all Cmp projs are Lg or Eq which is needed
2541 * to fold an and into a test node
/* Return whether all Proj users of a Cmp are Eq or Lg comparisons.
 * Only then may an And feeding the Cmp be folded into a Test instruction
 * (Test only sets flags meaningful for equality-style conditions). */
2543 static int can_fold_test_and(ir_node *node)
2545 const ir_edge_t *edge;
2547 /** we can only have eq and lg projs */
2548 foreach_out_edge(node, edge) {
2549 ir_node *proj = get_edge_src_irn(edge);
2550 pn_Cmp pnc = get_Proj_proj(proj);
2551 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/* Transform a Cmp node into ia32 compare instructions.
 * Float compares are dispatched to create_Ucomi (SSE2) or create_Fucom (x87).
 * Integer compares prefer Test over Cmp where possible:
 *   - Cmp(And(a,b), 0) with a single And user and only Eq/Lg Projs
 *     becomes Test(a, b) (the And is folded away),
 *   - Cmp(x, 0) becomes Test(x, x) unless x can come from memory
 *     (then Cmp mem, 0 is used instead),
 *   - otherwise a plain Cmp(left, right) is emitted.
 * 8-bit operands use the *8Bit node variants.
 * NOTE(review): interior lines (else branches, returns, declarations of
 * new_node/cmp_unsigned) are missing from this listing; left byte-identical. */
2558 static ir_node *gen_Cmp(ir_node *node)
2560 ir_graph *irg = current_ir_graph;
2561 dbg_info *dbgi = get_irn_dbg_info(node);
2562 ir_node *block = get_nodes_block(node);
2563 ir_node *new_block = be_transform_node(block);
2564 ir_node *left = get_Cmp_left(node);
2565 ir_node *right = get_Cmp_right(node);
2566 ir_mode *cmp_mode = get_irn_mode(left);
2568 ia32_address_mode_t am;
2569 ia32_address_t *addr = &am.addr;
2572 if(mode_is_float(cmp_mode)) {
2573 if (ia32_cg_config.use_sse2) {
2574 return create_Ucomi(node);
2576 return create_Fucom(node);
2580 assert(mode_needs_gp_reg(cmp_mode));
2582 /* we prefer the Test instruction where possible except cases where
2583 * we can use SourceAM */
2584 cmp_unsigned = !mode_is_signed(cmp_mode);
2585 if (is_Const_0(right)) {
2587 get_irn_n_edges(left) == 1 &&
2588 can_fold_test_and(node)) {
2589 /* Test(and_left, and_right) */
2590 ir_node *and_left = get_And_left(left);
2591 ir_node *and_right = get_And_right(left);
2592 ir_mode *mode = get_irn_mode(and_left);
2594 match_arguments(&am, block, and_left, and_right, NULL,
2596 match_am | match_8bit_am | match_16bit_am |
2597 match_am_and_immediates | match_immediate |
2598 match_8bit | match_16bit);
2599 if (get_mode_size_bits(mode) == 8) {
2600 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2601 addr->index, addr->mem, am.new_op1,
2602 am.new_op2, am.ins_permuted,
2605 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2606 addr->index, addr->mem, am.new_op1,
2607 am.new_op2, am.ins_permuted, cmp_unsigned);
2610 match_arguments(&am, block, NULL, left, NULL,
2611 match_am | match_8bit_am | match_16bit_am |
2612 match_8bit | match_16bit);
2613 if (am.op_type == ia32_AddrModeS) {
2615 ir_node *imm_zero = try_create_Immediate(right, 0);
2616 if (get_mode_size_bits(cmp_mode) == 8) {
2617 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2618 addr->index, addr->mem, am.new_op2,
2619 imm_zero, am.ins_permuted,
2622 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2623 addr->index, addr->mem, am.new_op2,
2624 imm_zero, am.ins_permuted, cmp_unsigned);
2627 /* Test(left, left) */
2628 if (get_mode_size_bits(cmp_mode) == 8) {
2629 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2630 addr->index, addr->mem, am.new_op2,
2631 am.new_op2, am.ins_permuted,
2634 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2635 addr->index, addr->mem, am.new_op2,
2636 am.new_op2, am.ins_permuted,
2642 /* Cmp(left, right) */
2643 match_arguments(&am, block, left, right, NULL,
2644 match_commutative | match_am | match_8bit_am |
2645 match_16bit_am | match_am_and_immediates |
2646 match_immediate | match_8bit | match_16bit);
2647 if (get_mode_size_bits(cmp_mode) == 8) {
2648 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2649 addr->index, addr->mem, am.new_op1,
2650 am.new_op2, am.ins_permuted,
2653 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2654 addr->index, addr->mem, am.new_op1,
2655 am.new_op2, am.ins_permuted, cmp_unsigned);
/* Common epilogue: record address-mode attributes and the load/store mode. */
2658 set_am_attributes(new_node, &am);
2659 assert(cmp_mode != NULL);
2660 set_ia32_ls_mode(new_node, cmp_mode);
2662 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2664 new_node = fix_mem_proj(new_node, &am);
/* Create an ia32 CMov from a Psi node (val_true/val_false selected by the
 * given flags node under condition code pnc). Requires cmov support
 * (asserted) and gp-register values; allows commutation and memory
 * operands via match_arguments.
 * NOTE(review): some lines (e.g. initialization of `addr`, the pnc
 * parameter declaration, return) fall outside this listing. */
2669 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2672 ir_graph *irg = current_ir_graph;
2673 dbg_info *dbgi = get_irn_dbg_info(node);
2674 ir_node *block = get_nodes_block(node);
2675 ir_node *new_block = be_transform_node(block);
2676 ir_node *val_true = get_Psi_val(node, 0);
2677 ir_node *val_false = get_Psi_default(node);
2679 match_flags_t match_flags;
2680 ia32_address_mode_t am;
2681 ia32_address_t *addr;
2683 assert(ia32_cg_config.use_cmov);
2684 assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2688 match_flags = match_commutative | match_am | match_16bit_am |
2691 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2693 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2694 addr->mem, am.new_op1, am.new_op2, new_flags,
2695 am.ins_permuted, pnc);
2696 set_am_attributes(new_node, &am);
2698 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2700 new_node = fix_mem_proj(new_node, &am);
/* Create a Set(cc) producing a 0/1 value from a flags node, widened with
 * a zero-extending Conv_I2I8Bit (mode_Bu source) when the result mode is
 * wider than 8 bits, since setcc only writes an 8-bit register. */
2707 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2708 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2711 ir_graph *irg = current_ir_graph;
2712 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2713 ir_node *nomem = new_NoMem();
2714 ir_mode *mode = get_irn_mode(orig_node);
2717 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2718 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2720 /* we might need to conv the result up */
2721 if(get_mode_size_bits(mode) > 8) {
2722 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2723 nomem, new_node, mode_Bu);
2724 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2731 * Transforms a Psi node into CMov.
2733 * @return The transformed node.
/* Transform a one-condition Psi (select) on gp values:
 *   Psi(c, 1, 0) -> Set(cc)           (plain setcc)
 *   Psi(c, 0, 1) -> Set(cc) permuted  (inverted setcc)
 *   otherwise    -> CMov.
 * The condition is first turned into a flags-producing node via
 * get_flags_node, which also yields the pn_Cmp condition code. */
2735 static ir_node *gen_Psi(ir_node *node)
2737 dbg_info *dbgi = get_irn_dbg_info(node);
2738 ir_node *block = get_nodes_block(node);
2739 ir_node *new_block = be_transform_node(block);
2740 ir_node *psi_true = get_Psi_val(node, 0);
2741 ir_node *psi_default = get_Psi_default(node);
2742 ir_node *cond = get_Psi_cond(node, 0);
2743 ir_node *flags = NULL;
2747 assert(get_Psi_n_conds(node) == 1);
2748 assert(get_irn_mode(cond) == mode_b);
2749 assert(mode_needs_gp_reg(get_irn_mode(node)));
2751 flags = get_flags_node(cond, &pnc);
2753 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2754 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2755 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2756 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2758 new_node = create_CMov(node, cond, flags, pnc);
2765 * Create a conversion from x87 state register to general purpose.
/* Convert an x87 float value to a general-purpose integer by storing it
 * to the frame with vfist (using a truncating FPU control word) and
 * loading the result back as an integer.
 * Special case: fist can only store signed integers, so a 32-bit
 * unsigned result is stored as a signed 64-bit value (mode_Ls) and only
 * the low 32 bits are loaded back; the load is flagged as needing a
 * 64-bit stack entity in that case. */
2767 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2768 ir_node *block = be_transform_node(get_nodes_block(node));
2769 ir_node *op = get_Conv_op(node);
2770 ir_node *new_op = be_transform_node(op);
2771 ia32_code_gen_t *cg = env_cg;
2772 ir_graph *irg = current_ir_graph;
2773 dbg_info *dbgi = get_irn_dbg_info(node);
2774 ir_node *noreg = ia32_new_NoReg_gp(cg);
2775 ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2776 ir_mode *mode = get_irn_mode(node);
2777 ir_node *fist, *load;
2780 fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2781 new_NoMem(), new_op, trunc_mode);
2783 set_irn_pinned(fist, op_pin_state_floats);
2784 set_ia32_use_frame(fist);
2785 set_ia32_op_type(fist, ia32_AddrModeD);
2787 assert(get_mode_size_bits(mode) <= 32);
2788 /* exception we can only store signed 32 bit integers, so for unsigned
2789 we store a 64bit (signed) integer and load the lower bits */
2790 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2791 set_ia32_ls_mode(fist, mode_Ls);
2793 set_ia32_ls_mode(fist, mode_Is);
2795 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* Load the (low 32 bits of the) stored integer back from the frame slot. */
2798 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2800 set_irn_pinned(load, op_pin_state_floats);
2801 set_ia32_use_frame(load);
2802 set_ia32_op_type(load, ia32_AddrModeS);
2803 set_ia32_ls_mode(load, mode_Is);
2804 if(get_ia32_ls_mode(fist) == mode_Ls) {
2805 ia32_attr_t *attr = get_ia32_attr(load);
2806 attr->data.need_64bit_stackent = 1;
2808 ia32_attr_t *attr = get_ia32_attr(load);
2809 attr->data.need_32bit_stackent = 1;
2811 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2813 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2817 * Creates a x87 strict Conv by placing a Sore and a Load
/* Implement a strict x87 float Conv by a store (vfst) to the frame
 * followed by a reload (vfld): forcing the value through memory rounds
 * it to the target precision, which the 80-bit x87 registers would
 * otherwise not do. Returns the vfld result Proj. */
2819 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2821 ir_node *block = get_nodes_block(node);
2822 ir_graph *irg = current_ir_graph;
2823 dbg_info *dbgi = get_irn_dbg_info(node);
2824 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2825 ir_node *nomem = new_NoMem();
2826 ir_node *frame = get_irg_frame(irg);
2827 ir_node *store, *load;
2830 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
2832 set_ia32_use_frame(store);
2833 set_ia32_op_type(store, ia32_AddrModeD);
2834 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2836 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
2838 set_ia32_use_frame(load);
2839 set_ia32_op_type(load, ia32_AddrModeS);
2840 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
2842 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
2847 * Create a conversion from general purpose to x87 register
/* Convert a general-purpose integer to an x87 float via fild.
 * Fast path: a signed 32-bit source may be loaded by fild directly from
 * memory (source address mode). Otherwise the value is first widened to
 * 32 bits if needed, stored to the frame, and fild'ed from there.
 * Special case: 32-bit unsigned sources are stored as a 64-bit value
 * (an extra zero word is stored at offset 4, both stores Sync'ed) and
 * loaded with store_mode mode_Ls so the sign bit is never misread.
 * NOTE(review): several declarations (noreg, nomem, mode, fild, store,
 * in[], src_bits, new_node) and some statements are outside this listing. */
2849 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
2850 ir_node *src_block = get_nodes_block(node);
2851 ir_node *block = be_transform_node(src_block);
2852 ir_graph *irg = current_ir_graph;
2853 dbg_info *dbgi = get_irn_dbg_info(node);
2854 ir_node *op = get_Conv_op(node);
2855 ir_node *new_op = NULL;
2859 ir_mode *store_mode;
2865 /* fild can use source AM if the operand is a signed 32bit integer */
2866 if (src_mode == mode_Is) {
2867 ia32_address_mode_t am;
2869 match_arguments(&am, src_block, NULL, op, NULL,
2870 match_am | match_try_am);
2871 if (am.op_type == ia32_AddrModeS) {
2872 ia32_address_t *addr = &am.addr;
2874 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
2875 addr->index, addr->mem);
2876 new_node = new_r_Proj(irg, block, fild, mode_vfp,
2879 set_am_attributes(fild, &am);
2880 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
2882 fix_mem_proj(fild, &am);
2887 if(new_op == NULL) {
2888 new_op = be_transform_node(op);
2891 noreg = ia32_new_NoReg_gp(env_cg);
2892 nomem = new_NoMem();
2893 mode = get_irn_mode(op);
2895 /* first convert to 32 bit signed if necessary */
2896 src_bits = get_mode_size_bits(src_mode);
2897 if (src_bits == 8) {
2898 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
2900 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2902 } else if (src_bits < 32) {
2903 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
2905 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2909 assert(get_mode_size_bits(mode) == 32);
/* Spill the integer to the frame so fild can read it from memory. */
2912 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
2915 set_ia32_use_frame(store);
2916 set_ia32_op_type(store, ia32_AddrModeD);
2917 set_ia32_ls_mode(store, mode_Iu);
2919 /* exception for 32bit unsigned, do a 64bit spill+load */
2920 if(!mode_is_signed(mode)) {
2923 ir_node *zero_const = create_Immediate(NULL, 0, 0);
2925 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
2926 get_irg_frame(irg), noreg, nomem,
2929 set_ia32_use_frame(zero_store);
2930 set_ia32_op_type(zero_store, ia32_AddrModeD);
2931 add_ia32_am_offs_int(zero_store, 4);
2932 set_ia32_ls_mode(zero_store, mode_Iu);
2937 store = new_rd_Sync(dbgi, irg, block, 2, in);
2938 store_mode = mode_Ls;
2940 store_mode = mode_Is;
2944 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
2946 set_ia32_use_frame(fild);
2947 set_ia32_op_type(fild, ia32_AddrModeS);
2948 set_ia32_ls_mode(fild, store_mode);
2950 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
2956 * Create a conversion from one integer mode into another one
/* Create an integer-to-integer conversion: only the smaller of the two
 * modes matters (a widening conv sign/zero-extends from it, a narrowing
 * conv truncates to it). Emits Conv_I2I8Bit for an 8-bit smaller mode,
 * Conv_I2I otherwise, and allows a memory source operand. */
2958 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
2959 dbg_info *dbgi, ir_node *block, ir_node *op,
2962 ir_graph *irg = current_ir_graph;
2963 int src_bits = get_mode_size_bits(src_mode);
2964 int tgt_bits = get_mode_size_bits(tgt_mode);
2965 ir_node *new_block = be_transform_node(block);
2967 ir_mode *smaller_mode;
2969 ia32_address_mode_t am;
2970 ia32_address_t *addr = &am.addr;
2973 if (src_bits < tgt_bits) {
2974 smaller_mode = src_mode;
2975 smaller_bits = src_bits;
2977 smaller_mode = tgt_mode;
2978 smaller_bits = tgt_bits;
2981 #ifdef DEBUG_libfirm
2983 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
2988 match_arguments(&am, block, NULL, op, NULL,
2989 match_8bit | match_16bit |
2990 match_am | match_8bit_am | match_16bit_am);
2991 if (smaller_bits == 8) {
2992 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
2993 addr->index, addr->mem, am.new_op2,
2996 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
2997 addr->index, addr->mem, am.new_op2,
3000 set_am_attributes(new_node, &am);
3001 /* match_arguments assume that out-mode = in-mode, this isn't true here
3003 set_ia32_ls_mode(new_node, smaller_mode);
3004 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3005 new_node = fix_mem_proj(new_node, &am);
3010 * Transforms a Conv node.
3012 * @return The created ia32 Conv node
/* Transform a Conv node, dispatching on source/target mode:
 *   - mode_b source: no-op (booleans are already modelled as 0/1 ints),
 *   - same mode: no-op, except a strict x87 conv must still round,
 *   - float -> float: SSE Conv_FP2FP, or x87 store/load for strict convs,
 *   - float -> int:   SSE Conv_FP2I, or gen_x87_fp_to_gp,
 *   - int -> float:   SSE Conv_I2FP, or gen_x87_gp_to_fp (+ strict round),
 *   - int -> int:     no-op when bit sizes match, else create_I2I_Conv.
 * NOTE(review): several else/return lines are missing from this listing;
 * code left byte-identical. */
3014 static ir_node *gen_Conv(ir_node *node) {
3015 ir_node *block = get_nodes_block(node);
3016 ir_node *new_block = be_transform_node(block);
3017 ir_node *op = get_Conv_op(node);
3018 ir_node *new_op = NULL;
3019 ir_graph *irg = current_ir_graph;
3020 dbg_info *dbgi = get_irn_dbg_info(node);
3021 ir_mode *src_mode = get_irn_mode(op);
3022 ir_mode *tgt_mode = get_irn_mode(node);
3023 int src_bits = get_mode_size_bits(src_mode);
3024 int tgt_bits = get_mode_size_bits(tgt_mode);
3025 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3026 ir_node *nomem = new_rd_NoMem(irg);
3027 ir_node *res = NULL;
3029 if (src_mode == mode_b) {
3030 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3031 /* nothing to do, we already model bools as 0/1 ints */
3032 return be_transform_node(op);
3035 if (src_mode == tgt_mode) {
3036 if (get_Conv_strict(node)) {
3037 if (ia32_cg_config.use_sse2) {
3038 /* when we are in SSE mode, we can kill all strict no-op conversion */
3039 return be_transform_node(op);
3042 /* this should be optimized already, but who knows... */
3043 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3044 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3045 return be_transform_node(op);
3049 if (mode_is_float(src_mode)) {
3050 new_op = be_transform_node(op);
3051 /* we convert from float ... */
3052 if (mode_is_float(tgt_mode)) {
3053 if(src_mode == mode_E && tgt_mode == mode_D
3054 && !get_Conv_strict(node)) {
3055 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3060 if (ia32_cg_config.use_sse2) {
3061 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3062 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3064 set_ia32_ls_mode(res, tgt_mode);
3066 if(get_Conv_strict(node)) {
3067 res = gen_x87_strict_conv(tgt_mode, new_op);
3068 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3071 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3076 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3077 if (ia32_cg_config.use_sse2) {
3078 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3080 set_ia32_ls_mode(res, src_mode);
3082 return gen_x87_fp_to_gp(node);
3086 /* we convert from int ... */
3087 if (mode_is_float(tgt_mode)) {
3089 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3090 if (ia32_cg_config.use_sse2) {
3091 new_op = be_transform_node(op);
3092 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3094 set_ia32_ls_mode(res, tgt_mode);
3096 res = gen_x87_gp_to_fp(node, src_mode);
3097 if(get_Conv_strict(node)) {
3098 res = gen_x87_strict_conv(tgt_mode, res);
3099 SET_IA32_ORIG_NODE(get_Proj_pred(res),
3100 ia32_get_old_node_name(env_cg, node));
3104 } else if(tgt_mode == mode_b) {
3105 /* mode_b lowering already took care that we only have 0/1 values */
3106 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3107 src_mode, tgt_mode));
3108 return be_transform_node(op);
3111 if (src_bits == tgt_bits) {
3112 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3113 src_mode, tgt_mode));
3114 return be_transform_node(op);
3117 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Check whether an integer value satisfies a gcc-style x86 immediate
 * constraint letter. The visible ranges match the documented x86
 * machine constraints (e.g. 0..31/shift counts, 0..63, signed 8-bit,
 * 0xff/0xffff masks, 0..3, 0..255, 0..127); the constraint letters
 * themselves (case labels) are on lines missing from this listing. */
3125 static int check_immediate_constraint(long val, char immediate_constraint_type)
3127 switch (immediate_constraint_type) {
3131 return val >= 0 && val <= 32;
3133 return val >= 0 && val <= 63;
3135 return val >= -128 && val <= 127;
3137 return val == 0xff || val == 0xffff;
3139 return val >= 0 && val <= 3;
3141 return val >= 0 && val <= 255;
3143 return val >= 0 && val <= 127;
3147 panic("Invalid immediate constraint found");
/* Try to express a node as an ia32 Immediate. Accepts Const, SymConst
 * (entity addresses only), Add/Sub of Const and SymConst, optionally
 * wrapped in a Minus (tracked via offset_sign/symconst_sign). Returns
 * NULL when the node has no int/reference mode, the value violates the
 * given constraint letter, a symconst would be negated (not supported
 * by assembler/linker), or a symconst appears under a restricted
 * immediate constraint (symconsts need the full 32 bits). */
3151 static ir_node *try_create_Immediate(ir_node *node,
3152 char immediate_constraint_type)
3155 tarval *offset = NULL;
3156 int offset_sign = 0;
3158 ir_entity *symconst_ent = NULL;
3159 int symconst_sign = 0;
3161 ir_node *cnst = NULL;
3162 ir_node *symconst = NULL;
3165 mode = get_irn_mode(node);
3166 if(!mode_is_int(mode) && !mode_is_reference(mode)) {
3170 if(is_Minus(node)) {
3172 node = get_Minus_op(node);
3175 if(is_Const(node)) {
3178 offset_sign = minus;
3179 } else if(is_SymConst(node)) {
3182 symconst_sign = minus;
3183 } else if(is_Add(node)) {
3184 ir_node *left = get_Add_left(node);
3185 ir_node *right = get_Add_right(node);
3186 if(is_Const(left) && is_SymConst(right)) {
3189 symconst_sign = minus;
3190 offset_sign = minus;
3191 } else if(is_SymConst(left) && is_Const(right)) {
3194 symconst_sign = minus;
3195 offset_sign = minus;
3197 } else if(is_Sub(node)) {
3198 ir_node *left = get_Sub_left(node);
3199 ir_node *right = get_Sub_right(node);
3200 if(is_Const(left) && is_SymConst(right)) {
3203 symconst_sign = !minus;
3204 offset_sign = minus;
3205 } else if(is_SymConst(left) && is_Const(right)) {
3208 symconst_sign = minus;
3209 offset_sign = !minus;
3216 offset = get_Const_tarval(cnst);
3217 if(tarval_is_long(offset)) {
3218 val = get_tarval_long(offset);
3220 ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
3225 if(!check_immediate_constraint(val, immediate_constraint_type))
3228 if(symconst != NULL) {
3229 if(immediate_constraint_type != 0) {
3230 /* we need full 32bits for symconsts */
3234 /* unfortunately the assembler/linker doesn't support -symconst */
3238 if(get_SymConst_kind(symconst) != symconst_addr_ent)
3240 symconst_ent = get_SymConst_entity(symconst);
3242 if(cnst == NULL && symconst == NULL)
3245 if(offset_sign && offset != NULL) {
3246 offset = tarval_neg(offset);
3249 new_node = create_Immediate(symconst_ent, symconst_sign, val);
/* Convenience wrapper: produce an Immediate for the node if possible,
 * otherwise fall back to the normal transformation of the node. */
3254 static ir_node *create_immediate_or_transform(ir_node *node,
3255 char immediate_constraint_type)
3257 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3258 if (new_node == NULL) {
3259 new_node = be_transform_node(node);
/* Shared "no register required" request, used for memory constraints and
 * the ASM mem-proj slot (no class, no limited set). */
3264 static const arch_register_req_t no_register_req = {
3265 arch_register_req_type_none,
3266 NULL, /* regclass */
3267 NULL, /* limit bitset */
3269 0 /* different pos */
3273 * An assembler constraint.
/* Parsed form of one gcc inline-asm constraint: the resulting register
 * requirement, whether an immediate operand is allowed and of which
 * constraint letter. out_reqs points at the output-requirement array so
 * "same as" input constraints can be swapped onto the matching output
 * (see parse_asm_constraint). Some fields fall outside this listing. */
3275 typedef struct constraint_t constraint_t;
3276 struct constraint_t {
3279 const arch_register_req_t **out_reqs;
3281 const arch_register_req_t *req;
3282 unsigned immediate_possible;
3283 char immediate_type;
/* Parse one gcc inline-asm constraint string into a constraint_t.
 * Visible handling:
 *   - memory constraints map to no_register_req (the memory edge already
 *     models the dependency),
 *   - single-register letters (a/b/c/d/D/S) restrict the gp class to one
 *     register via the `limited` bitmask,
 *   - q/A and the abcd-class letters build multi-register limited masks,
 *   - r-like letters select the whole gp class, t/u the x87 (vfp) class,
 *     x the SSE (xmm) class,
 *   - immediate letters (I..O etc., and the plain-int case) set
 *     immediate_possible/immediate_type,
 *   - a digit means "same as output n": the requirement is swapped with
 *     the referenced output requirement, because firm records same-as on
 *     outputs while gcc syntax records it on inputs,
 *   - unsupported letters (E,F,s,X,<,>,C,G,y,Z,e) panic.
 * Finally an arch_register_req_t is allocated on the irg obstack,
 * limited if any register mask was collected.
 * NOTE(review): many case labels and closing braces are missing from this
 * listing; code left byte-identical. */
3286 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3288 int immediate_possible = 0;
3289 char immediate_type = 0;
3290 unsigned limited = 0;
3291 const arch_register_class_t *cls = NULL;
3292 ir_graph *irg = current_ir_graph;
3293 struct obstack *obst = get_irg_obstack(irg);
3294 arch_register_req_t *req;
3295 unsigned *limited_ptr = NULL;
3299 /* TODO: replace all the asserts with nice error messages */
3302 /* a memory constraint: no need to do anything in backend about it
3303 * (the dependencies are already respected by the memory edge of
3305 constraint->req = &no_register_req;
3317 assert(cls == NULL ||
3318 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3319 cls = &ia32_reg_classes[CLASS_ia32_gp];
3320 limited |= 1 << REG_EAX;
3323 assert(cls == NULL ||
3324 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3325 cls = &ia32_reg_classes[CLASS_ia32_gp];
3326 limited |= 1 << REG_EBX;
3329 assert(cls == NULL ||
3330 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3331 cls = &ia32_reg_classes[CLASS_ia32_gp];
3332 limited |= 1 << REG_ECX;
3335 assert(cls == NULL ||
3336 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3337 cls = &ia32_reg_classes[CLASS_ia32_gp];
3338 limited |= 1 << REG_EDX;
3341 assert(cls == NULL ||
3342 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3343 cls = &ia32_reg_classes[CLASS_ia32_gp];
3344 limited |= 1 << REG_EDI;
3347 assert(cls == NULL ||
3348 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3349 cls = &ia32_reg_classes[CLASS_ia32_gp];
3350 limited |= 1 << REG_ESI;
3353 case 'q': /* q means lower part of the regs only, this makes no
3354 * difference to Q for us (we only assigne whole registers) */
3355 assert(cls == NULL ||
3356 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3357 cls = &ia32_reg_classes[CLASS_ia32_gp];
3358 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3362 assert(cls == NULL ||
3363 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3364 cls = &ia32_reg_classes[CLASS_ia32_gp];
3365 limited |= 1 << REG_EAX | 1 << REG_EDX;
3368 assert(cls == NULL ||
3369 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3370 cls = &ia32_reg_classes[CLASS_ia32_gp];
3371 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3372 1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
3379 assert(cls == NULL);
3380 cls = &ia32_reg_classes[CLASS_ia32_gp];
3386 /* TODO: mark values so the x87 simulator knows about t and u */
3387 assert(cls == NULL);
3388 cls = &ia32_reg_classes[CLASS_ia32_vfp];
3393 assert(cls == NULL);
3394 /* TODO: check that sse2 is supported */
3395 cls = &ia32_reg_classes[CLASS_ia32_xmm];
3405 assert(!immediate_possible);
3406 immediate_possible = 1;
3407 immediate_type = *c;
3411 assert(!immediate_possible);
3412 immediate_possible = 1;
3416 assert(!immediate_possible && cls == NULL);
3417 immediate_possible = 1;
3418 cls = &ia32_reg_classes[CLASS_ia32_gp];
3431 assert(constraint->is_in && "can only specify same constraint "
3434 sscanf(c, "%d%n", &same_as, &p);
3442 /* memory constraint no need to do anything in backend about it
3443 * (the dependencies are already respected by the memory edge of
3445 constraint->req = &no_register_req;
3448 case 'E': /* no float consts yet */
3449 case 'F': /* no float consts yet */
3450 case 's': /* makes no sense on x86 */
3451 case 'X': /* we can't support that in firm */
3454 case '<': /* no autodecrement on x86 */
3455 case '>': /* no autoincrement on x86 */
3456 case 'C': /* sse constant not supported yet */
3457 case 'G': /* 80387 constant not supported yet */
3458 case 'y': /* we don't support mmx registers yet */
3459 case 'Z': /* not available in 32 bit mode */
3460 case 'e': /* not available in 32 bit mode */
3461 panic("unsupported asm constraint '%c' found in (%+F)",
3462 *c, current_ir_graph);
3465 panic("unknown asm constraint '%c' found in (%+F)", *c,
/* "same as output n" handling: build a should_be_same requirement on the
 * output and take the output's old requirement for this input. */
3473 const arch_register_req_t *other_constr;
3475 assert(cls == NULL && "same as and register constraint not supported");
3476 assert(!immediate_possible && "same as and immediate constraint not "
3478 assert(same_as < constraint->n_outs && "wrong constraint number in "
3479 "same_as constraint");
3481 other_constr = constraint->out_reqs[same_as];
3483 req = obstack_alloc(obst, sizeof(req[0]));
3484 req->cls = other_constr->cls;
3485 req->type = arch_register_req_type_should_be_same;
3486 req->limited = NULL;
3487 req->other_same = 1U << pos;
3488 req->other_different = 0;
3490 /* switch constraints. This is because in firm we have same_as
3491 * constraints on the output constraints while in the gcc asm syntax
3492 * they are specified on the input constraints */
3493 constraint->req = other_constr;
3494 constraint->out_reqs[same_as] = req;
3495 constraint->immediate_possible = 0;
3499 if(immediate_possible && cls == NULL) {
3500 cls = &ia32_reg_classes[CLASS_ia32_gp];
3502 assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3503 assert(cls != NULL);
3505 if(immediate_possible) {
3506 assert(constraint->is_in
3507 && "immediate make no sense for output constraints");
3509 /* todo: check types (no float input on 'r' constrained in and such... */
/* Allocate the requirement; an extra unsigned after it holds the
 * limited-register bitmask when one was collected. */
3512 req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3513 limited_ptr = (unsigned*) (req+1);
3515 req = obstack_alloc(obst, sizeof(req[0]));
3517 memset(req, 0, sizeof(req[0]));
3520 req->type = arch_register_req_type_limited;
3521 *limited_ptr = limited;
3522 req->limited = limited_ptr;
3524 req->type = arch_register_req_type_normal;
3528 constraint->req = req;
3529 constraint->immediate_possible = immediate_possible;
3530 constraint->immediate_type = immediate_type;
/* Parse one asm clobber name into a limited register requirement.
 * Looks the name up linearly in all register classes (for the gp class
 * also matching without the leading 'e', so "ax" matches "eax"), panics
 * on unknown names, and builds a one-bit limited requirement for the
 * clobbered register on the irg obstack. */
3533 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3534 const char *clobber)
3536 ir_graph *irg = get_irn_irg(node);
3537 struct obstack *obst = get_irg_obstack(irg);
3538 const arch_register_t *reg = NULL;
3541 arch_register_req_t *req;
3542 const arch_register_class_t *cls;
3547 /* TODO: construct a hashmap instead of doing linear search for clobber
3549 for(c = 0; c < N_CLASSES; ++c) {
3550 cls = & ia32_reg_classes[c];
3551 for(r = 0; r < cls->n_regs; ++r) {
3552 const arch_register_t *temp_reg = arch_register_for_index(cls, r);
3553 if(strcmp(temp_reg->name, clobber) == 0
3554 || (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3563 panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* The limited set is a single-word bitmask, so indices must fit 32 bits. */
3567 assert(reg->index < 32);
3569 limited = obstack_alloc(obst, sizeof(limited[0]));
3570 *limited = 1 << reg->index;
3572 req = obstack_alloc(obst, sizeof(req[0]));
3573 memset(req, 0, sizeof(req[0]));
3574 req->type = arch_register_req_type_limited;
3576 req->limited = limited;
3578 constraint->req = req;
3579 constraint->immediate_possible = 0;
3580 constraint->immediate_type = 0;
/* Return whether an asm constraint string denotes a memory operand
 * (scans the constraint's characters; the matching test and return fall
 * on lines outside this listing). */
3583 static int is_memory_op(const ir_asm_constraint *constraint)
3585 ident *id = constraint->constraint;
3586 const char *str = get_id_str(id);
3589 for(c = str; *c != '\0'; ++c) {
3598 * generates code for a ASM node
/* Transform an ASM node into an ia32_Asm node.
 * Visible steps:
 *   1. parse output constraints and clobbers into out_reg_reqs (one
 *      no_register_req slot is reserved for the memory proj),
 *   2. parse input constraints into in_reg_reqs, turning operands into
 *      Immediates where the constraint permits,
 *   3. build the register_map (ia32_asm_reg_t per template position,
 *      recording input/output index, memory-ness and mode),
 *   4. transform the remaining inputs and create the ia32_Asm node with
 *      the collected requirement arrays.
 * NOTE(review): several declarations (in, arity, out_arity, clobbers,
 * new_node, i) and some statements are missing from this listing; in
 * particular the out_reg_reqs[i+1] indexing in the clobber branch cannot
 * be judged without the missing surrounding lines — left byte-identical. */
3600 static ir_node *gen_ASM(ir_node *node)
3603 ir_graph *irg = current_ir_graph;
3604 ir_node *block = get_nodes_block(node);
3605 ir_node *new_block = be_transform_node(block);
3606 dbg_info *dbgi = get_irn_dbg_info(node);
3610 int n_out_constraints;
3612 const arch_register_req_t **out_reg_reqs;
3613 const arch_register_req_t **in_reg_reqs;
3614 ia32_asm_reg_t *register_map;
3615 unsigned reg_map_size = 0;
3616 struct obstack *obst;
3617 const ir_asm_constraint *in_constraints;
3618 const ir_asm_constraint *out_constraints;
3620 constraint_t parsed_constraint;
3622 arity = get_irn_arity(node);
3623 in = alloca(arity * sizeof(in[0]));
3624 memset(in, 0, arity * sizeof(in[0]));
3626 n_out_constraints = get_ASM_n_output_constraints(node);
3627 n_clobbers = get_ASM_n_clobbers(node);
3628 out_arity = n_out_constraints + n_clobbers;
3629 /* hack to keep space for mem proj */
3633 in_constraints = get_ASM_input_constraints(node);
3634 out_constraints = get_ASM_output_constraints(node);
3635 clobbers = get_ASM_clobbers(node);
3637 /* construct output constraints */
3638 obst = get_irg_obstack(irg);
3639 out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3640 parsed_constraint.out_reqs = out_reg_reqs;
3641 parsed_constraint.n_outs = n_out_constraints;
3642 parsed_constraint.is_in = 0;
3644 for(i = 0; i < out_arity; ++i) {
3647 if(i < n_out_constraints) {
3648 const ir_asm_constraint *constraint = &out_constraints[i];
3649 c = get_id_str(constraint->constraint);
3650 parse_asm_constraint(i, &parsed_constraint, c);
3652 if(constraint->pos > reg_map_size)
3653 reg_map_size = constraint->pos;
3655 out_reg_reqs[i] = parsed_constraint.req;
3656 } else if(i < out_arity - 1) {
3657 ident *glob_id = clobbers [i - n_out_constraints];
3658 assert(glob_id != NULL);
3659 c = get_id_str(glob_id);
3660 parse_clobber(node, i, &parsed_constraint, c);
3662 out_reg_reqs[i+1] = parsed_constraint.req;
3666 out_reg_reqs[n_out_constraints] = &no_register_req;
3668 /* construct input constraints */
3669 in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3670 parsed_constraint.is_in = 1;
3671 for(i = 0; i < arity; ++i) {
3672 const ir_asm_constraint *constraint = &in_constraints[i];
3673 ident *constr_id = constraint->constraint;
3674 const char *c = get_id_str(constr_id);
3676 parse_asm_constraint(i, &parsed_constraint, c);
3677 in_reg_reqs[i] = parsed_constraint.req;
3679 if(constraint->pos > reg_map_size)
3680 reg_map_size = constraint->pos;
3682 if(parsed_constraint.immediate_possible) {
3683 ir_node *pred = get_irn_n(node, i);
3684 char imm_type = parsed_constraint.immediate_type;
3685 ir_node *immediate = try_create_Immediate(pred, imm_type);
3687 if(immediate != NULL) {
/* Build the template-position -> operand map used by the emitter. */
3694 register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3695 memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
3697 for(i = 0; i < n_out_constraints; ++i) {
3698 const ir_asm_constraint *constraint = &out_constraints[i];
3699 unsigned pos = constraint->pos;
3701 assert(pos < reg_map_size);
3702 register_map[pos].use_input = 0;
3703 register_map[pos].valid = 1;
3704 register_map[pos].memory = is_memory_op(constraint);
3705 register_map[pos].inout_pos = i;
3706 register_map[pos].mode = constraint->mode;
3709 /* transform inputs */
3710 for(i = 0; i < arity; ++i) {
3711 const ir_asm_constraint *constraint = &in_constraints[i];
3712 unsigned pos = constraint->pos;
3713 ir_node *pred = get_irn_n(node, i);
3714 ir_node *transformed;
3716 assert(pos < reg_map_size);
3717 register_map[pos].use_input = 1;
3718 register_map[pos].valid = 1;
3719 register_map[pos].memory = is_memory_op(constraint);
3720 register_map[pos].inout_pos = i;
3721 register_map[pos].mode = constraint->mode;
3726 transformed = be_transform_node(pred);
3727 in[i] = transformed;
3730 new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3731 get_ASM_text(node), register_map);
3733 set_ia32_out_req_all(new_node, out_reg_reqs);
3734 set_ia32_in_req_all(new_node, in_reg_reqs);
3736 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3742 * Transforms a be_FrameAddr into an ia32 Lea carrying the frame entity
 * (the comment previously said "Add", but the node built below is a Lea).
3744 static ir_node *gen_be_FrameAddr(ir_node *node) {
3745 ir_node *block = be_transform_node(get_nodes_block(node));
3746 ir_node *op = be_get_FrameAddr_frame(node);
3747 ir_node *new_op = be_transform_node(op);
3748 ir_graph *irg = current_ir_graph;
3749 dbg_info *dbgi = get_irn_dbg_info(node);
3750 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
 /* base = transformed frame pointer, index = NoReg; the concrete offset is
  * resolved later from the frame entity attached below */
3753 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3754 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3755 set_ia32_use_frame(new_node);
3757 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3763 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
 *
 * If the function returns no value, or SSE2 is not in use, or the single
 * result is not a primitive float, the Return is simply duplicated.
 * Otherwise an xStoreSimple (spill xmm result to the frame) followed by a
 * vfld (reload into the x87 register file) is inserted before the Barrier,
 * and the Barrier is rebuilt with the rerouted value/memory inputs.
3765 static ir_node *gen_be_Return(ir_node *node) {
3766 ir_graph *irg = current_ir_graph;
3767 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3768 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3769 ir_entity *ent = get_irg_entity(irg);
3770 ir_type *tp = get_entity_type(ent);
3775 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3776 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3779 int pn_ret_val, pn_ret_mem, arity, i;
3781 assert(ret_val != NULL);
 /* fast path: nothing to do unless we return an SSE float value */
3782 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3783 return be_duplicate_node(node);
3786 res_type = get_method_res_type(tp, 0);
3788 if (! is_Primitive_type(res_type)) {
3789 return be_duplicate_node(node);
3792 mode = get_type_mode(res_type);
3793 if (! mode_is_float(mode)) {
3794 return be_duplicate_node(node);
3797 assert(get_method_n_ress(tp) == 1);
3799 pn_ret_val = get_Proj_proj(ret_val);
3800 pn_ret_mem = get_Proj_proj(ret_mem);
3802 /* get the Barrier */
3803 barrier = get_Proj_pred(ret_val);
3805 /* get result input of the Barrier */
3806 ret_val = get_irn_n(barrier, pn_ret_val);
3807 new_ret_val = be_transform_node(ret_val);
3809 /* get memory input of the Barrier */
3810 ret_mem = get_irn_n(barrier, pn_ret_mem);
3811 new_ret_mem = be_transform_node(ret_mem);
3813 frame = get_irg_frame(irg);
3815 dbgi = get_irn_dbg_info(barrier);
3816 block = be_transform_node(get_nodes_block(barrier));
3818 noreg = ia32_new_NoReg_gp(env_cg);
3820 /* store xmm0 onto stack */
3821 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3822 new_ret_mem, new_ret_val);
3823 set_ia32_ls_mode(sse_store, mode);
3824 set_ia32_op_type(sse_store, ia32_AddrModeD);
3825 set_ia32_use_frame(sse_store);
3827 /* load into x87 register */
3828 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3829 set_ia32_op_type(fld, ia32_AddrModeS);
3830 set_ia32_use_frame(fld);
3832 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3833 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3835 /* create a new barrier */
3836 arity = get_irn_arity(barrier);
3837 in = alloca(arity * sizeof(in[0]));
3838 for (i = 0; i < arity; ++i) {
 /* reroute value/memory inputs through the fld/mproj built above;
  * all other Barrier inputs are transformed as usual */
3841 if (i == pn_ret_val) {
3843 } else if (i == pn_ret_mem) {
3846 ir_node *in = get_irn_n(barrier, i);
3847 new_in = be_transform_node(in);
3852 new_barrier = new_ir_node(dbgi, irg, block,
3853 get_irn_op(barrier), get_irn_mode(barrier),
3855 copy_node_attr(barrier, new_barrier);
3856 be_duplicate_deps(barrier, new_barrier);
 /* register the replacement so the Return's predecessor lookup finds it */
3857 be_set_transformed_node(barrier, new_barrier);
3858 mark_irn_visited(barrier);
3860 /* transform normally */
3861 return be_duplicate_node(node);
3865 * Transform a be_AddSP into an ia32_SubSP.
 * (Stack grows downwards on ia32, so adding room on the stack is an x86 SUB.)
3867 static ir_node *gen_be_AddSP(ir_node *node)
3869 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3870 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3872 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
3876 * Transform a be_SubSP into an ia32_AddSP
 * (mirror of gen_be_AddSP: shrinking the stack frame is an x86 ADD on esp).
3878 static ir_node *gen_be_SubSP(ir_node *node)
3880 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3881 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3883 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
3887 * This function just sets the register for the Unknown node
3888 * as this is not done during register allocation because Unknown
3889 * is an "ignore" node.
 *
 * Float Unknowns become either the cached xmm Unknown (SSE2) or a vfldz
 * (load zero) for the x87 path; gp-mode Unknowns use the cached gp Unknown.
 * Any other mode is a hard error.
3891 static ir_node *gen_Unknown(ir_node *node) {
3892 ir_mode *mode = get_irn_mode(node);
3894 if (mode_is_float(mode)) {
3895 if (ia32_cg_config.use_sse2) {
3896 return ia32_new_Unknown_xmm(env_cg);
3898 /* Unknown nodes are buggy in x87 sim, use zero for now... */
3899 ir_graph *irg = current_ir_graph;
3900 dbg_info *dbgi = get_irn_dbg_info(node);
3901 ir_node *block = get_irg_start_block(irg);
3902 return new_rd_ia32_vfldz(dbgi, irg, block);
3904 } else if (mode_needs_gp_reg(mode)) {
3905 return ia32_new_Unknown_gp(env_cg);
3907 panic("unsupported Unknown-Mode");
3913 * Change some phi modes
 *
 * Rebuilds the Phi with a backend register-class mode (gp/xmm/vfp).
 * The predecessors are deliberately left untransformed here: Phis may sit
 * on loops, so they are fixed up later (see be_enqueue_preds below).
3915 static ir_node *gen_Phi(ir_node *node) {
3916 ir_node *block = be_transform_node(get_nodes_block(node));
3917 ir_graph *irg = current_ir_graph;
3918 dbg_info *dbgi = get_irn_dbg_info(node);
3919 ir_mode *mode = get_irn_mode(node);
3922 if(mode_needs_gp_reg(mode)) {
3923 /* we shouldn't have any 64bit stuff around anymore */
3924 assert(get_mode_size_bits(mode) <= 32);
3925 /* all integer operations are on 32bit registers now */
3927 } else if(mode_is_float(mode)) {
3928 if (ia32_cg_config.use_sse2) {
3935 /* phi nodes allow loops, so we use the old arguments for now
3936 * and fix this later */
3937 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3938 get_irn_in(node) + 1);
3939 copy_node_attr(node, phi);
3940 be_duplicate_deps(node, phi);
3942 be_set_transformed_node(node, phi);
 /* make sure the (untransformed) predecessors get visited later */
3943 be_enqueue_preds(node);
/**
 * Transform an IJmp (indirect jump) into an ia32_IJmp, allowing address-mode
 * and immediate matching for the jump target.
 */
3951 static ir_node *gen_IJmp(ir_node *node)
3953 ir_node *block = get_nodes_block(node);
3954 ir_node *new_block = be_transform_node(block);
3955 ir_graph *irg = current_ir_graph;
3956 dbg_info *dbgi = get_irn_dbg_info(node);
3957 ir_node *op = get_IJmp_target(node);
3959 ia32_address_mode_t am;
3960 ia32_address_t *addr = &am.addr;
 /* jump targets are pointers */
3962 assert(get_irn_mode(op) == mode_P);
3964 match_arguments(&am, block, NULL, op, NULL,
3965 match_am | match_8bit_am | match_16bit_am |
3966 match_immediate | match_8bit | match_16bit);
3968 new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
3969 addr->mem, am.new_op2);
3970 set_am_attributes(new_node, &am);
3971 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
 /* if address mode folded a load, route its memory Proj correctly */
3973 new_node = fix_mem_proj(new_node, &am);
/* Constructor-function signatures used by gen_lowered_Load/gen_lowered_Store
 * to build the concrete ia32 load/store node. */
3978 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3981 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
3982 ir_node *val, ir_node *mem);
3985 * Transforms a lowered Load into a "real" one.
 *
 * @param node  the lowered load (inputs: 0 = ptr, 1 = mem)
 * @param func  constructor for the concrete ia32 load node
 * @return the newly built ia32 load; address-mode attributes (offset, scale,
 *         symconst, frame entity) are copied over from @p node
3987 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
3989 ir_node *block = be_transform_node(get_nodes_block(node));
3990 ir_node *ptr = get_irn_n(node, 0);
3991 ir_node *new_ptr = be_transform_node(ptr);
3992 ir_node *mem = get_irn_n(node, 1);
3993 ir_node *new_mem = be_transform_node(mem);
3994 ir_graph *irg = current_ir_graph;
3995 dbg_info *dbgi = get_irn_dbg_info(node);
3996 ir_mode *mode = get_ia32_ls_mode(node);
3997 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4000 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
 /* carry over the address-mode attributes of the lowered node */
4002 set_ia32_op_type(new_op, ia32_AddrModeS);
4003 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
4004 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
4005 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
4006 if (is_ia32_am_sc_sign(node))
4007 set_ia32_am_sc_sign(new_op);
4008 set_ia32_ls_mode(new_op, mode);
4009 if (is_ia32_use_frame(node)) {
4010 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4011 set_ia32_use_frame(new_op);
4014 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4020 * Transforms a lowered Store into a "real" one.
 *
 * @param node  the lowered store (inputs: 0 = ptr, 1 = val, 2 = mem)
 * @param func  constructor for the concrete ia32 store node
 * @return the newly built ia32 store with offset/frame attributes copied over
4022 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
4024 ir_node *block = be_transform_node(get_nodes_block(node));
4025 ir_node *ptr = get_irn_n(node, 0);
4026 ir_node *new_ptr = be_transform_node(ptr);
4027 ir_node *val = get_irn_n(node, 1);
4028 ir_node *new_val = be_transform_node(val);
4029 ir_node *mem = get_irn_n(node, 2);
4030 ir_node *new_mem = be_transform_node(mem);
4031 ir_graph *irg = current_ir_graph;
4032 dbg_info *dbgi = get_irn_dbg_info(node);
4033 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4034 ir_mode *mode = get_ia32_ls_mode(node);
4038 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
4040 am_offs = get_ia32_am_offs_int(node);
4041 add_ia32_am_offs_int(new_op, am_offs);
4043 set_ia32_op_type(new_op, ia32_AddrModeD);
4044 set_ia32_ls_mode(new_op, mode);
4045 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4046 set_ia32_use_frame(new_op);
4048 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/** Transform an ia32_l_ShlDep (shift-left with explicit count dependency)
 * into a real ia32 Shl via the generic shift-binop matcher. */
4053 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4055 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4056 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4058 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
4059 match_immediate | match_mode_neutral);
/** Transform an ia32_l_ShrDep into a real ia32 Shr (logical shift right). */
4062 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4064 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4065 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4066 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/** Transform an ia32_l_SarDep into a real ia32 Sar (arithmetic shift right). */
4070 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4072 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4073 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4074 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/** Transform an ia32_l_Add (lowered 64bit add, low word) into an ia32 Add.
 * The result node is forced to mode_T so its flags output can feed the
 * matching Adc of the high word. */
4078 static ir_node *gen_ia32_l_Add(ir_node *node) {
4079 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4080 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4081 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
4082 match_commutative | match_am | match_immediate |
4083 match_mode_neutral);
 /* gen_binop may have wrapped the Add in a result Proj; unwrap it */
4085 if(is_Proj(lowered)) {
4086 lowered = get_Proj_pred(lowered);
4088 assert(is_ia32_Add(lowered));
4089 set_irn_mode(lowered, mode_T);
/** Transform an ia32_l_Adc (add with carry, high word of a 64bit add)
 * into an ia32 Adc consuming the flags produced by the low-word Add. */
4095 static ir_node *gen_ia32_l_Adc(ir_node *node)
4097 return gen_binop_flags(node, new_rd_ia32_Adc,
4098 match_commutative | match_am | match_immediate |
4099 match_mode_neutral);
4103 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
 * (thin wrapper around gen_lowered_Load with the vfild constructor).
4105 * @param node The node to transform
4106 * @return the created ia32 vfild node
4108 static ir_node *gen_ia32_l_vfild(ir_node *node) {
4109 return gen_lowered_Load(node, new_rd_ia32_vfild);
4113 * Transforms an ia32_l_Load into a "real" ia32_Load node
 * (thin wrapper around gen_lowered_Load with the Load constructor).
4115 * @param node The node to transform
4116 * @return the created ia32 Load node
4118 static ir_node *gen_ia32_l_Load(ir_node *node) {
4119 return gen_lowered_Load(node, new_rd_ia32_Load);
4123 * Transforms an ia32_l_Store into a "real" ia32_Store node
 * (thin wrapper around gen_lowered_Store with the Store constructor).
4125 * @param node The node to transform
4126 * @return the created ia32 Store node
4128 static ir_node *gen_ia32_l_Store(ir_node *node) {
4129 return gen_lowered_Store(node, new_rd_ia32_Store);
4133 * Transforms a l_vfist into a "real" vfist node.
 *
 * Not routed through gen_lowered_Store because vfist takes an extra
 * truncation-mode operand (the fpu control word for round-towards-zero).
4135 * @param node The node to transform
4136 * @return the created ia32 vfist node
4138 static ir_node *gen_ia32_l_vfist(ir_node *node) {
4139 ir_node *block = be_transform_node(get_nodes_block(node));
4140 ir_node *ptr = get_irn_n(node, 0);
4141 ir_node *new_ptr = be_transform_node(ptr);
4142 ir_node *val = get_irn_n(node, 1);
4143 ir_node *new_val = be_transform_node(val);
4144 ir_node *mem = get_irn_n(node, 2);
4145 ir_node *new_mem = be_transform_node(mem);
4146 ir_graph *irg = current_ir_graph;
4147 dbg_info *dbgi = get_irn_dbg_info(node);
4148 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4149 ir_mode *mode = get_ia32_ls_mode(node);
 /* truncation rounding mode for float -> int conversion */
4150 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4154 new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
4155 new_val, trunc_mode);
4157 am_offs = get_ia32_am_offs_int(node);
4158 add_ia32_am_offs_int(new_op, am_offs);
4160 set_ia32_op_type(new_op, ia32_AddrModeD);
4161 set_ia32_ls_mode(new_op, mode);
4162 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4163 set_ia32_use_frame(new_op);
4165 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4171 * Transforms a l_MulS into a "real" MulS node
 * (unsigned widening multiply used by the 64bit lowering).
4173 * @return the created ia32 Mul node
4175 static ir_node *gen_ia32_l_Mul(ir_node *node) {
4176 ir_node *left = get_binop_left(node);
4177 ir_node *right = get_binop_right(node);
4179 return gen_binop(node, left, right, new_rd_ia32_Mul,
4180 match_commutative | match_am | match_mode_neutral);
4184 * Transforms a l_IMulS into a "real" IMul1OPS node
 * (signed one-operand multiply used by the 64bit lowering).
4186 * @return the created ia32 IMul1OP node
4188 static ir_node *gen_ia32_l_IMul(ir_node *node) {
4189 ir_node *left = get_binop_left(node);
4190 ir_node *right = get_binop_right(node);
4192 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
4193 match_commutative | match_am | match_mode_neutral);
/** Transform an ia32_l_Sub (lowered 64bit sub, low word) into an ia32 Sub.
 * Forced to mode_T so its flags output can feed the high-word Sbb;
 * mirrors gen_ia32_l_Add (no match_commutative: subtraction isn't). */
4196 static ir_node *gen_ia32_l_Sub(ir_node *node) {
4197 ir_node *left = get_irn_n(node, n_ia32_l_Sub_left);
4198 ir_node *right = get_irn_n(node, n_ia32_l_Sub_right);
4199 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
4200 match_am | match_immediate | match_mode_neutral);
 /* gen_binop may have wrapped the Sub in a result Proj; unwrap it */
4202 if(is_Proj(lowered)) {
4203 lowered = get_Proj_pred(lowered);
4205 assert(is_ia32_Sub(lowered));
4206 set_irn_mode(lowered, mode_T);
/** Transform an ia32_l_Sbb (subtract with borrow, high word of a 64bit sub)
 * into an ia32 Sbb consuming the flags of the low-word Sub. */
4212 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
4213 return gen_binop_flags(node, new_rd_ia32_Sbb,
4214 match_am | match_immediate | match_mode_neutral);
4218 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4219 * op1 - target to be shifted
4220 * op2 - contains bits to be shifted into target
 * op3 - shift count
4222 * Only op3 (the count) can be an immediate.
4224 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4225 ir_node *low, ir_node *count)
4227 ir_node *block = get_nodes_block(node);
4228 ir_node *new_block = be_transform_node(block);
4229 ir_graph *irg = current_ir_graph;
4230 dbg_info *dbgi = get_irn_dbg_info(node);
4231 ir_node *new_high = be_transform_node(high);
4232 ir_node *new_low = be_transform_node(low);
4236 /* the shift amount can be any mode that is bigger than 5 bits, since all
4237 * other bits are ignored anyway */
 /* skip single-user Convs on the count: only the low bits matter */
4238 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
4239 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4240 count = get_Conv_op(count);
4242 new_count = create_immediate_or_transform(count, 0);
4244 if (is_ia32_l_ShlD(node)) {
4245 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
4248 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
4251 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/** Transform an ia32_l_ShlD into a real ShlD (64bit shift-left helper). */
4256 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4258 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4259 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4260 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4261 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transform an ia32_l_ShrD into a real ShrD (64bit shift-right helper). */
4264 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4266 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4267 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4268 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4269 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform an ia32_l_LLtoFloat (signed 64bit int -> float) by storing the
 * two 32bit halves to a frame slot and reloading them with a single 64bit
 * fild (x87 integer load). Only the signed case is supported.
 *
 * @return the vfp result Proj of the fild
 */
4272 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
4273 ir_node *src_block = get_nodes_block(node);
4274 ir_node *block = be_transform_node(src_block);
4275 ir_graph *irg = current_ir_graph;
4276 dbg_info *dbgi = get_irn_dbg_info(node);
4277 ir_node *frame = get_irg_frame(irg);
4278 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4279 ir_node *nomem = new_NoMem();
4280 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4281 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4282 ir_node *new_val_low = be_transform_node(val_low);
4283 ir_node *new_val_high = be_transform_node(val_high);
4288 ir_node *store_high;
4290 if(!mode_is_signed(get_irn_mode(val_high))) {
4291 panic("unsigned long long -> float not supported yet (%+F)", node);
 /* spill both 32bit halves to consecutive frame-slot words */
4295 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4297 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4299 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
4300 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
4302 set_ia32_use_frame(store_low);
4303 set_ia32_use_frame(store_high);
4304 set_ia32_op_type(store_low, ia32_AddrModeD);
4305 set_ia32_op_type(store_high, ia32_AddrModeD);
4306 set_ia32_ls_mode(store_low, mode_Iu);
4307 set_ia32_ls_mode(store_high, mode_Is);
 /* high word lives 4 bytes above the low word (little endian) */
4308 add_ia32_am_offs_int(store_high, 4);
 /* serialize both stores before the fild reads the slot */
4312 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4315 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
4317 set_ia32_use_frame(fild);
4318 set_ia32_op_type(fild, ia32_AddrModeS);
4319 set_ia32_ls_mode(fild, mode_Ls);
4321 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
4323 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transform an ia32_l_FloattoLL (float -> 64bit int) into a vfist that
 * stores the truncated 64bit integer into a frame slot; the two 32bit
 * halves are read back by gen_Proj_l_FloattoLL.
 */
4326 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
4327 ir_node *src_block = get_nodes_block(node);
4328 ir_node *block = be_transform_node(src_block);
4329 ir_graph *irg = current_ir_graph;
4330 dbg_info *dbgi = get_irn_dbg_info(node);
4331 ir_node *frame = get_irg_frame(irg);
4332 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4333 ir_node *nomem = new_NoMem();
4334 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4335 ir_node *new_val = be_transform_node(val);
 /* fpu control word for truncating (round-towards-zero) conversion */
4336 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4341 fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
4343 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
4344 set_ia32_use_frame(fist);
4345 set_ia32_op_type(fist, ia32_AddrModeD);
4346 set_ia32_ls_mode(fist, mode_Ls);
4352 * the BAD transformer: registered for opcodes that must never reach the
 * transformation phase; aborts with a diagnostic if one does.
4354 static ir_node *bad_transform(ir_node *node) {
4355 panic("No transform function for %+F available.\n", node);
/**
 * Transform a Proj of an ia32_l_FloattoLL: loads one 32bit half of the
 * 64bit result that gen_ia32_l_FloattoLL's fist wrote to the frame slot
 * (offset 4 for the high half, 0 for the low half).
 */
4359 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
4360 ir_graph *irg = current_ir_graph;
4361 ir_node *block = be_transform_node(get_nodes_block(node));
4362 ir_node *pred = get_Proj_pred(node);
4363 ir_node *new_pred = be_transform_node(pred);
4364 ir_node *frame = get_irg_frame(irg);
4365 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4366 dbg_info *dbgi = get_irn_dbg_info(node);
4367 long pn = get_Proj_proj(node);
4372 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
4373 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
4374 set_ia32_use_frame(load);
4375 set_ia32_op_type(load, ia32_AddrModeS);
4376 set_ia32_ls_mode(load, mode_Iu);
4377 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4378 * 32 bit from it with this particular load */
4379 attr = get_ia32_attr(load);
4380 attr->data.need_64bit_stackent = 1;
4382 if (pn == pn_ia32_l_FloattoLL_res_high) {
4383 add_ia32_am_offs_int(load, 4);
4385 assert(pn == pn_ia32_l_FloattoLL_res_low);
4388 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4394 * Transform the Projs of an AddSP.
 * Note: be_AddSP was transformed into an ia32_SubSP (stack grows downwards),
 * so the Proj numbers are mapped onto SubSP's outputs here.
4396 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4397 ir_node *block = be_transform_node(get_nodes_block(node));
4398 ir_node *pred = get_Proj_pred(node);
4399 ir_node *new_pred = be_transform_node(pred);
4400 ir_graph *irg = current_ir_graph;
4401 dbg_info *dbgi = get_irn_dbg_info(node);
4402 long proj = get_Proj_proj(node);
4404 if (proj == pn_be_AddSP_sp) {
4405 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4406 pn_ia32_SubSP_stack);
 /* the stack-pointer result is pinned to esp */
4407 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4409 } else if(proj == pn_be_AddSP_res) {
4410 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4411 pn_ia32_SubSP_addr);
4412 } else if (proj == pn_be_AddSP_M) {
4413 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4417 return new_rd_Unknown(irg, get_irn_mode(node));
4421 * Transform the Projs of a SubSP.
 * Note: be_SubSP was transformed into an ia32_AddSP, so Proj numbers are
 * mapped onto AddSP's outputs here (mirror of gen_Proj_be_AddSP).
4423 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4424 ir_node *block = be_transform_node(get_nodes_block(node));
4425 ir_node *pred = get_Proj_pred(node);
4426 ir_node *new_pred = be_transform_node(pred);
4427 ir_graph *irg = current_ir_graph;
4428 dbg_info *dbgi = get_irn_dbg_info(node);
4429 long proj = get_Proj_proj(node);
4431 if (proj == pn_be_SubSP_sp) {
4432 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4433 pn_ia32_AddSP_stack);
 /* the stack-pointer result is pinned to esp */
4434 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4436 } else if (proj == pn_be_SubSP_M) {
4437 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4441 return new_rd_Unknown(irg, get_irn_mode(node));
4445 * Transform and renumber the Projs from a Load.
4447 static ir_node *gen_Proj_Load(ir_node *node) {
4449 ir_node *block = be_transform_node(get_nodes_block(node));
4450 ir_node *pred = get_Proj_pred(node);
4451 ir_graph *irg = current_ir_graph;
4452 dbg_info *dbgi = get_irn_dbg_info(node);
4453 long proj = get_Proj_proj(node);
4456 /* loads might be part of source address mode matches, so we don't
4457 transform the ProjMs yet (with the exception of loads whose result is
4460 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4463 assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
4465 /* this is needed, because sometimes we have loops that are only
4466 reachable through the ProjM */
4467 be_enqueue_preds(node);
4468 /* do it in 2 steps, to silence firm verifier */
4469 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4470 set_Proj_proj(res, pn_ia32_Load_M);
4474 /* renumber the proj */
4475 new_pred = be_transform_node(pred);
4476 if (is_ia32_Load(new_pred)) {
4479 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4481 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4482 case pn_Load_X_regular:
4483 return new_rd_Jmp(dbgi, irg, block);
4484 case pn_Load_X_except:
4485 /* This Load might raise an exception. Mark it. */
4486 set_ia32_exc_label(new_pred, 1);
4487 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4491 } else if (is_ia32_Conv_I2I(new_pred) ||
4492 is_ia32_Conv_I2I8Bit(new_pred)) {
4493 set_irn_mode(new_pred, mode_T);
4494 if (proj == pn_Load_res) {
4495 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4496 } else if (proj == pn_Load_M) {
4497 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4499 } else if (is_ia32_xLoad(new_pred)) {
4502 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4504 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4505 case pn_Load_X_regular:
4506 return new_rd_Jmp(dbgi, irg, block);
4507 case pn_Load_X_except:
4508 /* This Load might raise an exception. Mark it. */
4509 set_ia32_exc_label(new_pred, 1);
4510 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4514 } else if (is_ia32_vfld(new_pred)) {
4517 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4519 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4520 case pn_Load_X_regular:
4521 return new_rd_Jmp(dbgi, irg, block);
4522 case pn_Load_X_except:
4523 /* This Load might raise an exception. Mark it. */
4524 set_ia32_exc_label(new_pred, 1);
4525 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4530 /* can happen for ProJMs when source address mode happened for the
4533 /* however it should not be the result proj, as that would mean the
4534 load had multiple users and should not have been used for
4536 if (proj != pn_Load_M) {
4537 panic("internal error: transformed node not a Load");
4539 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4543 return new_rd_Unknown(irg, get_irn_mode(node));
4547 * Transform and renumber the Projs from a DivMod like instruction.
 * Div, Mod and DivMod all map onto the same ia32 Div/IDiv node; this
 * dispatches on the original opcode to pick div_res vs mod_res outputs.
4549 static ir_node *gen_Proj_DivMod(ir_node *node) {
4550 ir_node *block = be_transform_node(get_nodes_block(node));
4551 ir_node *pred = get_Proj_pred(node);
4552 ir_node *new_pred = be_transform_node(pred);
4553 ir_graph *irg = current_ir_graph;
4554 dbg_info *dbgi = get_irn_dbg_info(node);
4555 ir_mode *mode = get_irn_mode(node);
4556 long proj = get_Proj_proj(node);
4558 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4560 switch (get_irn_opcode(pred)) {
4564 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4566 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4567 case pn_Div_X_regular:
4568 return new_rd_Jmp(dbgi, irg, block);
4569 case pn_Div_X_except:
 /* division may trap (e.g. divide by zero): mark for exception handling */
4570 set_ia32_exc_label(new_pred, 1);
4571 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4579 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4581 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4582 case pn_Mod_X_except:
4583 set_ia32_exc_label(new_pred, 1);
4584 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4592 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4593 case pn_DivMod_res_div:
4594 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4595 case pn_DivMod_res_mod:
4596 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4597 case pn_DivMod_X_regular:
4598 return new_rd_Jmp(dbgi, irg, block);
4599 case pn_DivMod_X_except:
4600 set_ia32_exc_label(new_pred, 1);
4601 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4611 return new_rd_Unknown(irg, mode);
4615 * Transform and renumber the Projs from a CopyB.
 * A CopyB becomes either an ia32_CopyB_i (known size) or ia32_CopyB
 * (rep movs); only the memory Proj needs renumbering.
4617 static ir_node *gen_Proj_CopyB(ir_node *node) {
4618 ir_node *block = be_transform_node(get_nodes_block(node));
4619 ir_node *pred = get_Proj_pred(node);
4620 ir_node *new_pred = be_transform_node(pred);
4621 ir_graph *irg = current_ir_graph;
4622 dbg_info *dbgi = get_irn_dbg_info(node);
4623 ir_mode *mode = get_irn_mode(node);
4624 long proj = get_Proj_proj(node);
4627 case pn_CopyB_M_regular:
4628 if (is_ia32_CopyB_i(new_pred)) {
4629 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4630 } else if (is_ia32_CopyB(new_pred)) {
4631 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4639 return new_rd_Unknown(irg, mode);
4643 * Transform and renumber the Projs from a Quot.
 * The float division became either an xDiv (SSE) or vfdiv (x87); renumber
 * the memory and result Projs onto the matching outputs.
4645 static ir_node *gen_Proj_Quot(ir_node *node) {
4646 ir_node *block = be_transform_node(get_nodes_block(node));
4647 ir_node *pred = get_Proj_pred(node);
4648 ir_node *new_pred = be_transform_node(pred);
4649 ir_graph *irg = current_ir_graph;
4650 dbg_info *dbgi = get_irn_dbg_info(node);
4651 ir_mode *mode = get_irn_mode(node);
4652 long proj = get_Proj_proj(node);
4656 if (is_ia32_xDiv(new_pred)) {
4657 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4658 } else if (is_ia32_vfdiv(new_pred)) {
4659 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4663 if (is_ia32_xDiv(new_pred)) {
4664 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4665 } else if (is_ia32_vfdiv(new_pred)) {
4666 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4674 return new_rd_Unknown(irg, mode);
4678 * Transform the Thread Local Storage Proj
 * into an ia32_LdTls node that produces the TLS base address.
4680 static ir_node *gen_Proj_tls(ir_node *node) {
4681 ir_node *block = be_transform_node(get_nodes_block(node));
4682 ir_graph *irg = current_ir_graph;
 /* no meaningful source position for the synthesized load */
4683 dbg_info *dbgi = NULL;
4684 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
/** Duplicate a be_Call, marking it as clobbering the flags register
 * (any called code may modify eflags). */
4689 static ir_node *gen_be_Call(ir_node *node) {
4690 ir_node *res = be_duplicate_node(node);
4691 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/** Duplicate a be_IncSP, marking it as clobbering the flags register
 * (the emitted add/sub on esp modifies eflags). */
4696 static ir_node *gen_be_IncSP(ir_node *node) {
4697 ir_node *res = be_duplicate_node(node);
4698 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4704 * Transform the Projs from a be_Call.
4706 static ir_node *gen_Proj_be_Call(ir_node *node) {
4707 ir_node *block = be_transform_node(get_nodes_block(node));
4708 ir_node *call = get_Proj_pred(node);
4709 ir_node *new_call = be_transform_node(call);
4710 ir_graph *irg = current_ir_graph;
4711 dbg_info *dbgi = get_irn_dbg_info(node);
4712 ir_type *method_type = be_Call_get_type(call);
4713 int n_res = get_method_n_ress(method_type);
4714 long proj = get_Proj_proj(node);
4715 ir_mode *mode = get_irn_mode(node);
4717 const arch_register_class_t *cls;
4719 /* The following is kinda tricky: If we're using SSE, then we have to
4720 * move the result value of the call in floating point registers to an
4721 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4722 * after the call, we have to make sure to correctly make the
4723 * MemProj and the result Proj use these 2 nodes
4725 if (proj == pn_be_Call_M_regular) {
4726 // get new node for result, are we doing the sse load/store hack?
4727 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4728 ir_node *call_res_new;
4729 ir_node *call_res_pred = NULL;
4731 if (call_res != NULL) {
4732 call_res_new = be_transform_node(call_res);
4733 call_res_pred = get_Proj_pred(call_res_new);
 /* if the result Proj still hangs off the Call itself, no sse hack was
  * applied and the memory Proj belongs to the Call; otherwise it must
  * come from the xLoad of the fixup sequence */
4736 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4737 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4738 pn_be_Call_M_regular);
4740 assert(is_ia32_xLoad(call_res_pred));
4741 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
 /* float results under SSE2: spill st(0) to the frame (vfst) and reload
  * it into an xmm register (xLoad) */
4745 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4746 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4748 ir_node *frame = get_irg_frame(irg);
4749 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4751 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4754 /* in case there is no memory output: create one to serialize the copy
4756 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4757 pn_be_Call_M_regular);
4758 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4759 pn_be_Call_first_res);
4761 /* store st(0) onto stack */
4762 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4764 set_ia32_op_type(fstp, ia32_AddrModeD);
4765 set_ia32_use_frame(fstp);
4767 /* load into SSE register */
4768 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4770 set_ia32_op_type(sse_load, ia32_AddrModeS);
4771 set_ia32_use_frame(sse_load);
4773 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4779 /* transform call modes */
4780 if (mode_is_data(mode)) {
4781 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4785 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4789 * Transform the Projs from a Cmp.
 * Should be unreachable: the mode_b lowering phase must have removed all
 * direct Cmp Projs before the transformation runs.
4791 static ir_node *gen_Proj_Cmp(ir_node *node)
4793 /* this probably means not all mode_b nodes were lowered... */
4794 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4799 * Transform and potentially renumber Proj nodes:
 * dispatches to the specific gen_Proj_* handler based on the predecessor.
4801 static ir_node *gen_Proj(ir_node *node) {
4802 ir_graph *irg = current_ir_graph;
4803 dbg_info *dbgi = get_irn_dbg_info(node);
4804 ir_node *pred = get_Proj_pred(node);
4805 long proj = get_Proj_proj(node);
4807 if (is_Store(pred)) {
 /* an ia32 store has only a memory result: the transformed store itself
  * stands for its ProjM */
4808 if (proj == pn_Store_M) {
4809 return be_transform_node(pred);
4812 return new_r_Bad(irg);
4814 } else if (is_Load(pred)) {
4815 return gen_Proj_Load(node);
4816 } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
4817 return gen_Proj_DivMod(node);
4818 } else if (is_CopyB(pred)) {
4819 return gen_Proj_CopyB(node);
4820 } else if (is_Quot(pred)) {
4821 return gen_Proj_Quot(node);
4822 } else if (be_is_SubSP(pred)) {
4823 return gen_Proj_be_SubSP(node);
4824 } else if (be_is_AddSP(pred)) {
4825 return gen_Proj_be_AddSP(node);
4826 } else if (be_is_Call(pred)) {
4827 return gen_Proj_be_Call(node);
4828 } else if (is_Cmp(pred)) {
4829 return gen_Proj_Cmp(node);
4830 } else if (get_irn_op(pred) == op_Start) {
4831 if (proj == pn_Start_X_initial_exec) {
4832 ir_node *block = get_nodes_block(pred);
4835 /* we exchange the ProjX with a jump */
4836 block = be_transform_node(block);
4837 jump = new_rd_Jmp(dbgi, irg, block);
4840 if (node == be_get_old_anchor(anchor_tls)) {
4841 return gen_Proj_tls(node);
4843 } else if (is_ia32_l_FloattoLL(pred)) {
4844 return gen_Proj_l_FloattoLL(node);
4846 } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4850 ir_node *new_pred = be_transform_node(pred);
4851 ir_node *block = be_transform_node(get_nodes_block(node));
4852 ir_mode *mode = get_irn_mode(node);
4853 if (mode_needs_gp_reg(mode)) {
 /* gp values are always kept in 32bit registers: renumber to mode_Iu */
4854 ir_node *new_proj = new_r_Proj(irg, block, new_pred, mode_Iu,
4855 get_Proj_proj(node));
4856 #ifdef DEBUG_libfirm
 /* keep the original node number for debugging output */
4857 new_proj->node_nr = node->node_nr;
4863 return be_duplicate_node(node);
4867  * Enters all transform functions into the generic pointer
/* Fills op->ops.generic of every relevant opcode with the matching gen_*
 * transform callback; ops that must never appear here are wired to
 * bad_transform via BAD(). Called once per graph transformation. */
4869 static void register_transformers(void)
4873 /* first clear the generic function pointer for all ops */
4874 clear_irp_opcodes_generic_func();
/* GEN: bind gen_<op> as the transform callback for op_<op>;
 * BAD: mark op_<op> as illegal input to this phase */
4876 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4877 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4915 /* transform ops from intrinsic lowering */
4931 GEN(ia32_l_LLtoFloat);
4932 GEN(ia32_l_FloattoLL);
4938 /* we should never see these nodes */
4953 /* handle generic backend nodes */
/* Mulh is looked up dynamically since it may not exist in every build */
4962 op_Mulh = get_op_Mulh();
4971  * Pre-transform all unknown and noreg nodes.
/* Callback invoked by be_transform_graph() before the main walk: eagerly
 * transforms the cached per-class Unknown and NoReg placeholder nodes so
 * they exist in the new graph before anything references them. */
4973 static void ia32_pretransform_node(void *arch_cg) {
4974 ia32_code_gen_t *cg = arch_cg;
/* one Unknown and one NoReg placeholder per register class (gp/vfp/xmm) */
4976 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4977 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4978 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4979 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4980 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4981 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
4986  * Walker, checks if all ia32 nodes producing more than one result have
4987  * its Projs, other wise creates new projs and keep them using a be_Keep node.
/* Graph walker: for every ia32 node, records which result Projs exist
 * (bitmask found_projs), then creates a Proj + be_Keep for each unused
 * output so the register allocator still sees all produced values. */
4989 static void add_missing_keep_walker(ir_node *node, void *data)
4992 unsigned found_projs = 0;
4993 const ir_edge_t *edge;
4994 ir_mode *mode = get_irn_mode(node);
/* only ia32 machine nodes are of interest here */
4999 if(!is_ia32_irn(node))
5002 n_outs = get_ia32_n_res(node);
/* SwitchJmp outputs are control flow, not values to keep */
5005 if(is_ia32_SwitchJmp(node))
/* found_projs is a bitmask, so the result count must fit into it */
5008 assert(n_outs < (int) sizeof(unsigned) * 8);
5009 foreach_out_edge(node, edge) {
5010 ir_node *proj = get_edge_src_irn(edge);
5011 int pn = get_Proj_proj(proj);
/* memory Projs are exempt from the output-count check */
5013 assert(get_irn_mode(proj) == mode_M || pn < n_outs);
5014 found_projs |= 1 << pn;
5018 /* are keeps missing? */
5020 for(i = 0; i < n_outs; ++i) {
5023 const arch_register_req_t *req;
5024 const arch_register_class_t *class;
/* this output already has a Proj, nothing to do */
5026 if(found_projs & (1 << i)) {
5030 req = get_ia32_out_req(node, i);
/* flag results need no keep */
5035 if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
5039 block = get_nodes_block(node);
/* materialise a Proj for the unused output and keep it alive */
5040 in[0] = new_r_Proj(current_ir_graph, block, node,
5041 arch_register_class_mode(class), i);
/* reuse one be_Keep per node, appending further values to it */
5042 if(last_keep != NULL) {
5043 be_Keep_add_node(last_keep, class, in[0]);
5045 last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
/* if the node is already scheduled, schedule the Keep right after it */
5046 if(sched_is_scheduled(node)) {
5047 sched_add_after(node, last_keep);
5054  * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Public entry point: walks the whole graph with add_missing_keep_walker. */
5057 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5059 ir_graph *irg = be_get_birg_irg(cg->birg);
5060 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5063 /* do the transformation */
/* Main driver: transforms a firm graph into ia32 machine nodes.
 * Sets up transform callbacks, computes node heights and the set of nodes
 * excluded from address-mode folding, then runs be_transform_graph(). */
5064 void ia32_transform_graph(ia32_code_gen_t *cg) {
5066 ir_graph *irg = cg->irg;
5068 register_transformers();
/* the fpcw (FPU control word) node is created lazily during transformation */
5070 initial_fpcw = NULL;
/* height information is consulted by the transformers (e.g. for AM folding) */
5072 BE_TIMER_PUSH(t_heights);
5073 heights = heights_new(irg);
5074 BE_TIMER_POP(t_heights);
5075 ia32_calculate_non_address_mode_nodes(cg->birg);
5077 /* the transform phase is not safe for CSE (yet) because several nodes get
5078 * attributes set after their creation */
/* remember the CSE setting so it can be restored afterwards */
5079 cse_last = get_opt_cse();
5082 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
5084 set_opt_cse(cse_last);
/* release per-transformation auxiliary data */
5086 ia32_free_non_address_mode_nodes();
5087 heights_free(heights);
5091 void ia32_init_transform(void)
5093 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");