2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* Bit patterns (as strings, parsed later by new_tarval_from_str) used to
 * build sign-bit and absolute-value masks for single/double precision
 * floats, plus the largest signed 64bit integer. */
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
/* Names of the primitive ir_types created for the constants above. */
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
/* Linker-visible entity names for the constants above
 * (see ia32_gen_fp_known_const below). */
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
/* Shorthands for the register-class modes of the x87 (vfp) and SSE (xmm)
 * register files. */
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
/* Cached, already-transformed fpcw ignore-node; lazily set by get_fpcw(). */
98 static ir_node *initial_fpcw = NULL;
/* Heights analysis, used for in-block reachability tests when deciding
 * whether a Load may be folded into an address mode. */
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
/* Function-pointer typedefs matching the signatures of the generated
 * new_rd_ia32_* node constructors.  The gen_* helpers below take one of
 * these so a single matcher can build many different ia32 nodes. */
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
/* Binary operation that additionally consumes an eflags input. */
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
/* x87 float binop: also takes the fp control word (fpcw). */
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
/* Forward declarations for helpers defined later in this file. */
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
139 * Return true if a mode can be stored in the GP register set
/* NOTE(review): this listing is gap-sampled; the bodies of the two guard
 * clauses (original lines 143/145, presumably "return 0;") are elided. */
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
142 if(mode == mode_fpcw)
144 if(get_mode_size_bits(mode) > 32)
/* ints, references and mode_b (conditions) live in GP registers. */
146 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
150 * creates a unique ident by adding a number to a tag
152 * @param tag the tag string, must contain a %d if a number
/* NOTE(review): the declaration of the local buffer `str` (around original
 * line 158) is elided in this extract. */
155 static ident *unique_id(const char *tag)
/* static counter: each call yields a fresh number, so a fresh ident. */
157 static unsigned id = 0;
160 snprintf(str, sizeof(str), tag, ++id);
161 return new_id_from_str(str);
165 * Get a primitive type for a mode.
/* Caches one primitive ir_type per mode in the given pmap; creates and
 * inserts it on a cache miss.  NOTE(review): the cache-hit early return
 * and the local declarations (original lines ~170-173) are elided here. */
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 pmap_entry *e = pmap_find(types, mode);
174 snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 res = new_type_primitive(new_id_from_str(buf), mode);
/* 16 byte alignment, suitable for SSE loads of such constants. */
176 set_type_alignment_bytes(res, 16);
177 pmap_insert(types, mode, res);
185 * Creates an immediate.
187 * @param symconst if set, create a SymConst immediate
188 * @param symconst_sign sign for the symconst
189 * @param val integer value for the immediate
191 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
193 ir_graph *irg = current_ir_graph;
/* Immediates are placed in the start block so they dominate all users. */
194 ir_node *start_block = get_irg_start_block(irg);
195 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block,
196 symconst, symconst_sign, val);
/* Pin the virtual NOREG register: immediates need no real register. */
197 arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
203 * Get an atomic entity that is initialized with a tarval
/* Entities are cached per tarval in isa->tv_ent, so each distinct float
 * constant is emitted only once.  NOTE(review): gap-sampled — the cache-hit
 * return and some local declarations are elided. */
205 static ir_entity *create_float_const_entity(ir_node *cnst)
207 ia32_isa_t *isa = env_cg->isa;
208 tarval *tv = get_Const_tarval(cnst);
209 pmap_entry *e = pmap_find(isa->tv_ent, tv);
214 ir_mode *mode = get_irn_mode(cnst);
215 ir_type *tp = get_Const_type(cnst);
216 if (tp == firm_unknown_type)
217 tp = get_prim_type(isa->types, mode);
/* ".LC%u" yields assembler-local constant labels (.LC1, .LC2, ...). */
219 res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
221 set_entity_ld_ident(res, get_entity_ident(res));
222 set_entity_visibility(res, visibility_local);
223 set_entity_variability(res, variability_constant);
224 set_entity_allocation(res, allocation_static);
226 /* we create a new entity here: It's initialization must resist on the
/* The initializer Const must live in the const-code irg, so temporarily
 * switch current_ir_graph while building it. */
228 rem = current_ir_graph;
229 current_ir_graph = get_const_code_irg();
230 set_atomic_ent_value(res, new_Const_type(tv, tp));
231 current_ir_graph = rem;
233 pmap_insert(isa->tv_ent, tv, res);
/** Returns true if node is a Const with value 0. */
241 static int is_Const_0(ir_node *node) {
242 return is_Const(node) && is_Const_null(node);
/** Returns true if node is a Const with value 1. */
245 static int is_Const_1(ir_node *node) {
246 return is_Const(node) && is_Const_one(node);
/** Returns true if node is a Const with all bits set (-1 for ints). */
249 static int is_Const_Minus_1(ir_node *node) {
250 return is_Const(node) && is_Const_all_one(node);
254 * returns true if constant can be created with a simple float command
/* x87 has fldz/fld1, so 0.0 and 1.0 need no memory load. */
256 static int is_simple_x87_Const(ir_node *node)
258 tarval *tv = get_Const_tarval(node);
260 if (tarval_is_null(tv) || tarval_is_one(tv))
263 /* TODO: match all the other float constants */
268 * returns true if constant can be created with a simple float command
/* SSE can synthesize 0.0, 1.0 and (for doubles whose low 32 bits are zero,
 * see gen_Const) constants without a constant-pool load.
 * NOTE(review): gap-sampled — the returns and the check using `val` around
 * original lines 286-289 are elided. */
270 static int is_simple_sse_Const(ir_node *node)
272 tarval *tv = get_Const_tarval(node);
273 ir_mode *mode = get_tarval_mode(tv);
278 if (tarval_is_null(tv) || tarval_is_one(tv))
281 if (mode == mode_D) {
/* assemble the low 32 bits of the double, byte by byte */
282 unsigned val = get_tarval_sub_bits(tv, 0) |
283 (get_tarval_sub_bits(tv, 1) << 8) |
284 (get_tarval_sub_bits(tv, 2) << 16) |
285 (get_tarval_sub_bits(tv, 3) << 24);
287 /* really a 32bit constant */
291 /* TODO: match all the other float constants */
296 * Transforms a Const.
/* Float constants: try cheap instruction sequences (xZero/xAllOnes+shifts,
 * movd of a GP constant, fldz/fld1) before falling back to a load from a
 * constant-pool entity.  Integer constants become ia32_Const immediates.
 * NOTE(review): gap-sampled — several returns, declarations and closing
 * braces are elided throughout this function. */
298 static ir_node *gen_Const(ir_node *node) {
299 ir_graph *irg = current_ir_graph;
300 ir_node *old_block = get_nodes_block(node);
301 ir_node *block = be_transform_node(old_block);
302 dbg_info *dbgi = get_irn_dbg_info(node);
303 ir_mode *mode = get_irn_mode(node);
305 assert(is_Const(node));
307 if (mode_is_float(mode)) {
309 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
310 ir_node *nomem = new_NoMem();
314 if (ia32_cg_config.use_sse2) {
315 tarval *tv = get_Const_tarval(node);
316 if (tarval_is_null(tv)) {
/* 0.0: xorps/xorpd the register with itself */
317 load = new_rd_ia32_xZero(dbgi, irg, block);
318 set_ia32_ls_mode(load, mode);
320 } else if (tarval_is_one(tv)) {
/* 1.0: all-ones, then shift left/right to carve out the exponent
 * pattern; shift amounts differ for float (26) vs double (55). */
321 int cnst = mode == mode_F ? 26 : 55;
322 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
323 ir_node *imm2 = create_Immediate(NULL, 0, 2);
324 ir_node *pslld, *psrld;
326 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
327 set_ia32_ls_mode(load, mode);
328 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
329 set_ia32_ls_mode(pslld, mode);
330 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
331 set_ia32_ls_mode(psrld, mode);
333 } else if (mode == mode_F) {
334 /* we can place any 32bit constant by using a movd gp, sse */
335 unsigned val = get_tarval_sub_bits(tv, 0) |
336 (get_tarval_sub_bits(tv, 1) << 8) |
337 (get_tarval_sub_bits(tv, 2) << 16) |
338 (get_tarval_sub_bits(tv, 3) << 24);
339 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
340 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
341 set_ia32_ls_mode(load, mode);
344 if (mode == mode_D) {
/* low 32 bits of the double */
345 unsigned val = get_tarval_sub_bits(tv, 0) |
346 (get_tarval_sub_bits(tv, 1) << 8) |
347 (get_tarval_sub_bits(tv, 2) << 16) |
348 (get_tarval_sub_bits(tv, 3) << 24);
350 ir_node *imm32 = create_Immediate(NULL, 0, 32);
351 ir_node *cnst, *psllq;
353 /* fine, lower 32bit are zero, produce 32bit value */
354 val = get_tarval_sub_bits(tv, 4) |
355 (get_tarval_sub_bits(tv, 5) << 8) |
356 (get_tarval_sub_bits(tv, 6) << 16) |
357 (get_tarval_sub_bits(tv, 7) << 24);
358 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
359 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
360 set_ia32_ls_mode(load, mode);
/* shift the upper half into place */
361 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
362 set_ia32_ls_mode(psllq, mode);
/* fallback: load the constant from a pool entity */
367 floatent = create_float_const_entity(node);
369 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
371 set_ia32_op_type(load, ia32_AddrModeS);
372 set_ia32_am_sc(load, floatent);
/* constant loads can always be redone, mark rematerializable */
373 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
374 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, otherwise pool load */
377 if (is_Const_null(node)) {
378 load = new_rd_ia32_vfldz(dbgi, irg, block);
380 } else if (is_Const_one(node)) {
381 load = new_rd_ia32_vfld1(dbgi, irg, block);
384 floatent = create_float_const_entity(node);
386 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
387 set_ia32_op_type(load, ia32_AddrModeS);
388 set_ia32_am_sc(load, floatent);
389 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
390 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
392 set_ia32_ls_mode(load, mode);
395 /* Const Nodes before the initial IncSP are a bad idea, because
396 * they could be spilled and we have no SP ready at that point yet.
397 * So add a dependency to the initial frame pointer calculation to
398 * avoid that situation.
400 if (get_irg_start_block(irg) == block) {
401 add_irn_dep(load, get_irg_frame(irg));
404 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
/* integer path: convert tarval to an unsigned 32bit immediate */
408 tarval *tv = get_Const_tarval(node);
411 tv = tarval_convert_to(tv, mode_Iu);
413 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
415 panic("couldn't convert constant tarval (%+F)", node);
417 val = get_tarval_long(tv);
419 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
420 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same start-block dependency trick as for the float loads above */
423 if (get_irg_start_block(irg) == block) {
424 add_irn_dep(cnst, get_irg_frame(irg));
432 * Transforms a SymConst.
/* Address constants become ia32_Const immediates carrying the entity;
 * float-mode SymConsts are turned into loads.  NOTE(review): gap-sampled —
 * declarations, returns and braces are elided in places. */
434 static ir_node *gen_SymConst(ir_node *node) {
435 ir_graph *irg = current_ir_graph;
436 ir_node *old_block = get_nodes_block(node);
437 ir_node *block = be_transform_node(old_block);
438 dbg_info *dbgi = get_irn_dbg_info(node);
439 ir_mode *mode = get_irn_mode(node);
442 if (mode_is_float(mode)) {
443 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
444 ir_node *nomem = new_NoMem();
446 if (ia32_cg_config.use_sse2)
447 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
449 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
450 set_ia32_am_sc(cnst, get_SymConst_entity(node));
451 set_ia32_use_frame(cnst);
/* only entity addresses are supported by this backend */
455 if(get_SymConst_kind(node) != symconst_addr_ent) {
456 panic("backend only support symconst_addr_ent (at %+F)", node);
458 entity = get_SymConst_entity(node);
459 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
462 /* Const Nodes before the initial IncSP are a bad idea, because
463 * they could be spilled and we have no SP ready at that point yet
465 if (get_irg_start_block(irg) == block) {
466 add_irn_dep(cnst, get_irg_frame(irg));
469 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
474 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Lazily creates (and caches in ent_cache) one global constant entity per
 * ia32_known_const_t kind, using the TP_*/ENT_*/SFP_* macros above.
 * NOTE(review): gap-sampled — struct field names and some declarations
 * are elided. */
475 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
476 static const struct {
478 const char *ent_name;
479 const char *cnst_str;
482 } names [ia32_known_const_max] = {
483 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
484 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
485 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
486 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
487 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
489 static ir_entity *ent_cache[ia32_known_const_max];
491 const char *tp_name, *ent_name, *cnst_str;
499 ent_name = names[kct].ent_name;
500 if (! ent_cache[kct]) {
501 tp_name = names[kct].tp_name;
502 cnst_str = names[kct].cnst_str;
/* mode selector: 0 = 32bit unsigned, 1 = 64bit unsigned, else float */
504 switch (names[kct].mode) {
505 case 0: mode = mode_Iu; break;
506 case 1: mode = mode_Lu; break;
507 default: mode = mode_F; break;
509 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
510 tp = new_type_primitive(new_id_from_str(tp_name), mode);
511 /* set the specified alignment */
512 set_type_alignment_bytes(tp, names[kct].align);
514 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
516 set_entity_ld_ident(ent, get_entity_ident(ent));
517 set_entity_visibility(ent, visibility_local);
518 set_entity_variability(ent, variability_constant);
519 set_entity_allocation(ent, allocation_static);
521 /* we create a new entity here: It's initialization must resist on the
/* build the initializer in the const-code irg, then restore */
523 rem = current_ir_graph;
524 current_ir_graph = get_const_code_irg();
525 cnst = new_Const(mode, tv);
526 current_ir_graph = rem;
528 set_atomic_ent_value(ent, cnst);
530 /* cache the entry */
531 ent_cache[kct] = ent;
534 return ent_cache[kct];
539 * Prints the old node name on cg obst and returns a pointer to it.
/* Used by SET_IA32_ORIG_NODE for debug output; the string lives on the
 * isa's name obstack (never freed individually). */
541 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
542 ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
544 lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
/* NUL-terminate and hand out the finished obstack object */
545 obstack_1grow(isa->name_obst, 0);
546 return obstack_finish(isa->name_obst);
551 * return true if the node is a Proj(Load) and could be used in source address
552 * mode for another node. Will return only true if the @p other node is not
553 * dependent on the memory of the Load (for binary operations use the other
554 * input here, for unary operations use NULL).
/* NOTE(review): gap-sampled — returns of the guard clauses and some local
 * declarations (load, pn) are elided. */
556 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
557 ir_node *other, ir_node *other2)
559 ir_mode *mode = get_irn_mode(node);
563 /* float constants are always available */
564 if (is_Const(node) && mode_is_float(mode)) {
565 if (ia32_cg_config.use_sse2) {
566 if (is_simple_sse_Const(node))
569 if (is_simple_x87_Const(node))
572 if (get_irn_n_edges(node) > 1)
/* must be Proj(Load, pn_Load_res) in the same block */
579 load = get_Proj_pred(node);
580 pn = get_Proj_proj(node);
581 if(!is_Load(load) || pn != pn_Load_res)
583 if(get_nodes_block(load) != block)
585 /* we only use address mode if we're the only user of the load */
586 if(get_irn_n_edges(node) > 1)
588 /* in some edge cases with address mode we might reach the load normally
589 * and through some AM sequence, if it is already materialized then we
590 * can't create an AM node from it */
591 if(be_is_transformed(node))
594 /* don't do AM if other node inputs depend on the load (via mem-proj) */
595 if(other != NULL && get_nodes_block(other) == block
596 && heights_reachable_in_block(heights, other, load))
598 if(other2 != NULL && get_nodes_block(other2) == block
599 && heights_reachable_in_block(heights, other2, load))
/* Result record of match_arguments(): the matched address (am->addr),
 * the transformed operands and bookkeeping flags.
 * NOTE(review): gap-sampled — several members (addr, ls_mode, pinned,
 * mem_proj, new_op1/new_op2) are elided from this extract but are used
 * throughout the file. */
605 typedef struct ia32_address_mode_t ia32_address_mode_t;
606 struct ia32_address_mode_t {
610 ia32_op_type_t op_type;
614 unsigned commutative : 1;
/* set when the operands were swapped during matching */
615 unsigned ins_permuted : 1;
/** Fills addr from a pointer + mem pair: parses the address expression and
 *  transforms base/index/mem, substituting NoReg for absent parts. */
618 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
620 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
622 /* construct load address */
623 memset(addr, 0, sizeof(addr[0]));
624 ia32_create_address_mode(addr, ptr, /*force=*/0);
626 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
627 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
628 addr->mem = be_transform_node(mem);
/** Fills am->addr for source address mode from @p node, which is either a
 *  float Const (turned into a constant-pool entity reference) or a
 *  Proj(Load) whose address is parsed.
 *  NOTE(review): gap-sampled — the early return after the Const branch and
 *  local declarations (load, ptr, mem, new_mem) are elided. */
631 static void build_address(ia32_address_mode_t *am, ir_node *node)
633 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
634 ia32_address_t *addr = &am->addr;
640 if (is_Const(node)) {
/* float constant: address the pool entity directly, no memory dep */
641 ir_entity *entity = create_float_const_entity(node);
642 addr->base = noreg_gp;
643 addr->index = noreg_gp;
644 addr->mem = new_NoMem();
645 addr->symconst_ent = entity;
647 am->ls_mode = get_irn_mode(node);
648 am->pinned = op_pin_state_floats;
/* Proj(Load) case: fold the Load into the address mode */
652 load = get_Proj_pred(node);
653 ptr = get_Load_ptr(load);
654 mem = get_Load_mem(load);
655 new_mem = be_transform_node(mem);
656 am->pinned = get_irn_pinned(load);
657 am->ls_mode = get_Load_mode(load);
658 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
660 /* construct load address */
661 ia32_create_address_mode(addr, ptr, /*force=*/0);
663 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
664 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/** Copies the fields of @p addr into the ia32 attributes of @p node.
 *  NOTE(review): gap-sampled — the condition guarding the use_frame part
 *  (around original line 675) is elided. */
668 static void set_address(ir_node *node, const ia32_address_t *addr)
670 set_ia32_am_scale(node, addr->scale);
671 set_ia32_am_sc(node, addr->symconst_ent);
672 set_ia32_am_offs_int(node, addr->offset);
673 if(addr->symconst_sign)
674 set_ia32_am_sc_sign(node);
676 set_ia32_use_frame(node);
677 set_ia32_frame_ent(node, addr->frame_entity);
/** Applies a matched address mode (address, op type, load/store mode,
 *  pinned state, commutativity) to a freshly constructed ia32 node.
 *  NOTE(review): gap-sampled — the condition before set_ia32_commutative
 *  (presumably testing am->commutative) is elided. */
680 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
682 set_address(node, &am->addr);
684 set_ia32_op_type(node, am->op_type);
685 set_ia32_ls_mode(node, am->ls_mode);
/* never "unpin" a node that is already pinned */
686 if(am->pinned == op_pin_state_pinned && get_irn_pinned(node) != op_pin_state_pinned) {
687 set_irn_pinned(node, am->pinned);
690 set_ia32_commutative(node);
694 * Check, if a given node is a Down-Conv, ie. a integer Conv
695 * from a mode with a mode with more bits to a mode with lesser bits.
696 * Moreover, we return only true if the node has not more than 1 user.
698 * @param node the node
699 * @return non-zero if node is a Down-Conv
/* NOTE(review): gap-sampled — the is_Conv() check and the returns of the
 * guard clauses are elided. */
701 static int is_downconv(const ir_node *node)
709 /* we only want to skip the conv when we're the only user
710 * (not optimal but for now...)
712 if(get_irn_n_edges(node) > 1)
715 src_mode = get_irn_mode(get_Conv_op(node));
716 dest_mode = get_irn_mode(node);
/* both modes must be GP-register modes and the target strictly smaller */
717 return mode_needs_gp_reg(src_mode)
718 && mode_needs_gp_reg(dest_mode)
719 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
722 /* Skip all Down-Conv's on a given node and return the resulting node. */
/* NOTE(review): the trailing "return node;" (original line ~727) is elided
 * in this extract. */
723 ir_node *ia32_skip_downconv(ir_node *node) {
724 while (is_downconv(node))
725 node = get_Conv_op(node);
/** Widens @p node to a 32bit mode via an I2I Conv, choosing the signed or
 *  unsigned target mode depending on @p node's mode.
 *  NOTE(review): gap-sampled — the tgt_mode assignments and local
 *  declarations (tgt_mode, block, dbgi) are elided. */
731 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
733 ir_mode *mode = get_irn_mode(node);
738 if(mode_is_signed(mode)) {
743 block = get_nodes_block(node);
744 dbgi = get_irn_dbg_info(node);
746 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
751 * matches operands of a node into ia32 addressing/operand modes. This covers
752 * usage of source address mode, immediates, operations with non 32-bit modes,
754 * The resulting data is filled into the @p am struct. block is the block
755 * of the node whose arguments are matched. op1, op2 are the first and second
756 * input that are matched (op1 may be NULL). other_op is another unrelated
757 * input that is not matched! but which is needed sometimes to check if AM
758 * for op1/op2 is legal.
759 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): gap-sampled — several locals (new_op1, new_op2, use_am,
 * use_immediate, noreg), some branch bodies and closing braces are elided. */
761 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
762 ir_node *op1, ir_node *op2, ir_node *other_op,
765 ia32_address_t *addr = &am->addr;
766 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
769 ir_mode *mode = get_irn_mode(op2);
771 unsigned commutative;
772 int use_am_and_immediates;
774 int mode_bits = get_mode_size_bits(mode);
776 memset(am, 0, sizeof(am[0]));
/* decode the match_flags into local booleans */
778 commutative = (flags & match_commutative) != 0;
779 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
780 use_am = (flags & match_am) != 0;
781 use_immediate = (flags & match_immediate) != 0;
782 assert(!use_am_and_immediates || use_immediate);
785 assert(!commutative || op1 != NULL);
786 assert(use_am || !(flags & match_8bit_am));
787 assert(use_am || !(flags & match_16bit_am));
/* sub-32bit operand modes only work when the op supports 8/16bit AM */
789 if (mode_bits == 8) {
790 if (!(flags & match_8bit_am))
792 /* we don't automatically add upconvs yet */
793 assert((flags & match_mode_neutral) || (flags & match_8bit));
794 } else if (mode_bits == 16) {
795 if (!(flags & match_16bit_am))
797 /* we don't automatically add upconvs yet */
798 assert((flags & match_mode_neutral) || (flags & match_16bit));
801 /* we can simply skip downconvs for mode neutral nodes: the upper bits
802 * can be random for these operations */
803 if (flags & match_mode_neutral) {
804 op2 = ia32_skip_downconv(op2);
806 op1 = ia32_skip_downconv(op1);
810 /* match immediates. firm nodes are normalized: constants are always on the
813 if (!(flags & match_try_am) && use_immediate) {
814 new_op2 = try_create_Immediate(op2, 0);
/* try source AM for op2 first ... */
817 if (new_op2 == NULL &&
818 use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
819 build_address(am, op2);
820 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
821 if(mode_is_float(mode)) {
822 new_op2 = ia32_new_NoReg_vfp(env_cg);
826 am->op_type = ia32_AddrModeS;
/* ... then, for commutative ops, source AM for op1 (operands swapped) */
827 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
829 ia32_use_source_address_mode(block, op1, op2, other_op)) {
831 build_address(am, op1);
833 if (mode_is_float(mode)) {
834 noreg = ia32_new_NoReg_vfp(env_cg);
839 if(new_op2 != NULL) {
842 new_op1 = be_transform_node(op2);
/* operands were swapped, record it for e.g. condition inversion */
844 am->ins_permuted = 1;
846 am->op_type = ia32_AddrModeS;
848 if(flags & match_try_am) {
/* plain register/register fallback */
851 am->op_type = ia32_Normal;
855 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
857 new_op2 = be_transform_node(op2);
858 am->op_type = ia32_Normal;
859 am->ls_mode = get_irn_mode(op2);
860 if(flags & match_mode_neutral)
861 am->ls_mode = mode_Iu;
/* fill unused address parts with NoReg/NoMem */
863 if(addr->base == NULL)
864 addr->base = noreg_gp;
865 if(addr->index == NULL)
866 addr->index = noreg_gp;
867 if(addr->mem == NULL)
868 addr->mem = new_NoMem();
870 am->new_op1 = new_op1;
871 am->new_op2 = new_op2;
872 am->commutative = commutative;
/** If the matched AM consumed a Load, turn @p node into a mode_T node so
 *  the Load's old memory Proj can attach to it, and return a fresh result
 *  Proj.  Returns @p node unchanged when no mem proj is involved.
 *  NOTE(review): gap-sampled — the early "return node;" and local
 *  declarations (mode, load) are elided. */
875 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
877 ir_graph *irg = current_ir_graph;
881 if(am->mem_proj == NULL)
884 /* we have to create a mode_T so the old MemProj can attach to us */
885 mode = get_irn_mode(node);
886 load = get_Proj_pred(am->mem_proj);
/* redirect all further transformations of the Load to the new node */
888 mark_irn_visited(load);
889 be_set_transformed_node(load, node);
892 set_irn_mode(node, mode_T);
893 return new_rd_Proj(NULL, irg, get_nodes_block(node), node, mode, pn_ia32_res);
900 * Construct a standard binary operation, set AM and immediate if required.
902 * @param op1 The first operand
903 * @param op2 The second operand
904 * @param func The node constructor function
905 * @return The constructed ia32 node.
/* NOTE(review): gap-sampled — the new_node declaration and the final
 * return are elided. */
907 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
908 construct_binop_func *func, match_flags_t flags)
910 ir_node *block = get_nodes_block(node);
911 ir_node *new_block = be_transform_node(block);
912 ir_graph *irg = current_ir_graph;
913 dbg_info *dbgi = get_irn_dbg_info(node);
915 ia32_address_mode_t am;
916 ia32_address_t *addr = &am.addr;
918 match_arguments(&am, block, op1, op2, NULL, flags);
920 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
921 am.new_op1, am.new_op2);
922 set_am_attributes(new_node, &am);
923 /* we can't use source address mode anymore when using immediates */
924 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
925 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
926 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
928 new_node = fix_mem_proj(new_node, &am);
/* Common input positions for lowered (l_) binops with eflags; the
 * COMPILETIME_ASSERTs below verify they line up with the generated
 * l_Adc/l_Sbb input numbering.  NOTE(review): the enum head and the
 * n_ia32_l_binop_left enumerator are elided in this extract. */
935 n_ia32_l_binop_right,
936 n_ia32_l_binop_eflags
938 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
939 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
940 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
941 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
942 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
943 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
946 * Construct a binary operation which also consumes the eflags.
948 * @param node The node to transform
949 * @param func The node constructor function
950 * @param flags The match flags
951 * @return The constructor ia32 node
/* Same scheme as gen_binop, but the node's third input (eflags) is
 * transformed and passed through to the constructor.
 * NOTE(review): gap-sampled — declarations and the final return elided. */
953 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
956 ir_node *src_block = get_nodes_block(node);
957 ir_node *block = be_transform_node(src_block);
958 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
959 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
960 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
961 ir_node *new_eflags = be_transform_node(eflags);
962 ir_graph *irg = current_ir_graph;
963 dbg_info *dbgi = get_irn_dbg_info(node);
965 ia32_address_mode_t am;
966 ia32_address_t *addr = &am.addr;
968 match_arguments(&am, src_block, op1, op2, NULL, flags);
970 new_node = func(dbgi, irg, block, addr->base, addr->index,
971 addr->mem, am.new_op1, am.new_op2, new_eflags);
972 set_am_attributes(new_node, &am);
973 /* we can't use source address mode anymore when using immediates */
974 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
975 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
976 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
978 new_node = fix_mem_proj(new_node, &am);
/** Returns the (cached) transformed fp control word ignore-node from the
 *  ABI; computed once and stored in the file-scope initial_fpcw.
 *  NOTE(review): gap-sampled — the early return of the cached value and
 *  the final return are elided. */
983 static ir_node *get_fpcw(void)
986 if(initial_fpcw != NULL)
989 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
990 &ia32_fp_cw_regs[REG_FPCW]);
991 initial_fpcw = be_transform_node(fpcw);
997 * Construct a standard binary operation, set AM and immediate if required.
999 * @param op1 The first operand
1000 * @param op2 The second operand
1001 * @param func The node constructor function
1002 * @return The constructed ia32 node.
/* x87 variant of gen_binop: additionally wires in the fp control word.
 * NOTE(review): gap-sampled — the flag-clearing under the >64bit guard and
 * the final return are elided. */
1004 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1005 construct_binop_float_func *func,
1006 match_flags_t flags)
1008 ir_graph *irg = current_ir_graph;
1009 dbg_info *dbgi = get_irn_dbg_info(node);
1010 ir_node *block = get_nodes_block(node);
1011 ir_node *new_block = be_transform_node(block);
1012 ir_mode *mode = get_irn_mode(node);
1014 ia32_address_mode_t am;
1015 ia32_address_t *addr = &am.addr;
1017 /* cannot use addresmode with long double on x87 */
1018 if (get_mode_size_bits(mode) > 64)
1021 match_arguments(&am, block, op1, op2, NULL, flags);
1023 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
1024 am.new_op1, am.new_op2, get_fpcw());
1025 set_am_attributes(new_node, &am);
1027 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1029 new_node = fix_mem_proj(new_node, &am);
1035 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1037 * @param op1 The first operand
1038 * @param op2 The second operand
1039 * @param func The node constructor function
1040 * @return The constructed ia32 node.
/* Shifts take no address mode: op1 is transformed directly, op2 becomes an
 * immediate when possible.  NOTE(review): gap-sampled — declarations
 * (new_op1, new_op2, new_node) and the final return are elided. */
1042 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1043 construct_shift_func *func,
1044 match_flags_t flags)
1046 dbg_info *dbgi = get_irn_dbg_info(node);
1047 ir_graph *irg = current_ir_graph;
1048 ir_node *block = get_nodes_block(node);
1049 ir_node *new_block = be_transform_node(block);
1054 assert(! mode_is_float(get_irn_mode(node)));
1055 assert(flags & match_immediate);
1056 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1058 if(flags & match_mode_neutral) {
1059 op1 = ia32_skip_downconv(op1);
1060 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1061 panic("right shifting of non-32bit values not supported, yet");
1063 new_op1 = be_transform_node(op1);
1065 /* the shift amount can be any mode that is bigger than 5 bits, since all
1066 * other bits are ignored anyway */
1067 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1068 op2 = get_Conv_op(op2);
1069 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1071 new_op2 = create_immediate_or_transform(op2, 0);
1073 new_node = func(dbgi, irg, new_block, new_op1, new_op2);
1074 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1076 /* lowered shift instruction may have a dependency operand, handle it here */
1077 if (get_irn_arity(node) == 3) {
1078 /* we have a dependency */
1079 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1080 add_irn_dep(new_node, new_dep);
1088 * Construct a standard unary operation, set AM and immediate if required.
1090 * @param op The operand
1091 * @param func The node constructor function
1092 * @return The constructed ia32 node.
/* NOTE(review): gap-sampled — declarations (new_op, new_node) and the
 * final return are elided. */
1094 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1095 match_flags_t flags)
1097 ir_graph *irg = current_ir_graph;
1098 dbg_info *dbgi = get_irn_dbg_info(node);
1099 ir_node *block = get_nodes_block(node);
1100 ir_node *new_block = be_transform_node(block);
/* only mode_neutral is meaningful for unary ops here */
1104 assert(flags == 0 || flags == match_mode_neutral);
1105 if(flags & match_mode_neutral) {
1106 op = ia32_skip_downconv(op);
1109 new_op = be_transform_node(op);
1110 new_node = func(dbgi, irg, new_block, new_op);
1112 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/** Builds an ia32 Lea from a parsed address; absent base/index become
 *  NoReg, present ones are transformed first.
 *  NOTE(review): gap-sampled — the NULL checks guarding the NoReg branches
 *  and the final return are elided. */
1117 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1118 ia32_address_t *addr)
1120 ir_graph *irg = current_ir_graph;
1121 ir_node *base = addr->base;
1122 ir_node *index = addr->index;
1126 base = ia32_new_NoReg_gp(env_cg);
1128 base = be_transform_node(base);
1132 index = ia32_new_NoReg_gp(env_cg);
1134 index = be_transform_node(index);
1137 res = new_rd_ia32_Lea(dbgi, irg, block, base, index);
1138 set_address(res, addr);
/** Returns non-zero if the address contains any immediate component
 *  (offset, symconst or frame reference). */
1143 static int am_has_immediates(const ia32_address_t *addr)
1145 return addr->offset != 0 || addr->symconst_ent != NULL
1146 || addr->frame_entity || addr->use_frame;
1150 * Creates an ia32 Add.
1152 * @return the created ia32 Add node
/* Strategy (see numbered comment below): fold the whole Add into an
 * address expression when possible and emit a Const/Lea, otherwise try
 * source address mode, otherwise a Lea.  NOTE(review): gap-sampled —
 * declarations, several returns and closing braces are elided. */
1154 static ir_node *gen_Add(ir_node *node) {
1155 ir_graph *irg = current_ir_graph;
1156 dbg_info *dbgi = get_irn_dbg_info(node);
1157 ir_node *block = get_nodes_block(node);
1158 ir_node *new_block = be_transform_node(block);
1159 ir_node *op1 = get_Add_left(node);
1160 ir_node *op2 = get_Add_right(node);
1161 ir_mode *mode = get_irn_mode(node);
1163 ir_node *add_immediate_op;
1164 ia32_address_t addr;
1165 ia32_address_mode_t am;
/* float adds go through the generic binop matchers */
1167 if (mode_is_float(mode)) {
1168 if (ia32_cg_config.use_sse2)
1169 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1170 match_commutative | match_am);
1172 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1173 match_commutative | match_am);
1176 ia32_mark_non_am(node);
1178 op2 = ia32_skip_downconv(op2);
1179 op1 = ia32_skip_downconv(op1);
1183 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1184 * 1. Add with immediate -> Lea
1185 * 2. Add with possible source address mode -> Add
1186 * 3. Otherwise -> Lea
1188 memset(&addr, 0, sizeof(addr));
1189 ia32_create_address_mode(&addr, node, /*force=*/1);
1190 add_immediate_op = NULL;
/* case 0: the whole tree folded into immediates -> a single Const */
1192 if(addr.base == NULL && addr.index == NULL) {
1193 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1194 addr.symconst_sign, addr.offset);
1195 add_irn_dep(new_node, get_irg_frame(irg));
1196 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1199 /* add with immediate? */
1200 if(addr.index == NULL) {
1201 add_immediate_op = addr.base;
1202 } else if(addr.base == NULL && addr.scale == 0) {
1203 add_immediate_op = addr.index;
1206 if(add_immediate_op != NULL) {
/* Add x,0 degenerates to x itself — warn, it should have been folded */
1207 if(!am_has_immediates(&addr)) {
1208 #ifdef DEBUG_libfirm
1209 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1212 return be_transform_node(add_immediate_op);
/* case 1: reg + immediate -> Lea */
1215 new_node = create_lea_from_address(dbgi, new_block, &addr);
1216 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1220 /* test if we can use source address mode */
1221 match_arguments(&am, block, op1, op2, NULL, match_commutative
1222 | match_mode_neutral | match_am | match_immediate | match_try_am);
1224 /* construct an Add with source address mode */
1225 if (am.op_type == ia32_AddrModeS) {
1226 ia32_address_t *am_addr = &am.addr;
1227 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1228 am_addr->index, am_addr->mem, am.new_op1,
1230 set_am_attributes(new_node, &am);
1231 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1233 new_node = fix_mem_proj(new_node, &am);
1238 /* otherwise construct a lea */
1239 new_node = create_lea_from_address(dbgi, new_block, &addr);
1240 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1245 * Creates an ia32 Mul.
1247 * @return the created ia32 Mul node
1249 static ir_node *gen_Mul(ir_node *node) {
1250 ir_node *op1 = get_Mul_left(node);
1251 ir_node *op2 = get_Mul_right(node);
1252 ir_mode *mode = get_irn_mode(node);
1254 if (mode_is_float(mode)) {
1255 if (ia32_cg_config.use_sse2)
1256 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1257 match_commutative | match_am);
1259 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1260 match_commutative | match_am);
1262 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1263 match_commutative | match_am | match_mode_neutral |
1264 match_immediate | match_am_and_immediates);
/*
 * NOTE(review): sampled listing — declarations (e.g. `new_node`), trailing
 * call arguments (am.new_op2) and closing braces are missing at the numbering
 * gaps (1284, 1296-1297, 1300-1302). Confirm against the complete file.
 */
1268 * Creates an ia32 Mulh.
1269 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1270 * this result while Mul returns the lower 32 bit.
1272 * @return the created ia32 Mulh node
1274 static ir_node *gen_Mulh(ir_node *node)
1276 ir_node *block = get_nodes_block(node);
1277 ir_node *new_block = be_transform_node(block);
1278 ir_graph *irg = current_ir_graph;
1279 dbg_info *dbgi = get_irn_dbg_info(node);
1280 ir_mode *mode = get_irn_mode(node);
1281 ir_node *op1 = get_Mulh_left(node);
1282 ir_node *op2 = get_Mulh_right(node);
1283 ir_node *proj_res_high;
1285 ia32_address_mode_t am;
1286 ia32_address_t *addr = &am.addr;
1288 assert(!mode_is_float(mode) && "Mulh with float not supported");
1289 assert(get_mode_size_bits(mode) == 32);
1291 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* signed -> IMul1OP, unsigned -> Mul; both produce a 64-bit result pair. */
1293 if (mode_is_signed(mode)) {
1294 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1295 addr->index, addr->mem, am.new_op1,
1298 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1299 addr->index, addr->mem, am.new_op1,
1303 set_am_attributes(new_node, &am);
1304 /* we can't use source address mode anymore when using immediates */
1305 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1306 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1307 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1309 assert(get_irn_mode(new_node) == mode_T);
1311 fix_mem_proj(new_node, &am);
/* Mulh only wants the upper 32 bits; both node kinds use the same proj nr. */
1313 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1314 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1315 mode_Iu, pn_ia32_IMul1OP_res_high);
1317 return proj_res_high;
/*
 * NOTE(review): sampled listing — the source-mode selection for the 0xFF
 * case and several declarations/braces are missing at numbering gaps
 * (1340-1345, 1350-1354, 1357-1358). Confirm against the complete file.
 */
1323 * Creates an ia32 And.
1325 * @return The created ia32 And node
1327 static ir_node *gen_And(ir_node *node) {
1328 ir_node *op1 = get_And_left(node);
1329 ir_node *op2 = get_And_right(node);
1330 assert(! mode_is_float(get_irn_mode(node)));
1332 /* is it a zero extension? */
1333 if (is_Const(op2)) {
1334 tarval *tv = get_Const_tarval(op2);
1335 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero-extension from 8/16 bit -> use a Conv. */
1337 if (v == 0xFF || v == 0xFFFF) {
1338 dbg_info *dbgi = get_irn_dbg_info(node);
1339 ir_node *block = get_nodes_block(node);
1346 assert(v == 0xFFFF);
1349 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain binop And */
1355 return gen_binop(node, op1, op2, new_rd_ia32_And,
1356 match_commutative | match_mode_neutral | match_am
1363 * Creates an ia32 Or.
1365 * @return The created ia32 Or node
1367 static ir_node *gen_Or(ir_node *node) {
1368 ir_node *op1 = get_Or_left(node);
1369 ir_node *op2 = get_Or_right(node);
1371 assert (! mode_is_float(get_irn_mode(node)));
1372 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1373 | match_mode_neutral | match_am | match_immediate);
1379 * Creates an ia32 Eor.
1381 * @return The created ia32 Eor node
1383 static ir_node *gen_Eor(ir_node *node) {
1384 ir_node *op1 = get_Eor_left(node);
1385 ir_node *op2 = get_Eor_right(node);
1387 assert(! mode_is_float(get_irn_mode(node)));
1388 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1389 | match_mode_neutral | match_am | match_immediate);
/*
 * NOTE(review): sampled listing — the guard around the "sub with const"
 * diagnostic (lines 1408-1411, 1413-1415) is missing here. Confirm against
 * the complete file before editing.
 */
1394 * Creates an ia32 Sub.
1396 * @return The created ia32 Sub node
1398 static ir_node *gen_Sub(ir_node *node) {
1399 ir_node *op1 = get_Sub_left(node);
1400 ir_node *op2 = get_Sub_right(node);
1401 ir_mode *mode = get_irn_mode(node);
/* float subs go to SSE2 xSub or x87 vfsub; Sub is not commutative. */
1403 if (mode_is_float(mode)) {
1404 if (ia32_cg_config.use_sse2)
1405 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1407 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
/* presumably only reached when op2 is a Const — TODO confirm guard above */
1412 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1416 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1417 | match_am | match_immediate);
/*
 * NOTE(review): sampled listing — the `case iro_Div/Mod/DivMod:` labels,
 * several declarations (op1/op2/mem/mode/new_mem/new_node), the Sync `in[]`
 * setup and trailing call arguments are missing at the numbering gaps.
 * Confirm against the complete file.
 */
1421 * Generates an ia32 DivMod with additional infrastructure for the
1422 * register allocator if needed.
1424 static ir_node *create_Div(ir_node *node)
1426 ir_graph *irg = current_ir_graph;
1427 dbg_info *dbgi = get_irn_dbg_info(node);
1428 ir_node *block = get_nodes_block(node);
1429 ir_node *new_block = be_transform_node(block);
1436 ir_node *sign_extension;
1437 ia32_address_mode_t am;
1438 ia32_address_t *addr = &am.addr;
1440 /* the upper bits have random contents for smaller modes */
/* shared entry point for Div, Mod and DivMod: pick the operands per opcode */
1441 switch (get_irn_opcode(node)) {
1443 op1 = get_Div_left(node);
1444 op2 = get_Div_right(node);
1445 mem = get_Div_mem(node);
1446 mode = get_Div_resmode(node);
1449 op1 = get_Mod_left(node);
1450 op2 = get_Mod_right(node);
1451 mem = get_Mod_mem(node);
1452 mode = get_Mod_resmode(node);
1455 op1 = get_DivMod_left(node);
1456 op2 = get_DivMod_right(node);
1457 mem = get_DivMod_mem(node);
1458 mode = get_DivMod_resmode(node);
1461 panic("invalid divmod node %+F", node);
1464 match_arguments(&am, block, op1, op2, NULL, match_am);
1466 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1467 is the memory of the consumed address. We can have only the second op as address
1468 in Div nodes, so check only op2. */
1469 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1470 new_mem = be_transform_node(mem);
1471 if(!is_NoMem(addr->mem)) {
1475 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1478 new_mem = addr->mem;
/* signed division needs EDX:EAX sign-extended via Cltd, unsigned zeroes EDX */
1481 if (mode_is_signed(mode)) {
1482 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1483 add_irn_dep(produceval, get_irg_frame(irg));
1484 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1487 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1488 addr->index, new_mem, am.new_op2,
1489 am.new_op1, sign_extension);
1491 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1492 add_irn_dep(sign_extension, get_irg_frame(irg));
1494 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1495 addr->index, new_mem, am.new_op2,
1496 am.new_op1, sign_extension);
1499 set_irn_pinned(new_node, get_irn_pinned(node));
1501 set_am_attributes(new_node, &am);
1502 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1504 new_node = fix_mem_proj(new_node, &am);
1510 static ir_node *gen_Mod(ir_node *node) {
1511 return create_Div(node);
1514 static ir_node *gen_Div(ir_node *node) {
1515 return create_Div(node);
1518 static ir_node *gen_DivMod(ir_node *node) {
1519 return create_Div(node);
1525 * Creates an ia32 floating Div.
1527 * @return The created ia32 xDiv node
1529 static ir_node *gen_Quot(ir_node *node)
1531 ir_node *op1 = get_Quot_left(node);
1532 ir_node *op2 = get_Quot_right(node);
1534 if (ia32_cg_config.use_sse2) {
1535 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1537 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1543 * Creates an ia32 Shl.
1545 * @return The created ia32 Shl node
1547 static ir_node *gen_Shl(ir_node *node) {
1548 ir_node *left = get_Shl_left(node);
1549 ir_node *right = get_Shl_right(node);
1551 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1552 match_mode_neutral | match_immediate);
1556 * Creates an ia32 Shr.
1558 * @return The created ia32 Shr node
1560 static ir_node *gen_Shr(ir_node *node) {
1561 ir_node *left = get_Shr_left(node);
1562 ir_node *right = get_Shr_right(node);
1564 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
/*
 * NOTE(review): sampled listing — the `val == 31` check, the declaration of
 * `op`, the 8/16-bit src_mode selection (1608-1616) and trailing braces are
 * missing at the numbering gaps. Confirm against the complete file.
 */
1570 * Creates an ia32 Sar.
1572 * @return The created ia32 Shrs node
1574 static ir_node *gen_Shrs(ir_node *node) {
1575 ir_node *left = get_Shrs_left(node);
1576 ir_node *right = get_Shrs_right(node);
1577 ir_mode *mode = get_irn_mode(node);
/* Shrs x, 31 on a 32-bit signed value reproduces the sign bit -> Cltd. */
1579 if(is_Const(right) && mode == mode_Is) {
1580 tarval *tv = get_Const_tarval(right);
1581 long val = get_tarval_long(tv);
1583 /* this is a sign extension */
1584 ir_graph *irg = current_ir_graph;
1585 dbg_info *dbgi = get_irn_dbg_info(node);
1586 ir_node *block = be_transform_node(get_nodes_block(node));
1588 ir_node *new_op = be_transform_node(op);
1589 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1590 add_irn_dep(pval, get_irg_frame(irg));
1592 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1596 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C in {16, 24} is a sign-extending Conv. */
1597 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1598 ir_node *shl_left = get_Shl_left(left);
1599 ir_node *shl_right = get_Shl_right(left);
1600 if(is_Const(shl_right)) {
1601 tarval *tv1 = get_Const_tarval(right);
1602 tarval *tv2 = get_Const_tarval(shl_right);
1603 if(tv1 == tv2 && tarval_is_long(tv1)) {
1604 long val = get_tarval_long(tv1);
1605 if(val == 16 || val == 24) {
1606 dbg_info *dbgi = get_irn_dbg_info(node);
1607 ir_node *block = get_nodes_block(node);
1617 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic shift right */
1626 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1632 * Creates an ia32 RotL.
1634 * @param op1 The first operator
1635 * @param op2 The second operator
1636 * @return The created ia32 RotL node
1638 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1639 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1645 * Creates an ia32 RotR.
1646 * NOTE: There is no RotR with immediate because this would always be a RotL
1647 * "imm-mode_size_bits" which can be pre-calculated.
1649 * @param op1 The first operator
1650 * @param op2 The second operator
1651 * @return The created ia32 RotR node
1653 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1654 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
/*
 * NOTE(review): sampled listing — the declaration of `add` (aliasing op2),
 * the mode-check in the conjunction (1685-1686) and the trailing return of
 * `rotate` are missing at the numbering gaps. Confirm against the full file.
 */
1660 * Creates an ia32 RotR or RotL (depending on the found pattern).
1662 * @return The created ia32 RotL or RotR node
1664 static ir_node *gen_Rot(ir_node *node) {
1665 ir_node *rotate = NULL;
1666 ir_node *op1 = get_Rot_left(node);
1667 ir_node *op2 = get_Rot_right(node);
1669 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1670 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1671 that means we can create a RotR instead of an Add and a RotL */
1673 if (get_irn_op(op2) == op_Add) {
1675 ir_node *left = get_Add_left(add);
1676 ir_node *right = get_Add_right(add);
1677 if (is_Const(right)) {
1678 tarval *tv = get_Const_tarval(right);
1679 ir_mode *mode = get_irn_mode(node);
1680 long bits = get_mode_size_bits(mode);
/* match RotL(x, bits - e) == RotR(x, e) */
1682 if (get_irn_op(left) == op_Minus &&
1683 tarval_is_long(tv) &&
1684 get_tarval_long(tv) == bits &&
1687 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1688 rotate = gen_RotR(node, op1, get_Minus_op(left));
/* fallback: plain rotate-left */
1693 if (rotate == NULL) {
1694 rotate = gen_RotL(node, op1, op2);
/*
 * NOTE(review): sampled listing — declarations of `ent`, `size`, `new_node`
 * (lines 1714-1717) and the final `return new_node;` are missing at the
 * numbering gaps. Confirm against the complete file.
 */
1703 * Transforms a Minus node.
1705 * @return The created ia32 Minus node
1707 static ir_node *gen_Minus(ir_node *node)
1709 ir_node *op = get_Minus_op(node);
1710 ir_node *block = be_transform_node(get_nodes_block(node));
1711 ir_graph *irg = current_ir_graph;
1712 dbg_info *dbgi = get_irn_dbg_info(node);
1713 ir_mode *mode = get_irn_mode(node);
1718 if (mode_is_float(mode)) {
1719 ir_node *new_op = be_transform_node(op);
/* SSE2 has no negate: xor the sign bit with a constant from memory instead */
1720 if (ia32_cg_config.use_sse2) {
1721 /* TODO: non-optimal... if we have many xXors, then we should
1722 * rather create a load for the const and use that instead of
1723 * several AM nodes... */
1724 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1725 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1726 ir_node *nomem = new_rd_NoMem(irg);
1728 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1729 nomem, new_op, noreg_xmm);
1731 size = get_mode_size_bits(mode);
1732 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1734 set_ia32_am_sc(new_node, ent);
1735 set_ia32_op_type(new_node, ia32_AddrModeS);
1736 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction */
1738 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
1741 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1744 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1750 * Transforms a Not node.
1752 * @return The created ia32 Not node
1754 static ir_node *gen_Not(ir_node *node) {
1755 ir_node *op = get_Not_op(node);
1757 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1758 assert (! mode_is_float(get_irn_mode(node)));
1760 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
/*
 * NOTE(review): sampled listing — declarations (new_op, new_node, size, ent),
 * an else branch and the final return are missing at the numbering gaps.
 * Confirm against the complete file before editing.
 */
1766 * Transforms an Abs node.
1768 * @return The created ia32 Abs node
1770 static ir_node *gen_Abs(ir_node *node)
1772 ir_node *block = get_nodes_block(node);
1773 ir_node *new_block = be_transform_node(block);
1774 ir_node *op = get_Abs_op(node);
1775 ir_graph *irg = current_ir_graph;
1776 dbg_info *dbgi = get_irn_dbg_info(node);
1777 ir_mode *mode = get_irn_mode(node);
1778 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1779 ir_node *nomem = new_NoMem();
1785 if (mode_is_float(mode)) {
1786 new_op = be_transform_node(op);
/* SSE2: clear the sign bit by ANDing with a constant mask from memory */
1788 if (ia32_cg_config.use_sse2) {
1789 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1790 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1791 nomem, new_op, noreg_fp);
1793 size = get_mode_size_bits(mode);
1794 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1796 set_ia32_am_sc(new_node, ent);
1798 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1800 set_ia32_op_type(new_node, ia32_AddrModeS);
1801 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated abs instruction */
1803 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1804 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* integer abs: (x ^ (x >> 31)) - (x >> 31), via Cltd / Xor / Sub */
1807 ir_node *xor, *pval, *sign_extension;
1809 if (get_mode_size_bits(mode) == 32) {
1810 new_op = be_transform_node(op);
1812 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1815 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1816 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1819 add_irn_dep(pval, get_irg_frame(irg));
1820 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1822 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1823 nomem, new_op, sign_extension);
1824 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1826 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1827 nomem, xor, sign_extension);
1828 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Produces a flags-mode value for a mode_b selector: either the transformed
 * Cmp feeding a Proj, or a Test of the value against itself.
 * NOTE(review): sampled listing — declarations (flags, dbgi, new_block,
 * new_op, noreg, nomem), the is_Proj/is_Cmp guard and the returns are
 * missing at the numbering gaps. Confirm against the complete file.
 */
1834 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1836 ir_graph *irg = current_ir_graph;
1844 /* we have a Cmp as input */
1846 ir_node *pred = get_Proj_pred(node);
1848 flags = be_transform_node(pred);
/* the Proj number encodes the comparison relation */
1849 *pnc_out = get_Proj_proj(node);
1854 /* a mode_b value, we have to compare it against 0 */
1855 dbgi = get_irn_dbg_info(node);
1856 new_block = be_transform_node(get_nodes_block(node));
1857 new_op = be_transform_node(node);
1858 noreg = ia32_new_NoReg_gp(env_cg);
1859 nomem = new_NoMem();
1860 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1861 new_op, new_op, 0, 0);
1862 *pnc_out = pn_Cmp_Lg;
/*
 * NOTE(review): sampled listing — declarations (base/index/new_node/res_mode),
 * NoReg fallbacks for a NULL base/index, trailing call arguments and the
 * final return are missing at the numbering gaps. Confirm against the file.
 */
1867 * Transforms a Load.
1869 * @return the created ia32 Load node
1871 static ir_node *gen_Load(ir_node *node) {
1872 ir_node *old_block = get_nodes_block(node);
1873 ir_node *block = be_transform_node(old_block);
1874 ir_node *ptr = get_Load_ptr(node);
1875 ir_node *mem = get_Load_mem(node);
1876 ir_node *new_mem = be_transform_node(mem);
1879 ir_graph *irg = current_ir_graph;
1880 dbg_info *dbgi = get_irn_dbg_info(node);
1881 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1882 ir_mode *mode = get_Load_mode(node);
1885 ia32_address_t addr;
1887 /* construct load address */
1888 memset(&addr, 0, sizeof(addr));
1889 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1896 base = be_transform_node(base);
1902 index = be_transform_node(index);
/* pick the load flavour by result mode: SSE2 xLoad / x87 vfld / GP Load */
1905 if (mode_is_float(mode)) {
1906 if (ia32_cg_config.use_sse2) {
1907 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1909 res_mode = mode_xmm;
1911 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1913 res_mode = mode_vfp;
1916 assert(mode != mode_b);
1918 /* create a conv node with address mode for smaller modes */
1919 if(get_mode_size_bits(mode) < 32) {
1920 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1921 new_mem, noreg, mode);
1923 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1928 set_irn_pinned(new_node, get_irn_pinned(node));
1929 set_ia32_op_type(new_node, ia32_AddrModeS);
1930 set_ia32_ls_mode(new_node, mode);
1931 set_address(new_node, &addr);
/* unpinned loads may be rematerialized instead of spilled */
1933 if(get_irn_pinned(node) == op_pin_state_floats) {
1934 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1937 /* make sure we are scheduled behind the initial IncSP/Barrier
1938 * to avoid spills being placed before it
1940 if (block == get_irg_start_block(irg)) {
1941 add_irn_dep(new_node, get_irg_frame(irg));
1944 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Decides whether a loaded value may be folded into a destination-AM node.
 * NOTE(review): sampled listing — the `load` declaration, the is_Proj/is_Load
 * guard and the `return 0;`/`return 1;` statements are missing at the
 * numbering gaps. Confirm against the complete file.
 */
1949 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1950 ir_node *ptr, ir_node *other)
1957 /* we only use address mode if we're the only user of the load */
1958 if(get_irn_n_edges(node) > 1)
1961 load = get_Proj_pred(node);
/* load must live in the same block as the store that will consume it */
1964 if(get_nodes_block(load) != block)
1967 /* Store should be attached to the load */
1968 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
1970 /* store should have the same pointer as the load */
1971 if(get_Load_ptr(load) != ptr)
1974 /* don't do AM if other node inputs depend on the load (via mem-proj) */
1975 if(other != NULL && get_nodes_block(other) == block
1976 && heights_reachable_in_block(heights, other, load))
/*
 * Builds a destination-address-mode binop (op reads and writes memory).
 * NOTE(review): sampled listing — declarations (dbgi, block, new_op,
 * new_node, commutative), a `return NULL;` fallthrough and trailing call
 * arguments are missing at the numbering gaps. Confirm against the file.
 */
1982 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
1983 ir_node *mem, ir_node *ptr, ir_mode *mode,
1984 construct_binop_dest_func *func,
1985 construct_binop_dest_func *func8bit,
1986 match_flags_t flags)
1988 ir_node *src_block = get_nodes_block(node);
1990 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1991 ir_graph *irg = current_ir_graph;
1996 ia32_address_mode_t am;
1997 ia32_address_t *addr = &am.addr;
1998 memset(&am, 0, sizeof(am));
2000 assert(flags & match_dest_am);
2001 assert(flags & match_immediate); /* there is no destam node without... */
2002 commutative = (flags & match_commutative) != 0;
/* try folding either operand's load; commutativity allows swapping */
2004 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2005 build_address(&am, op1);
2006 new_op = create_immediate_or_transform(op2, 0);
2007 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2008 build_address(&am, op2);
2009 new_op = create_immediate_or_transform(op1, 0);
/* fill in NoReg/NoMem defaults for missing address parts */
2014 if(addr->base == NULL)
2015 addr->base = noreg_gp;
2016 if(addr->index == NULL)
2017 addr->index = noreg_gp;
2018 if(addr->mem == NULL)
2019 addr->mem = new_NoMem();
2021 dbgi = get_irn_dbg_info(node);
2022 block = be_transform_node(src_block);
2023 if(get_mode_size_bits(mode) == 8) {
2024 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2027 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
2030 set_address(new_node, addr);
2031 set_ia32_op_type(new_node, ia32_AddrModeD);
2032 set_ia32_ls_mode(new_node, mode);
2033 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Builds a destination-address-mode unop (e.g. IncMem/NotMem).
 * NOTE(review): sampled listing — declarations (dbgi, block, new_node), the
 * `return NULL;` after the use_dest_am check and the final return are
 * missing at the numbering gaps. Confirm against the complete file.
 */
2038 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2039 ir_node *ptr, ir_mode *mode,
2040 construct_unop_dest_func *func)
2042 ir_graph *irg = current_ir_graph;
2043 ir_node *src_block = get_nodes_block(node);
2047 ia32_address_mode_t am;
2048 ia32_address_t *addr = &am.addr;
2049 memset(&am, 0, sizeof(am));
2051 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2054 build_address(&am, op);
2056 dbgi = get_irn_dbg_info(node);
2057 block = be_transform_node(src_block);
2058 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2059 set_address(new_node, addr);
2060 set_ia32_op_type(new_node, ia32_AddrModeD);
2061 set_ia32_ls_mode(new_node, mode);
2062 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Tries to turn a Psi feeding a Store into a SetMem (setcc to memory).
 * NOTE(review): sampled listing — declarations (irg, dbgi, block, new_block,
 * cond, flags, pnc, negated, new_mem, new_node), the `negated` assignments,
 * `return NULL;` paths and the final return are missing at the numbering
 * gaps. Confirm against the complete file.
 */
2067 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2068 ir_mode *mode = get_irn_mode(node);
2069 ir_node *psi_true = get_Psi_val(node, 0);
2070 ir_node *psi_default = get_Psi_default(node);
2081 ia32_address_t addr;
/* SetMem writes exactly one byte */
2083 if(get_mode_size_bits(mode) != 8)
/* only 0/1 selections can be expressed as a setcc */
2086 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2088 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2094 build_address_ptr(&addr, ptr, mem);
2096 irg = current_ir_graph;
2097 dbgi = get_irn_dbg_info(node);
2098 block = get_nodes_block(node);
2099 new_block = be_transform_node(block);
2100 cond = get_Psi_cond(node, 0);
2101 flags = get_flags_node(cond, &pnc);
2102 new_mem = be_transform_node(mem);
2103 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2104 addr.index, addr.mem, flags, pnc, negated);
2105 set_address(new_node, &addr);
2106 set_ia32_op_type(new_node, ia32_AddrModeD);
2107 set_ia32_ls_mode(new_node, mode);
2108 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Tries to express Store(op(Load(p)), p) as a single read-modify-write
 * ia32 node (destination address mode).
 * NOTE(review): sampled listing — every `case iro_*:` label, `break;`
 * statements, several declarations, the Conv-skipping loop body and the
 * final return are missing at the numbering gaps. Confirm against the file.
 */
2113 static ir_node *try_create_dest_am(ir_node *node) {
2114 ir_node *val = get_Store_value(node);
2115 ir_node *mem = get_Store_mem(node);
2116 ir_node *ptr = get_Store_ptr(node);
2117 ir_mode *mode = get_irn_mode(val);
2118 unsigned bits = get_mode_size_bits(mode);
2123 /* handle only GP modes for now... */
2124 if(!mode_needs_gp_reg(mode))
2128 /* store must be the only user of the val node */
2129 if(get_irn_n_edges(val) > 1)
2131 /* skip pointless convs */
2133 ir_node *conv_op = get_Conv_op(val);
2134 ir_mode *pred_mode = get_irn_mode(conv_op);
2135 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2143 /* value must be in the same block */
2144 if(get_nodes_block(node) != get_nodes_block(val))
/* dispatch on the operation being stored; case labels lost in sampling */
2147 switch(get_irn_opcode(val)) {
2149 op1 = get_Add_left(val);
2150 op2 = get_Add_right(val);
/* Add +1/-1 become IncMem/DecMem */
2151 if(is_Const_1(op2)) {
2152 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2153 new_rd_ia32_IncMem);
2155 } else if(is_Const_Minus_1(op2)) {
2156 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2157 new_rd_ia32_DecMem);
2160 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2161 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2162 match_dest_am | match_commutative |
2166 op1 = get_Sub_left(val);
2167 op2 = get_Sub_right(val);
2169 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2172 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2173 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2174 match_dest_am | match_immediate |
2178 op1 = get_And_left(val);
2179 op2 = get_And_right(val);
2180 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2181 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2182 match_dest_am | match_commutative |
2186 op1 = get_Or_left(val);
2187 op2 = get_Or_right(val);
2188 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2189 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2190 match_dest_am | match_commutative |
2194 op1 = get_Eor_left(val);
2195 op2 = get_Eor_right(val);
2196 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2197 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2198 match_dest_am | match_commutative |
2202 op1 = get_Shl_left(val);
2203 op2 = get_Shl_right(val);
2204 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2205 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2206 match_dest_am | match_immediate);
2209 op1 = get_Shr_left(val);
2210 op2 = get_Shr_right(val);
2211 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2212 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2213 match_dest_am | match_immediate);
2216 op1 = get_Shrs_left(val);
2217 op2 = get_Shrs_right(val);
2218 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2219 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2220 match_dest_am | match_immediate);
2223 op1 = get_Rot_left(val);
2224 op2 = get_Rot_right(val);
2225 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2226 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2227 match_dest_am | match_immediate);
2229 /* TODO: match ROR patterns... */
2231 new_node = try_create_SetMem(val, ptr, mem);
2234 op1 = get_Minus_op(val);
2235 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2238 /* should be lowered already */
2239 assert(mode != mode_b);
2240 op1 = get_Not_op(val);
2241 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* inherit the Store's pinned state so the RMW node doesn't float */
2247 if(new_node != NULL) {
2248 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2249 get_irn_pinned(node) == op_pin_state_pinned) {
2250 set_irn_pinned(new_node, op_pin_state_pinned);
/*
 * Tests whether a node is a Conv from a float mode to a 32-bit GP mode.
 * NOTE(review): sampled listing — the is_Conv guard, declarations of
 * conv_op/conv_mode and the return statements are missing at the numbering
 * gaps. Confirm against the complete file.
 */
2257 static int is_float_to_int32_conv(const ir_node *node)
2259 ir_mode *mode = get_irn_mode(node);
2263 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2268 conv_op = get_Conv_op(node);
2269 conv_mode = get_irn_mode(conv_op);
2271 if(!mode_is_float(conv_mode))
/*
 * NOTE(review): sampled listing — NoReg fallbacks for NULL base/index,
 * the `new_val` declaration and the final return are missing at the
 * numbering gaps. Confirm against the complete file before editing.
 */
2278 * Transforms a Store.
2280 * @return the created ia32 Store node
2282 static ir_node *gen_Store(ir_node *node)
2284 ir_node *block = get_nodes_block(node);
2285 ir_node *new_block = be_transform_node(block);
2286 ir_node *ptr = get_Store_ptr(node);
2287 ir_node *val = get_Store_value(node);
2288 ir_node *mem = get_Store_mem(node);
2289 ir_graph *irg = current_ir_graph;
2290 dbg_info *dbgi = get_irn_dbg_info(node);
2291 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2292 ir_mode *mode = get_irn_mode(val);
2295 ia32_address_t addr;
2297 /* check for destination address mode */
2298 new_node = try_create_dest_am(node);
2299 if(new_node != NULL)
2302 /* construct store address */
2303 memset(&addr, 0, sizeof(addr));
2304 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2306 if(addr.base == NULL) {
2309 addr.base = be_transform_node(addr.base);
2312 if(addr.index == NULL) {
2315 addr.index = be_transform_node(addr.index);
2317 addr.mem = be_transform_node(mem);
2319 if (mode_is_float(mode)) {
2320 /* convs (and strict-convs) before stores are unnecessary if the mode
2322 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2323 val = get_Conv_op(val);
2325 new_val = be_transform_node(val);
2326 if (ia32_cg_config.use_sse2) {
2327 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2328 addr.index, addr.mem, new_val);
2330 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2331 addr.index, addr.mem, new_val, mode);
/* float->int32 Conv feeding a store: use fist with truncating FPU mode */
2333 } else if(is_float_to_int32_conv(val)) {
2334 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2335 val = get_Conv_op(val);
2337 /* convs (and strict-convs) before stores are unnecessary if the mode
2339 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2340 val = get_Conv_op(val);
2342 new_val = be_transform_node(val);
2344 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2345 addr.index, addr.mem, new_val, trunc_mode);
2347 new_val = create_immediate_or_transform(val, 0);
2348 assert(mode != mode_b);
2350 if (get_mode_size_bits(mode) == 8) {
2351 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2352 addr.index, addr.mem, new_val);
2354 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2355 addr.index, addr.mem, new_val);
2359 set_irn_pinned(new_node, get_irn_pinned(node));
2360 set_ia32_op_type(new_node, ia32_AddrModeD);
2361 set_ia32_ls_mode(new_node, mode);
2363 set_address(new_node, &addr);
2364 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * Builds an ia32 SwitchJmp for a Cond with a non-mode_b selector.
 * NOTE(review): sampled listing — the `new_node` declaration, the min/max
 * update inside the edge loop and the final return are missing at the
 * numbering gaps. Confirm against the complete file.
 */
2369 static ir_node *create_Switch(ir_node *node)
2371 ir_graph *irg = current_ir_graph;
2372 dbg_info *dbgi = get_irn_dbg_info(node);
2373 ir_node *block = be_transform_node(get_nodes_block(node));
2374 ir_node *sel = get_Cond_selector(node);
2375 ir_node *new_sel = be_transform_node(sel);
2376 int switch_min = INT_MAX;
2377 int switch_max = INT_MIN;
2378 long default_pn = get_Cond_defaultProj(node);
2380 const ir_edge_t *edge;
2382 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2384 /* determine the smallest switch case value */
2385 foreach_out_edge(node, edge) {
2386 ir_node *proj = get_edge_src_irn(edge);
2387 long pn = get_Proj_proj(proj);
2388 if(pn == default_pn)
/* jump tables above this span would be absurdly large */
2397 if((unsigned) (switch_max - switch_min) > 256000) {
2398 panic("Size of switch %+F bigger than 256000", node);
2401 if (switch_min != 0) {
2402 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2404 /* if smallest switch case is not 0 we need an additional sub */
2405 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2406 add_ia32_am_offs_int(new_sel, -switch_min);
2407 set_ia32_op_type(new_sel, ia32_AddrModeS);
2409 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2412 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2413 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * NOTE(review): sampled listing — declarations (pnc, new_node) and the final
 * return are missing at the numbering gaps. Confirm against the file.
 */
2419 * Transform a Cond node.
2421 static ir_node *gen_Cond(ir_node *node) {
2422 ir_node *block = get_nodes_block(node);
2423 ir_node *new_block = be_transform_node(block);
2424 ir_graph *irg = current_ir_graph;
2425 dbg_info *dbgi = get_irn_dbg_info(node);
2426 ir_node *sel = get_Cond_selector(node);
2427 ir_mode *sel_mode = get_irn_mode(sel);
2428 ir_node *flags = NULL;
/* non-boolean selector means a switch */
2432 if (sel_mode != mode_b) {
2433 return create_Switch(node);
2436 /* we get flags from a cmp */
2437 flags = get_flags_node(sel, &pnc);
2439 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2440 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/*
 * NOTE(review): sampled listing — the `rem` declaration, the size/4 shift,
 * an else branch with the small-size diagnostic guard and the final return
 * are missing at the numbering gaps. Confirm against the complete file.
 */
2448 * Transforms a CopyB node.
2450 * @return The transformed node.
2452 static ir_node *gen_CopyB(ir_node *node) {
2453 ir_node *block = be_transform_node(get_nodes_block(node));
2454 ir_node *src = get_CopyB_src(node);
2455 ir_node *new_src = be_transform_node(src);
2456 ir_node *dst = get_CopyB_dst(node);
2457 ir_node *new_dst = be_transform_node(dst);
2458 ir_node *mem = get_CopyB_mem(node);
2459 ir_node *new_mem = be_transform_node(mem);
2460 ir_node *res = NULL;
2461 ir_graph *irg = current_ir_graph;
2462 dbg_info *dbgi = get_irn_dbg_info(node);
2463 int size = get_type_size_bytes(get_CopyB_type(node));
2466 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2467 /* then we need the size explicitly in ECX. */
2468 if (size >= 32 * 4) {
2469 rem = size & 0x3; /* size % 4 */
/* word count in ECX via a Const; `rem` holds the trailing bytes */
2472 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
2473 add_irn_dep(res, get_irg_frame(irg));
2475 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
2478 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
/* small copies use the immediate-size CopyB_i variant */
2481 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2484 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/* Transform a be_Copy: duplicate the node and normalise the mode of
 * gp-register values to mode_Iu (the backend treats all gp values as Iu). */
2489 static ir_node *gen_be_Copy(ir_node *node)
2491 ir_node *new_node = be_duplicate_node(node);
2492 ir_mode *mode = get_irn_mode(new_node);
2494 if (mode_needs_gp_reg(mode)) {
2495 set_irn_mode(new_node, mode_Iu);
/* Create an x87 float compare for a Cmp node.
 * Preference order: vFucomi when available (writes eflags directly);
 * otherwise vFtstFnstsw for a compare against constant 0, else
 * vFucomFnstsw — both of the fnstsw variants are followed by a Sahf to move
 * the fpu status word into the cpu flags. */
2501 static ir_node *create_Fucom(ir_node *node)
2503 ir_graph *irg = current_ir_graph;
2504 dbg_info *dbgi = get_irn_dbg_info(node);
2505 ir_node *block = get_nodes_block(node);
2506 ir_node *new_block = be_transform_node(block);
2507 ir_node *left = get_Cmp_left(node);
2508 ir_node *new_left = be_transform_node(left);
2509 ir_node *right = get_Cmp_right(node);
2513 if(ia32_cg_config.use_fucomi) {
2514 new_right = be_transform_node(right);
2515 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2517 set_ia32_commutative(new_node);
2518 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2520 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
/* ftst compares against 0 implicitly, so the right operand need not be
 * transformed at all */
2521 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2524 new_right = be_transform_node(right);
2525 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2529 set_ia32_commutative(new_node);
2531 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2533 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2534 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Create an SSE2 Ucomi compare for a Cmp node. Operands are matched for
 * address-mode folding (commutative, may load one operand from memory);
 * fix_mem_proj rewires a possible memory Proj after the fold. */
2540 static ir_node *create_Ucomi(ir_node *node)
2542 ir_graph *irg = current_ir_graph;
2543 dbg_info *dbgi = get_irn_dbg_info(node);
2544 ir_node *src_block = get_nodes_block(node);
2545 ir_node *new_block = be_transform_node(src_block);
2546 ir_node *left = get_Cmp_left(node);
2547 ir_node *right = get_Cmp_right(node);
2549 ia32_address_mode_t am;
2550 ia32_address_t *addr = &am.addr;
2552 match_arguments(&am, src_block, left, right, NULL,
2553 match_commutative | match_am);
2555 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2556 addr->mem, am.new_op1, am.new_op2,
2558 set_am_attributes(new_node, &am);
2560 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2562 new_node = fix_mem_proj(new_node, &am);
2568 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2569 * to fold an and into a test node
/* TEST only sets flags meaningfully for (in)equality against zero, so the
 * And may only be folded when every user of the Cmp tests Eq or Lg. */
2571 static int can_fold_test_and(ir_node *node)
2573 const ir_edge_t *edge;
2575 /* we can only have eq and lg projs */
2576 foreach_out_edge(node, edge) {
2577 ir_node *proj = get_edge_src_irn(edge);
2578 pn_Cmp pnc = get_Proj_proj(proj);
2579 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/* Transform a Cmp node into the best-fitting ia32 compare:
 *   - float compares are delegated to Ucomi (SSE2) or Fucom (x87);
 *   - Cmp(And(a,b), 0) with a single-use And and only Eq/Lg users folds
 *     into Test(a, b);
 *   - Cmp(x, 0) becomes Cmp against an immediate 0 when x could be matched
 *     as a memory operand, otherwise Test(x, x);
 *   - the general case emits Cmp(left, right) with full AM/immediate
 *     matching.
 * 8-bit operands use the dedicated 8-bit node variants. */
2586 static ir_node *gen_Cmp(ir_node *node)
2588 ir_graph *irg = current_ir_graph;
2589 dbg_info *dbgi = get_irn_dbg_info(node);
2590 ir_node *block = get_nodes_block(node);
2591 ir_node *new_block = be_transform_node(block);
2592 ir_node *left = get_Cmp_left(node);
2593 ir_node *right = get_Cmp_right(node);
2594 ir_mode *cmp_mode = get_irn_mode(left);
2596 ia32_address_mode_t am;
2597 ia32_address_t *addr = &am.addr;
2600 if(mode_is_float(cmp_mode)) {
2601 if (ia32_cg_config.use_sse2) {
2602 return create_Ucomi(node);
2604 return create_Fucom(node);
2608 assert(mode_needs_gp_reg(cmp_mode));
2610 /* we prefer the Test instruction where possible except cases where
2611 * we can use SourceAM */
2612 cmp_unsigned = !mode_is_signed(cmp_mode);
2613 if (is_Const_0(right)) {
2615 get_irn_n_edges(left) == 1 &&
2616 can_fold_test_and(node)) {
2617 /* Test(and_left, and_right) */
2618 ir_node *and_left = get_And_left(left);
2619 ir_node *and_right = get_And_right(left);
2620 ir_mode *mode = get_irn_mode(and_left);
2622 match_arguments(&am, block, and_left, and_right, NULL,
2624 match_am | match_8bit_am | match_16bit_am |
2625 match_am_and_immediates | match_immediate |
2626 match_8bit | match_16bit);
2627 if (get_mode_size_bits(mode) == 8) {
2628 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2629 addr->index, addr->mem, am.new_op1,
2630 am.new_op2, am.ins_permuted,
2633 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2634 addr->index, addr->mem, am.new_op1,
2635 am.new_op2, am.ins_permuted, cmp_unsigned);
2638 match_arguments(&am, block, NULL, left, NULL,
2639 match_am | match_8bit_am | match_16bit_am |
2640 match_8bit | match_16bit);
2641 if (am.op_type == ia32_AddrModeS) {
/* left was folded as a memory operand: compare memory against
 * immediate 0 instead of using Test */
2643 ir_node *imm_zero = try_create_Immediate(right, 0);
2644 if (get_mode_size_bits(cmp_mode) == 8) {
2645 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2646 addr->index, addr->mem, am.new_op2,
2647 imm_zero, am.ins_permuted,
2650 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2651 addr->index, addr->mem, am.new_op2,
2652 imm_zero, am.ins_permuted, cmp_unsigned);
2655 /* Test(left, left) */
2656 if (get_mode_size_bits(cmp_mode) == 8) {
2657 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2658 addr->index, addr->mem, am.new_op2,
2659 am.new_op2, am.ins_permuted,
2662 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2663 addr->index, addr->mem, am.new_op2,
2664 am.new_op2, am.ins_permuted,
2670 /* Cmp(left, right) */
2671 match_arguments(&am, block, left, right, NULL,
2672 match_commutative | match_am | match_8bit_am |
2673 match_16bit_am | match_am_and_immediates |
2674 match_immediate | match_8bit | match_16bit);
2675 if (get_mode_size_bits(cmp_mode) == 8) {
2676 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2677 addr->index, addr->mem, am.new_op1,
2678 am.new_op2, am.ins_permuted,
2681 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2682 addr->index, addr->mem, am.new_op1,
2683 am.new_op2, am.ins_permuted, cmp_unsigned);
2686 set_am_attributes(new_node, &am);
2687 assert(cmp_mode != NULL);
2688 set_ia32_ls_mode(new_node, cmp_mode);
2690 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2692 new_node = fix_mem_proj(new_node, &am);
/* Create an ia32 CMov for a Psi node whose values need gp registers.
 * Only valid when the target supports cmov (asserted below); the false/true
 * values are matched for address-mode folding and the supplied flags node
 * selects between them. */
2697 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2700 ir_graph *irg = current_ir_graph;
2701 dbg_info *dbgi = get_irn_dbg_info(node);
2702 ir_node *block = get_nodes_block(node);
2703 ir_node *new_block = be_transform_node(block);
2704 ir_node *val_true = get_Psi_val(node, 0);
2705 ir_node *val_false = get_Psi_default(node);
2707 match_flags_t match_flags;
2708 ia32_address_mode_t am;
2709 ia32_address_t *addr;
2711 assert(ia32_cg_config.use_cmov);
2712 assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2716 match_flags = match_commutative | match_am | match_16bit_am |
2719 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2721 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2722 addr->mem, am.new_op1, am.new_op2, new_flags,
2723 am.ins_permuted, pnc);
2724 set_am_attributes(new_node, &am);
2726 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2728 new_node = fix_mem_proj(new_node, &am);
/* Create a Set (setcc) node producing a 0/1 value from the given flags and
 * condition. setcc only writes an 8-bit register, so for wider result modes
 * the value is zero-extended via an I2I8Bit Conv with mode_Bu. */
2735 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2736 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2739 ir_graph *irg = current_ir_graph;
2740 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2741 ir_node *nomem = new_NoMem();
2742 ir_mode *mode = get_irn_mode(orig_node);
2745 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2746 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2748 /* we might need to conv the result up */
2749 if(get_mode_size_bits(mode) > 8) {
2750 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2751 nomem, new_node, mode_Bu);
2752 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2759 * Transforms a Psi node into CMov.
2761 * @return The transformed node.
/* Psi(cond, 1, 0) and Psi(cond, 0, 1) collapse to a plain Set (setcc),
 * with the condition possibly inverted via ins_permuted; the general case
 * becomes a CMov. Only single-condition, gp-register Psis are handled. */
2763 static ir_node *gen_Psi(ir_node *node)
2765 dbg_info *dbgi = get_irn_dbg_info(node);
2766 ir_node *block = get_nodes_block(node);
2767 ir_node *new_block = be_transform_node(block);
2768 ir_node *psi_true = get_Psi_val(node, 0);
2769 ir_node *psi_default = get_Psi_default(node);
2770 ir_node *cond = get_Psi_cond(node, 0);
2771 ir_node *flags = NULL;
2775 assert(get_Psi_n_conds(node) == 1);
2776 assert(get_irn_mode(cond) == mode_b);
2777 assert(mode_needs_gp_reg(get_irn_mode(node)));
2779 flags = get_flags_node(cond, &pnc);
2781 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2782 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2783 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2784 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2786 new_node = create_CMov(node, cond, flags, pnc);
2793 * Create a conversion from x87 state register to general purpose.
/* Implements float->int on x87 by spilling through the stack frame:
 * a vfist (with forced truncation rounding mode) stores the value, then an
 * integer Load reads it back. Unsigned 32-bit results are stored as a
 * 64-bit signed integer and only the low 32 bits are loaded, since fist
 * can only store signed values. */
2795 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2796 ir_node *block = be_transform_node(get_nodes_block(node));
2797 ir_node *op = get_Conv_op(node);
2798 ir_node *new_op = be_transform_node(op);
2799 ia32_code_gen_t *cg = env_cg;
2800 ir_graph *irg = current_ir_graph;
2801 dbg_info *dbgi = get_irn_dbg_info(node);
2802 ir_node *noreg = ia32_new_NoReg_gp(cg);
2803 ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2804 ir_mode *mode = get_irn_mode(node);
2805 ir_node *fist, *load;
2808 fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2809 new_NoMem(), new_op, trunc_mode);
2811 set_irn_pinned(fist, op_pin_state_floats);
2812 set_ia32_use_frame(fist);
2813 set_ia32_op_type(fist, ia32_AddrModeD);
2815 assert(get_mode_size_bits(mode) <= 32);
2816 /* exception we can only store signed 32 bit integers, so for unsigned
2817 we store a 64bit (signed) integer and load the lower bits */
2818 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2819 set_ia32_ls_mode(fist, mode_Ls);
2821 set_ia32_ls_mode(fist, mode_Is);
2823 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
2826 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2828 set_irn_pinned(load, op_pin_state_floats);
2829 set_ia32_use_frame(load);
2830 set_ia32_op_type(load, ia32_AddrModeS);
2831 set_ia32_ls_mode(load, mode_Is);
/* tell the stack-slot assignment how large the shared spill slot must be */
2832 if(get_ia32_ls_mode(fist) == mode_Ls) {
2833 ia32_attr_t *attr = get_ia32_attr(load);
2834 attr->data.need_64bit_stackent = 1;
2836 ia32_attr_t *attr = get_ia32_attr(load);
2837 attr->data.need_32bit_stackent = 1;
2839 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2841 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2845 * Creates a x87 strict Conv by placing a Store and a Load
/* x87 registers always hold 80-bit extended precision, so a strict
 * float->float Conv must round through memory: vfst writes the value in the
 * target mode, vfld reads it back. */
2847 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2849 ir_node *block = get_nodes_block(node);
2850 ir_graph *irg = current_ir_graph;
2851 dbg_info *dbgi = get_irn_dbg_info(node);
2852 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2853 ir_node *nomem = new_NoMem();
2854 ir_node *frame = get_irg_frame(irg);
2855 ir_node *store, *load;
2858 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
2860 set_ia32_use_frame(store);
2861 set_ia32_op_type(store, ia32_AddrModeD);
2862 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2864 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
2866 set_ia32_use_frame(load);
2867 set_ia32_op_type(load, ia32_AddrModeS);
2868 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
2870 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
2875 * Create a conversion from general purpose to x87 register
/* int->float on x87 via fild. Three paths:
 *   1. a signed 32-bit source may be folded directly as a source address
 *      mode of the vfild (match_try_am);
 *   2. otherwise the value is first normalised to 32-bit (Conv where the
 *      source is narrower), stored to the frame and filded back;
 *   3. unsigned 32-bit values get a second Store writing a zero high word
 *      so they can be loaded as a signed 64-bit integer (fild has no
 *      unsigned form). */
2877 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
2878 ir_node *src_block = get_nodes_block(node);
2879 ir_node *block = be_transform_node(src_block);
2880 ir_graph *irg = current_ir_graph;
2881 dbg_info *dbgi = get_irn_dbg_info(node);
2882 ir_node *op = get_Conv_op(node);
2883 ir_node *new_op = NULL;
2887 ir_mode *store_mode;
2893 /* fild can use source AM if the operand is a signed 32bit integer */
2894 if (src_mode == mode_Is) {
2895 ia32_address_mode_t am;
2897 match_arguments(&am, src_block, NULL, op, NULL,
2898 match_am | match_try_am);
2899 if (am.op_type == ia32_AddrModeS) {
2900 ia32_address_t *addr = &am.addr;
2902 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
2903 addr->index, addr->mem);
2904 new_node = new_r_Proj(irg, block, fild, mode_vfp,
2907 set_am_attributes(fild, &am);
2908 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
2910 fix_mem_proj(fild, &am);
2915 if(new_op == NULL) {
2916 new_op = be_transform_node(op);
2919 noreg = ia32_new_NoReg_gp(env_cg);
2920 nomem = new_NoMem();
2921 mode = get_irn_mode(op);
2923 /* first convert to 32 bit signed if necessary */
2924 src_bits = get_mode_size_bits(src_mode);
2925 if (src_bits == 8) {
2926 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
2928 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2930 } else if (src_bits < 32) {
2931 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
2933 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2937 assert(get_mode_size_bits(mode) == 32);
2940 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
2943 set_ia32_use_frame(store);
2944 set_ia32_op_type(store, ia32_AddrModeD);
2945 set_ia32_ls_mode(store, mode_Iu);
2947 /* exception for 32bit unsigned, do a 64bit spill+load */
2948 if(!mode_is_signed(mode)) {
2951 ir_node *zero_const = create_Immediate(NULL, 0, 0);
/* second store writes the upper 4 bytes of the 64bit slot with zero */
2953 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
2954 get_irg_frame(irg), noreg, nomem,
2957 set_ia32_use_frame(zero_store);
2958 set_ia32_op_type(zero_store, ia32_AddrModeD);
2959 add_ia32_am_offs_int(zero_store, 4);
2960 set_ia32_ls_mode(zero_store, mode_Iu);
/* the fild must see both stores, so join them with a Sync */
2965 store = new_rd_Sync(dbgi, irg, block, 2, in);
2966 store_mode = mode_Ls;
2968 store_mode = mode_Is;
2972 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
2974 set_ia32_use_frame(fild);
2975 set_ia32_op_type(fild, ia32_AddrModeS);
2976 set_ia32_ls_mode(fild, store_mode);
2978 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
2984 * Create a conversion from one integer mode into another one
/* Emits an ia32 Conv_I2I(8Bit) extending/truncating via the smaller of the
 * two modes; the operand may be folded as a source address mode. */
2986 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
2987 dbg_info *dbgi, ir_node *block, ir_node *op,
2990 ir_graph *irg = current_ir_graph;
2991 int src_bits = get_mode_size_bits(src_mode);
2992 int tgt_bits = get_mode_size_bits(tgt_mode);
2993 ir_node *new_block = be_transform_node(block);
2995 ir_mode *smaller_mode;
2997 ia32_address_mode_t am;
2998 ia32_address_t *addr = &am.addr;
/* the conversion is fully described by the narrower of the two modes */
3001 if (src_bits < tgt_bits) {
3002 smaller_mode = src_mode;
3003 smaller_bits = src_bits;
3005 smaller_mode = tgt_mode;
3006 smaller_bits = tgt_bits;
3009 #ifdef DEBUG_libfirm
3011 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3016 match_arguments(&am, block, NULL, op, NULL,
3017 match_8bit | match_16bit |
3018 match_am | match_8bit_am | match_16bit_am);
3019 if (smaller_bits == 8) {
3020 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3021 addr->index, addr->mem, am.new_op2,
3024 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3025 addr->index, addr->mem, am.new_op2,
3028 set_am_attributes(new_node, &am);
3029 /* match_arguments assume that out-mode = in-mode, this isn't true here
3031 set_ia32_ls_mode(new_node, smaller_mode);
3032 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3033 new_node = fix_mem_proj(new_node, &am);
3038 * Transforms a Conv node.
3040 * @return The created ia32 Conv node
/* Dispatcher for all Conv cases:
 *   - mode_b sources are already 0/1 ints, no code needed;
 *   - same-mode Convs are dropped (strict x87 no-ops excepted);
 *   - float->float: SSE Conv_FP2FP or (strict only) an x87 store/load
 *     round-trip;
 *   - float->int: SSE Conv_FP2I or x87 fist path (gen_x87_fp_to_gp);
 *   - int->float: SSE Conv_I2FP or x87 fild path (gen_x87_gp_to_fp);
 *   - int->int: only needed when the bit sizes differ (create_I2I_Conv). */
3042 static ir_node *gen_Conv(ir_node *node) {
3043 ir_node *block = get_nodes_block(node);
3044 ir_node *new_block = be_transform_node(block);
3045 ir_node *op = get_Conv_op(node);
3046 ir_node *new_op = NULL;
3047 ir_graph *irg = current_ir_graph;
3048 dbg_info *dbgi = get_irn_dbg_info(node);
3049 ir_mode *src_mode = get_irn_mode(op);
3050 ir_mode *tgt_mode = get_irn_mode(node);
3051 int src_bits = get_mode_size_bits(src_mode);
3052 int tgt_bits = get_mode_size_bits(tgt_mode);
3053 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3054 ir_node *nomem = new_rd_NoMem(irg);
3055 ir_node *res = NULL;
3057 if (src_mode == mode_b) {
3058 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3059 /* nothing to do, we already model bools as 0/1 ints */
3060 return be_transform_node(op);
3063 if (src_mode == tgt_mode) {
3064 if (get_Conv_strict(node)) {
3065 if (ia32_cg_config.use_sse2) {
3066 /* when we are in SSE mode, we can kill all strict no-op conversion */
3067 return be_transform_node(op);
3070 /* this should be optimized already, but who knows... */
3071 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3072 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3073 return be_transform_node(op);
3077 if (mode_is_float(src_mode)) {
3078 new_op = be_transform_node(op);
3079 /* we convert from float ... */
3080 if (mode_is_float(tgt_mode)) {
/* narrowing E->D without strict semantics needs no code on x87 */
3081 if(src_mode == mode_E && tgt_mode == mode_D
3082 && !get_Conv_strict(node)) {
3083 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3088 if (ia32_cg_config.use_sse2) {
3089 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3090 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3092 set_ia32_ls_mode(res, tgt_mode);
3094 if(get_Conv_strict(node)) {
3095 res = gen_x87_strict_conv(tgt_mode, new_op);
3096 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3099 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3104 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3105 if (ia32_cg_config.use_sse2) {
3106 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3108 set_ia32_ls_mode(res, src_mode);
3110 return gen_x87_fp_to_gp(node);
3114 /* we convert from int ... */
3115 if (mode_is_float(tgt_mode)) {
3117 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3118 if (ia32_cg_config.use_sse2) {
3119 new_op = be_transform_node(op);
3120 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3122 set_ia32_ls_mode(res, tgt_mode);
3124 res = gen_x87_gp_to_fp(node, src_mode);
/* an x87 int->float result may still need rounding to the target mode */
3125 if(get_Conv_strict(node)) {
3126 res = gen_x87_strict_conv(tgt_mode, res);
3127 SET_IA32_ORIG_NODE(get_Proj_pred(res),
3128 ia32_get_old_node_name(env_cg, node));
3132 } else if(tgt_mode == mode_b) {
3133 /* mode_b lowering already took care that we only have 0/1 values */
3134 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3135 src_mode, tgt_mode));
3136 return be_transform_node(op);
3139 if (src_bits == tgt_bits) {
3140 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3141 src_mode, tgt_mode));
3142 return be_transform_node(op);
3145 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Check whether the value fits the given gcc-style immediate constraint
 * letter (the range checks below correspond to the x86 'I', 'J', 'K', 'L',
 * 'M', 'N', 'O' constraints — case labels elided in this excerpt; verify
 * against the full source). */
3153 static int check_immediate_constraint(long val, char immediate_constraint_type)
3155 switch (immediate_constraint_type) {
3159 return val >= 0 && val <= 32;
3161 return val >= 0 && val <= 63;
3163 return val >= -128 && val <= 127;
3165 return val == 0xff || val == 0xffff;
3167 return val >= 0 && val <= 3;
3169 return val >= 0 && val <= 255;
3171 return val >= 0 && val <= 127;
3175 panic("Invalid immediate constraint found");
/* Try to express the node as an ia32 Immediate. Recognised shapes:
 * Const, SymConst, Add/Sub of (Const, SymConst) in either order, each
 * optionally under a Minus; the signs of the symconst and offset parts are
 * tracked separately. Returns NULL when the node is no valid immediate for
 * the given constraint letter (0 = unconstrained). */
3179 static ir_node *try_create_Immediate(ir_node *node,
3180 char immediate_constraint_type)
3183 tarval *offset = NULL;
3184 int offset_sign = 0;
3186 ir_entity *symconst_ent = NULL;
3187 int symconst_sign = 0;
3189 ir_node *cnst = NULL;
3190 ir_node *symconst = NULL;
3193 mode = get_irn_mode(node);
3194 if(!mode_is_int(mode) && !mode_is_reference(mode)) {
3198 if(is_Minus(node)) {
3200 node = get_Minus_op(node);
3203 if(is_Const(node)) {
3206 offset_sign = minus;
3207 } else if(is_SymConst(node)) {
3210 symconst_sign = minus;
3211 } else if(is_Add(node)) {
3212 ir_node *left = get_Add_left(node);
3213 ir_node *right = get_Add_right(node);
3214 if(is_Const(left) && is_SymConst(right)) {
3217 symconst_sign = minus;
3218 offset_sign = minus;
3219 } else if(is_SymConst(left) && is_Const(right)) {
3222 symconst_sign = minus;
3223 offset_sign = minus;
3225 } else if(is_Sub(node)) {
3226 ir_node *left = get_Sub_left(node);
3227 ir_node *right = get_Sub_right(node);
/* in a Sub the sign of the right operand flips relative to the Minus */
3228 if(is_Const(left) && is_SymConst(right)) {
3231 symconst_sign = !minus;
3232 offset_sign = minus;
3233 } else if(is_SymConst(left) && is_Const(right)) {
3236 symconst_sign = minus;
3237 offset_sign = !minus;
3244 offset = get_Const_tarval(cnst);
3245 if(tarval_is_long(offset)) {
3246 val = get_tarval_long(offset);
3248 ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
3253 if(!check_immediate_constraint(val, immediate_constraint_type))
3256 if(symconst != NULL) {
3257 if(immediate_constraint_type != 0) {
3258 /* we need full 32bits for symconsts */
3262 /* unfortunately the assembler/linker doesn't support -symconst */
3266 if(get_SymConst_kind(symconst) != symconst_addr_ent)
3268 symconst_ent = get_SymConst_entity(symconst);
3270 if(cnst == NULL && symconst == NULL)
3273 if(offset_sign && offset != NULL) {
3274 offset = tarval_neg(offset);
3277 new_node = create_Immediate(symconst_ent, symconst_sign, val);
/* Convenience wrapper: use an Immediate when the node qualifies, otherwise
 * transform it normally. */
3282 static ir_node *create_immediate_or_transform(ir_node *node,
3283 char immediate_constraint_type)
3285 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3286 if (new_node == NULL) {
3287 new_node = be_transform_node(node);
/* Register requirement used for asm operands that need no register at all
 * (memory constraints, clobber-only outputs). */
3292 static const arch_register_req_t no_register_req = {
3293 arch_register_req_type_none,
3294 NULL, /* regclass */
3295 NULL, /* limit bitset */
3297 0 /* different pos */
3301 * An assembler constraint.
/* Parse state/result for one asm operand constraint: the resulting register
 * requirement plus whether (and with which letter) an immediate is allowed. */
3303 typedef struct constraint_t constraint_t;
3304 struct constraint_t {
3307 const arch_register_req_t **out_reqs;
3309 const arch_register_req_t *req;
3310 unsigned immediate_possible;
3311 char immediate_type;
/* Parse a single gcc inline-asm constraint string into a constraint_t.
 * Handles register-class letters (a/b/c/d/D/S, q/Q, A, r/R, f/t/u, x),
 * immediate letters (I..N, i, g), memory ('m'), matching ("0".."9") and
 * rejects constraints that cannot be supported on ia32. The resulting
 * arch_register_req_t is obstack-allocated on the irg obstack. */
3314 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3316 int immediate_possible = 0;
3317 char immediate_type = 0;
3318 unsigned limited = 0;
3319 const arch_register_class_t *cls = NULL;
3320 ir_graph *irg = current_ir_graph;
3321 struct obstack *obst = get_irg_obstack(irg);
3322 arch_register_req_t *req;
3323 unsigned *limited_ptr = NULL;
3327 /* TODO: replace all the asserts with nice error messages */
3330 /* a memory constraint: no need to do anything in backend about it
3331 * (the dependencies are already respected by the memory edge of
3333 constraint->req = &no_register_req;
3345 assert(cls == NULL ||
3346 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3347 cls = &ia32_reg_classes[CLASS_ia32_gp];
3348 limited |= 1 << REG_EAX;
3351 assert(cls == NULL ||
3352 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3353 cls = &ia32_reg_classes[CLASS_ia32_gp];
3354 limited |= 1 << REG_EBX;
3357 assert(cls == NULL ||
3358 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3359 cls = &ia32_reg_classes[CLASS_ia32_gp];
3360 limited |= 1 << REG_ECX;
3363 assert(cls == NULL ||
3364 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3365 cls = &ia32_reg_classes[CLASS_ia32_gp];
3366 limited |= 1 << REG_EDX;
3369 assert(cls == NULL ||
3370 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3371 cls = &ia32_reg_classes[CLASS_ia32_gp];
3372 limited |= 1 << REG_EDI;
3375 assert(cls == NULL ||
3376 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3377 cls = &ia32_reg_classes[CLASS_ia32_gp];
3378 limited |= 1 << REG_ESI;
3381 case 'q': /* q means lower part of the regs only, this makes no
3382 * difference to Q for us (we only assign whole registers) */
3383 assert(cls == NULL ||
3384 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3385 cls = &ia32_reg_classes[CLASS_ia32_gp];
3386 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3390 assert(cls == NULL ||
3391 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3392 cls = &ia32_reg_classes[CLASS_ia32_gp];
3393 limited |= 1 << REG_EAX | 1 << REG_EDX;
3396 assert(cls == NULL ||
3397 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3398 cls = &ia32_reg_classes[CLASS_ia32_gp];
3399 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3400 1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
3407 assert(cls == NULL);
3408 cls = &ia32_reg_classes[CLASS_ia32_gp];
3414 /* TODO: mark values so the x87 simulator knows about t and u */
3415 assert(cls == NULL);
3416 cls = &ia32_reg_classes[CLASS_ia32_vfp];
3421 assert(cls == NULL);
3422 /* TODO: check that sse2 is supported */
3423 cls = &ia32_reg_classes[CLASS_ia32_xmm];
3433 assert(!immediate_possible);
3434 immediate_possible = 1;
3435 immediate_type = *c;
3439 assert(!immediate_possible);
3440 immediate_possible = 1;
3444 assert(!immediate_possible && cls == NULL);
3445 immediate_possible = 1;
3446 cls = &ia32_reg_classes[CLASS_ia32_gp];
3459 assert(constraint->is_in && "can only specify same constraint "
3462 sscanf(c, "%d%n", &same_as, &p);
3470 /* memory constraint no need to do anything in backend about it
3471 * (the dependencies are already respected by the memory edge of
3473 constraint->req = &no_register_req;
3476 case 'E': /* no float consts yet */
3477 case 'F': /* no float consts yet */
3478 case 's': /* makes no sense on x86 */
3479 case 'X': /* we can't support that in firm */
3482 case '<': /* no autodecrement on x86 */
3483 case '>': /* no autoincrement on x86 */
3484 case 'C': /* sse constant not supported yet */
3485 case 'G': /* 80387 constant not supported yet */
3486 case 'y': /* we don't support mmx registers yet */
3487 case 'Z': /* not available in 32 bit mode */
3488 case 'e': /* not available in 32 bit mode */
3489 panic("unsupported asm constraint '%c' found in (%+F)",
3490 *c, current_ir_graph);
3493 panic("unknown asm constraint '%c' found in (%+F)", *c,
3501 const arch_register_req_t *other_constr;
3503 assert(cls == NULL && "same as and register constraint not supported");
3504 assert(!immediate_possible && "same as and immediate constraint not "
3506 assert(same_as < constraint->n_outs && "wrong constraint number in "
3507 "same_as constraint");
3509 other_constr = constraint->out_reqs[same_as];
3511 req = obstack_alloc(obst, sizeof(req[0]));
3512 req->cls = other_constr->cls;
3513 req->type = arch_register_req_type_should_be_same;
3514 req->limited = NULL;
3515 req->other_same = 1U << pos;
3516 req->other_different = 0;
3518 /* switch constraints. This is because in firm we have same_as
3519 * constraints on the output constraints while in the gcc asm syntax
3520 * they are specified on the input constraints */
3521 constraint->req = other_constr;
3522 constraint->out_reqs[same_as] = req;
3523 constraint->immediate_possible = 0;
3527 if(immediate_possible && cls == NULL) {
3528 cls = &ia32_reg_classes[CLASS_ia32_gp];
3530 assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3531 assert(cls != NULL);
3533 if(immediate_possible) {
3534 assert(constraint->is_in
3535 && "immediate make no sense for output constraints");
3537 /* todo: check types (no float input on 'r' constrained in and such... */
/* limited requirements carry their bitset directly behind the req struct */
3540 req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3541 limited_ptr = (unsigned*) (req+1);
3543 req = obstack_alloc(obst, sizeof(req[0]));
3545 memset(req, 0, sizeof(req[0]));
3548 req->type = arch_register_req_type_limited;
3549 *limited_ptr = limited;
3550 req->limited = limited_ptr;
3552 req->type = arch_register_req_type_normal;
3556 constraint->req = req;
3557 constraint->immediate_possible = immediate_possible;
3558 constraint->immediate_type = immediate_type;
/* Turn an asm clobber string into a limited register requirement pinning
 * exactly the named register. The gp-class comparison against name+1 lets
 * "ax" match "eax" etc. */
3561 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3562 const char *clobber)
3564 ir_graph *irg = get_irn_irg(node);
3565 struct obstack *obst = get_irg_obstack(irg);
3566 const arch_register_t *reg = NULL;
3569 arch_register_req_t *req;
3570 const arch_register_class_t *cls;
3575 /* TODO: construct a hashmap instead of doing linear search for clobber
3577 for(c = 0; c < N_CLASSES; ++c) {
3578 cls = & ia32_reg_classes[c];
3579 for(r = 0; r < cls->n_regs; ++r) {
3580 const arch_register_t *temp_reg = arch_register_for_index(cls, r);
3581 if(strcmp(temp_reg->name, clobber) == 0
3582 || (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3591 panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* the limited bitset is a single unsigned, so register indices must fit */
3595 assert(reg->index < 32);
3597 limited = obstack_alloc(obst, sizeof(limited[0]));
3598 *limited = 1 << reg->index;
3600 req = obstack_alloc(obst, sizeof(req[0]));
3601 memset(req, 0, sizeof(req[0]));
3602 req->type = arch_register_req_type_limited;
3604 req->limited = limited;
3606 constraint->req = req;
3607 constraint->immediate_possible = 0;
3608 constraint->immediate_type = 0;
/* Return whether an asm constraint string contains a memory constraint
 * letter (scans every character of the constraint). */
3611 static int is_memory_op(const ir_asm_constraint *constraint)
3613 ident *id = constraint->constraint;
3614 const char *str = get_id_str(id);
3617 for(c = str; *c != '\0'; ++c) {
3626 * generates code for a ASM node
/* Build an ia32_Asm node: parse all output constraints and clobbers into
 * register requirements (reserving one extra out slot for the memory proj),
 * parse input constraints (turning qualifying operands into Immediates),
 * fill the register_map that relates template operand positions to in/out
 * indices, then transform the remaining inputs. */
3628 static ir_node *gen_ASM(ir_node *node)
3631 ir_graph *irg = current_ir_graph;
3632 ir_node *block = get_nodes_block(node);
3633 ir_node *new_block = be_transform_node(block);
3634 dbg_info *dbgi = get_irn_dbg_info(node);
3638 int n_out_constraints;
3640 const arch_register_req_t **out_reg_reqs;
3641 const arch_register_req_t **in_reg_reqs;
3642 ia32_asm_reg_t *register_map;
3643 unsigned reg_map_size = 0;
3644 struct obstack *obst;
3645 const ir_asm_constraint *in_constraints;
3646 const ir_asm_constraint *out_constraints;
3648 constraint_t parsed_constraint;
3650 arity = get_irn_arity(node);
3651 in = alloca(arity * sizeof(in[0]));
3652 memset(in, 0, arity * sizeof(in[0]));
3654 n_out_constraints = get_ASM_n_output_constraints(node);
3655 n_clobbers = get_ASM_n_clobbers(node);
3656 out_arity = n_out_constraints + n_clobbers;
3657 /* hack to keep space for mem proj */
3661 in_constraints = get_ASM_input_constraints(node);
3662 out_constraints = get_ASM_output_constraints(node);
3663 clobbers = get_ASM_clobbers(node);
3665 /* construct output constraints */
3666 obst = get_irg_obstack(irg);
3667 out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3668 parsed_constraint.out_reqs = out_reg_reqs;
3669 parsed_constraint.n_outs = n_out_constraints;
3670 parsed_constraint.is_in = 0;
3672 for(i = 0; i < out_arity; ++i) {
3675 if(i < n_out_constraints) {
3676 const ir_asm_constraint *constraint = &out_constraints[i];
3677 c = get_id_str(constraint->constraint);
3678 parse_asm_constraint(i, &parsed_constraint, c);
3680 if(constraint->pos > reg_map_size)
3681 reg_map_size = constraint->pos;
3683 out_reg_reqs[i] = parsed_constraint.req;
3684 } else if(i < out_arity - 1) {
/* clobber requirements land one slot later, past the mem proj slot */
3685 ident *glob_id = clobbers [i - n_out_constraints];
3686 assert(glob_id != NULL);
3687 c = get_id_str(glob_id);
3688 parse_clobber(node, i, &parsed_constraint, c);
3690 out_reg_reqs[i+1] = parsed_constraint.req;
3694 out_reg_reqs[n_out_constraints] = &no_register_req;
3696 /* construct input constraints */
3697 in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3698 parsed_constraint.is_in = 1;
3699 for(i = 0; i < arity; ++i) {
3700 const ir_asm_constraint *constraint = &in_constraints[i];
3701 ident *constr_id = constraint->constraint;
3702 const char *c = get_id_str(constr_id);
3704 parse_asm_constraint(i, &parsed_constraint, c);
3705 in_reg_reqs[i] = parsed_constraint.req;
3707 if(constraint->pos > reg_map_size)
3708 reg_map_size = constraint->pos;
3710 if(parsed_constraint.immediate_possible) {
3711 ir_node *pred = get_irn_n(node, i);
3712 char imm_type = parsed_constraint.immediate_type;
3713 ir_node *immediate = try_create_Immediate(pred, imm_type);
3715 if(immediate != NULL) {
3722 register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3723 memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
3725 for(i = 0; i < n_out_constraints; ++i) {
3726 const ir_asm_constraint *constraint = &out_constraints[i];
3727 unsigned pos = constraint->pos;
3729 assert(pos < reg_map_size);
3730 register_map[pos].use_input = 0;
3731 register_map[pos].valid = 1;
3732 register_map[pos].memory = is_memory_op(constraint);
3733 register_map[pos].inout_pos = i;
3734 register_map[pos].mode = constraint->mode;
3737 /* transform inputs */
3738 for(i = 0; i < arity; ++i) {
3739 const ir_asm_constraint *constraint = &in_constraints[i];
3740 unsigned pos = constraint->pos;
3741 ir_node *pred = get_irn_n(node, i);
3742 ir_node *transformed;
3744 assert(pos < reg_map_size);
3745 register_map[pos].use_input = 1;
3746 register_map[pos].valid = 1;
3747 register_map[pos].memory = is_memory_op(constraint);
3748 register_map[pos].inout_pos = i;
3749 register_map[pos].mode = constraint->mode;
3754 transformed = be_transform_node(pred);
3755 in[i] = transformed;
3758 new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3759 get_ASM_text(node), register_map);
3761 set_ia32_out_req_all(new_node, out_reg_reqs);
3762 set_ia32_in_req_all(new_node, in_reg_reqs);
3764 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Transforms a be_FrameAddr into an ia32 Lea over the (transformed)
 * frame pointer; the concrete offset is taken from the frame entity
 * during later stack-frame layout.
 */
static ir_node *gen_be_FrameAddr(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *op = be_get_FrameAddr_frame(node);
	ir_node *new_op = be_transform_node(op);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);

	/* base = frame pointer, no index register; mark the node as
	 * frame-relative so the offset gets fixed up later */
	new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
	set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
	set_ia32_use_frame(new_node);

	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Transforms a be_Return.
 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
 * (The ia32 calling convention returns floats in st(0); with SSE2 the value
 * lives in an xmm register, so it is spilled to the stack and re-loaded
 * through the x87 unit, and the Barrier in front of the Return is rebuilt
 * to route the new value/memory.)
 */
static ir_node *gen_be_Return(ir_node *node) {
	ir_graph *irg = current_ir_graph;
	ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
	ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
	ir_entity *ent = get_irg_entity(irg);
	ir_type *tp = get_entity_type(ent);
	ir_node *frame, *sse_store, *fld, *mproj, *barrier;
	ir_node *new_barrier, *new_ret_val, *new_ret_mem;
	int pn_ret_val, pn_ret_mem, arity, i;

	assert(ret_val != NULL);
	/* fast path: nothing to fix when there is no return value or SSE2
	 * is not in use — just copy the Return unchanged */
	if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
	return be_duplicate_node(node);
	res_type = get_method_res_type(tp, 0);
	if (! is_Primitive_type(res_type)) {
	return be_duplicate_node(node);
	mode = get_type_mode(res_type);
	/* only float results need the xmm -> st(0) copy */
	if (! mode_is_float(mode)) {
	return be_duplicate_node(node);
	assert(get_method_n_ress(tp) == 1);

	pn_ret_val = get_Proj_proj(ret_val);
	pn_ret_mem = get_Proj_proj(ret_mem);

	/* get the Barrier */
	barrier = get_Proj_pred(ret_val);

	/* get result input of the Barrier */
	ret_val = get_irn_n(barrier, pn_ret_val);
	new_ret_val = be_transform_node(ret_val);

	/* get memory input of the Barrier */
	ret_mem = get_irn_n(barrier, pn_ret_mem);
	new_ret_mem = be_transform_node(ret_mem);

	frame = get_irg_frame(irg);

	dbgi = get_irn_dbg_info(barrier);
	block = be_transform_node(get_nodes_block(barrier));

	noreg = ia32_new_NoReg_gp(env_cg);

	/* store xmm0 onto stack */
	sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
	new_ret_mem, new_ret_val);
	set_ia32_ls_mode(sse_store, mode);
	set_ia32_op_type(sse_store, ia32_AddrModeD);
	set_ia32_use_frame(sse_store);

	/* load into x87 register */
	fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
	set_ia32_op_type(fld, ia32_AddrModeS);
	set_ia32_use_frame(fld);

	mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
	fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);

	/* create a new barrier */
	arity = get_irn_arity(barrier);
	in = alloca(arity * sizeof(in[0]));
	for (i = 0; i < arity; ++i) {
	/* value/memory inputs are replaced by the fld result and its
	 * memory proj; all other inputs are transformed normally */
	if (i == pn_ret_val) {
	} else if (i == pn_ret_mem) {
	ir_node *in = get_irn_n(barrier, i);
	new_in = be_transform_node(in);

	new_barrier = new_ir_node(dbgi, irg, block,
	get_irn_op(barrier), get_irn_mode(barrier),
	copy_node_attr(barrier, new_barrier);
	be_duplicate_deps(barrier, new_barrier);
	/* register the replacement so the Return below picks it up and the
	 * old barrier is not transformed again */
	be_set_transformed_node(barrier, new_barrier);
	mark_irn_visited(barrier);

	/* transform normally */
	return be_duplicate_node(node);
/**
 * Transform a be_AddSP into an ia32_SubSP.
 * (Reserving stack space means *subtracting* from esp, since the ia32
 * stack grows downwards.)
 */
static ir_node *gen_be_AddSP(ir_node *node)
	ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
	ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);

	return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
/**
 * Transform a be_SubSP into an ia32_AddSP.
 * (Releasing stack space means *adding* to esp; mirror image of
 * gen_be_AddSP above.)
 */
static ir_node *gen_be_SubSP(ir_node *node)
	ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
	ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);

	return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
/**
 * This function just sets the register for the Unknown node
 * as this is not done during register allocation because Unknown
 * is an "ignore" node.
 */
static ir_node *gen_Unknown(ir_node *node) {
	ir_mode *mode = get_irn_mode(node);

	if (mode_is_float(mode)) {
	if (ia32_cg_config.use_sse2) {
	return ia32_new_Unknown_xmm(env_cg);
	/* Unknown nodes are buggy in x87 sim, use zero for now... */
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *block = get_irg_start_block(irg);
	ir_node *ret = new_rd_ia32_vfldz(dbgi, irg, block);

	/* Const Nodes before the initial IncSP are a bad idea, because
	 * they could be spilled and we have no SP ready at that point yet.
	 * So add a dependency to the initial frame pointer calculation to
	 * avoid that situation.
	 */
	add_irn_dep(ret, get_irg_frame(irg));
	} else if (mode_needs_gp_reg(mode)) {
	return ia32_new_Unknown_gp(env_cg);
	panic("unsupported Unknown-Mode");
/**
 * Change some phi modes: Phis survive the transformation, but their mode
 * is adjusted to the backend register-class mode (gp/xmm/vfp).  The Phi
 * keeps its *old* predecessors for now (they may form loops); they are
 * fixed in a later pass via be_enqueue_preds.
 */
static ir_node *gen_Phi(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);

	if(mode_needs_gp_reg(mode)) {
	/* we shouldn't have any 64bit stuff around anymore */
	assert(get_mode_size_bits(mode) <= 32);
	/* all integer operations are on 32bit registers now */
	} else if(mode_is_float(mode)) {
	if (ia32_cg_config.use_sse2) {

	/* phi nodes allow loops, so we use the old arguments for now
	 * and fix this later */
	phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
	get_irn_in(node) + 1);
	copy_node_attr(node, phi);
	be_duplicate_deps(node, phi);

	be_set_transformed_node(node, phi);
	/* schedule the (still untransformed) predecessors */
	be_enqueue_preds(node);
/**
 * Transforms an IJmp (indirect jump) into an ia32_IJmp.  The jump target
 * may be folded into an address mode or an immediate via match_arguments.
 */
static ir_node *gen_IJmp(ir_node *node)
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *op = get_IJmp_target(node);

	ia32_address_mode_t am;
	ia32_address_t *addr = &am.addr;

	/* jump targets are always pointers */
	assert(get_irn_mode(op) == mode_P);

	match_arguments(&am, block, NULL, op, NULL,
	match_am | match_8bit_am | match_16bit_am |
	match_immediate | match_8bit | match_16bit);

	new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
	addr->mem, am.new_op2);
	set_am_attributes(new_node, &am);
	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));

	/* reroute a possible memory proj if source address mode was matched */
	new_node = fix_mem_proj(new_node, &am);
/* Constructor signatures used by the generic lowered Load/Store
 * transformation helpers below (gen_lowered_Load / gen_lowered_Store). */
typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \

typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
	ir_node *val, ir_node *mem);
/**
 * Transforms a lowered Load into a "real" one.
 *
 * @param node  the lowered load node (inputs: 0 = ptr, 1 = mem)
 * @param func  constructor for the concrete ia32 load node
 */
static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr = get_irn_n(node, 0);
	ir_node *new_ptr = be_transform_node(ptr);
	ir_node *mem = get_irn_n(node, 1);
	ir_node *new_mem = be_transform_node(mem);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_ia32_ls_mode(node);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);

	new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);

	/* copy all address-mode attributes from the lowered node */
	set_ia32_op_type(new_op, ia32_AddrModeS);
	set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
	set_ia32_am_scale(new_op, get_ia32_am_scale(node));
	set_ia32_am_sc(new_op, get_ia32_am_sc(node));
	if (is_ia32_am_sc_sign(node))
	set_ia32_am_sc_sign(new_op);
	set_ia32_ls_mode(new_op, mode);
	if (is_ia32_use_frame(node)) {
	set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
	set_ia32_use_frame(new_op);

	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/**
 * Transforms a lowered Store into a "real" one.
 *
 * @param node  the lowered store node (inputs: 0 = ptr, 1 = val, 2 = mem)
 * @param func  constructor for the concrete ia32 store node
 */
static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr = get_irn_n(node, 0);
	ir_node *new_ptr = be_transform_node(ptr);
	ir_node *val = get_irn_n(node, 1);
	ir_node *new_val = be_transform_node(val);
	ir_node *mem = get_irn_n(node, 2);
	ir_node *new_mem = be_transform_node(mem);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	ir_mode *mode = get_ia32_ls_mode(node);

	new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);

	/* carry over the address-mode offset and frame attributes */
	am_offs = get_ia32_am_offs_int(node);
	add_ia32_am_offs_int(new_op, am_offs);

	set_ia32_op_type(new_op, ia32_AddrModeD);
	set_ia32_ls_mode(new_op, mode);
	set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
	set_ia32_use_frame(new_op);

	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/** Transforms an ia32_l_ShlDep into a real ia32 Shl. */
static ir_node *gen_ia32_l_ShlDep(ir_node *node)
	ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);

	return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
	match_immediate | match_mode_neutral);
/** Transforms an ia32_l_ShrDep into a real ia32 Shr. */
static ir_node *gen_ia32_l_ShrDep(ir_node *node)
	ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
	return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/** Transforms an ia32_l_SarDep into a real ia32 Sar. */
static ir_node *gen_ia32_l_SarDep(ir_node *node)
	ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
	ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
	return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/**
 * Transforms an ia32_l_Add (low word of a 64bit add) into an ia32 Add.
 * The resulting Add is forced to mode_T so its flags output can feed the
 * corresponding Adc of the high word.
 */
static ir_node *gen_ia32_l_Add(ir_node *node) {
	ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
	ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
	ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
	match_commutative | match_am | match_immediate |
	match_mode_neutral);

	/* gen_binop may hand back a result Proj; step back to the Add itself */
	if(is_Proj(lowered)) {
	lowered = get_Proj_pred(lowered);
	assert(is_ia32_Add(lowered));
	set_irn_mode(lowered, mode_T);
/** Transforms an ia32_l_Adc (add with carry, high word) into an ia32 Adc. */
static ir_node *gen_ia32_l_Adc(ir_node *node)
	return gen_binop_flags(node, new_rd_ia32_Adc,
	match_commutative | match_am | match_immediate |
	match_mode_neutral);
/**
 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
 *
 * @param node   The node to transform
 * @return the created ia32 vfild node
 */
static ir_node *gen_ia32_l_vfild(ir_node *node) {
	return gen_lowered_Load(node, new_rd_ia32_vfild);
/**
 * Transforms an ia32_l_Load into a "real" ia32_Load node
 *
 * @param node   The node to transform
 * @return the created ia32 Load node
 */
static ir_node *gen_ia32_l_Load(ir_node *node) {
	return gen_lowered_Load(node, new_rd_ia32_Load);
/**
 * Transforms an ia32_l_Store into a "real" ia32_Store node
 *
 * @param node   The node to transform
 * @return the created ia32 Store node
 */
static ir_node *gen_ia32_l_Store(ir_node *node) {
	return gen_lowered_Store(node, new_rd_ia32_Store);
/**
 * Transforms a l_vfist into a "real" vfist node.
 * Unlike the other lowered stores this needs the extra fpcw "truncate"
 * rounding-mode input, hence it cannot go through gen_lowered_Store.
 *
 * @param node   The node to transform
 * @return the created ia32 vfist node
 */
static ir_node *gen_ia32_l_vfist(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *ptr = get_irn_n(node, 0);
	ir_node *new_ptr = be_transform_node(ptr);
	ir_node *val = get_irn_n(node, 1);
	ir_node *new_val = be_transform_node(val);
	ir_node *mem = get_irn_n(node, 2);
	ir_node *new_mem = be_transform_node(mem);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	ir_mode *mode = get_ia32_ls_mode(node);
	/* fist requires truncation rounding mode in the fpu control word */
	ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);

	new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
	new_val, trunc_mode);

	/* carry over address-mode offset and frame attributes */
	am_offs = get_ia32_am_offs_int(node);
	add_ia32_am_offs_int(new_op, am_offs);

	set_ia32_op_type(new_op, ia32_AddrModeD);
	set_ia32_ls_mode(new_op, mode);
	set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
	set_ia32_use_frame(new_op);

	SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/**
 * Transforms a l_MulS into a "real" MulS node.
 *
 * @return the created ia32 Mul node
 */
static ir_node *gen_ia32_l_Mul(ir_node *node) {
	ir_node *left = get_binop_left(node);
	ir_node *right = get_binop_right(node);

	return gen_binop(node, left, right, new_rd_ia32_Mul,
	match_commutative | match_am | match_mode_neutral);
/**
 * Transforms a l_IMulS into a "real" IMul1OPS node.
 *
 * @return the created ia32 IMul1OP node
 */
static ir_node *gen_ia32_l_IMul(ir_node *node) {
	ir_node *left = get_binop_left(node);
	ir_node *right = get_binop_right(node);

	return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
	match_commutative | match_am | match_mode_neutral);
/**
 * Transforms an ia32_l_Sub (low word of a 64bit sub) into an ia32 Sub.
 * The resulting Sub is forced to mode_T so its flags output can feed the
 * corresponding Sbb of the high word (mirror of gen_ia32_l_Add).
 */
static ir_node *gen_ia32_l_Sub(ir_node *node) {
	ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
	ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
	ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
	match_am | match_immediate | match_mode_neutral);

	/* gen_binop may hand back a result Proj; step back to the Sub itself */
	if(is_Proj(lowered)) {
	lowered = get_Proj_pred(lowered);
	assert(is_ia32_Sub(lowered));
	set_irn_mode(lowered, mode_T);
/** Transforms an ia32_l_Sbb (subtract with borrow, high word) into an ia32 Sbb. */
static ir_node *gen_ia32_l_Sbb(ir_node *node) {
	return gen_binop_flags(node, new_rd_ia32_Sbb,
	match_am | match_immediate | match_mode_neutral);
/**
 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
 * op1 - target to be shifted
 * op2 - contains bits to be shifted into target
 * op3 - shift count
 * Only op3 can be an immediate.
 */
static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
	ir_node *low, ir_node *count)
	ir_node *block = get_nodes_block(node);
	ir_node *new_block = be_transform_node(block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *new_high = be_transform_node(high);
	ir_node *new_low = be_transform_node(low);

	/* the shift amount can be any mode that is bigger than 5 bits, since all
	 * other bits are ignored anyway */
	while (is_Conv(count) && get_irn_n_edges(count) == 1) {
	assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
	count = get_Conv_op(count);
	new_count = create_immediate_or_transform(count, 0);

	if (is_ia32_l_ShlD(node)) {
	new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
	new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,

	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/** Transforms an ia32_l_ShlD into an ia32 ShlD (64bit left shift helper). */
static ir_node *gen_ia32_l_ShlD(ir_node *node)
	ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
	ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
	ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
	return gen_lowered_64bit_shifts(node, high, low, count);
/** Transforms an ia32_l_ShrD into an ia32 ShrD (64bit right shift helper). */
static ir_node *gen_ia32_l_ShrD(ir_node *node)
	ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
	ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
	ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
	return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms an ia32_l_LLtoFloat (signed 64bit int -> float conversion):
 * the two 32bit halves are stored to a stack slot and re-loaded as one
 * 64bit integer through the x87 fild instruction.
 */
static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
	ir_node *src_block = get_nodes_block(node);
	ir_node *block = be_transform_node(src_block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *frame = get_irg_frame(irg);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	ir_node *nomem = new_NoMem();
	ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
	ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
	ir_node *new_val_low = be_transform_node(val_low);
	ir_node *new_val_high = be_transform_node(val_high);
	ir_node *store_high;

	/* fild only handles signed 64bit sources */
	if(!mode_is_signed(get_irn_mode(val_high))) {
	panic("unsigned long long -> float not supported yet (%+F)", node);

	/* do a store to a 64bit stack slot: low word at offset 0, ... */
	store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
	store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
	SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
	SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));

	set_ia32_use_frame(store_low);
	set_ia32_use_frame(store_high);
	set_ia32_op_type(store_low, ia32_AddrModeD);
	set_ia32_op_type(store_high, ia32_AddrModeD);
	set_ia32_ls_mode(store_low, mode_Iu);
	set_ia32_ls_mode(store_high, mode_Is);
	/* ... high word at offset 4 */
	add_ia32_am_offs_int(store_high, 4);

	/* serialize both stores before the fild reads the slot */
	sync = new_rd_Sync(dbgi, irg, block, 2, in);

	fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);

	set_ia32_use_frame(fild);
	set_ia32_op_type(fild, ia32_AddrModeS);
	set_ia32_ls_mode(fild, mode_Ls);

	SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));

	return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transforms an ia32_l_FloattoLL (float -> signed 64bit int conversion):
 * the value is written to a 64bit stack slot via the x87 fist instruction
 * (with truncation rounding); the two halves are read back by
 * gen_Proj_l_FloattoLL below.
 */
static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
	ir_node *src_block = get_nodes_block(node);
	ir_node *block = be_transform_node(src_block);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *frame = get_irg_frame(irg);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	ir_node *nomem = new_NoMem();
	ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
	ir_node *new_val = be_transform_node(val);
	/* fist requires truncation rounding mode in the fpu control word */
	ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);

	fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
	SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
	set_ia32_use_frame(fist);
	set_ia32_op_type(fist, ia32_AddrModeD);
	set_ia32_ls_mode(fist, mode_Ls);
/**
 * the BAD transformer: installed for node types that must not appear
 * anymore at this stage; aborts compilation when reached.
 */
static ir_node *bad_transform(ir_node *node) {
	panic("No transform function for %+F available.\n", node);
/**
 * Transforms a Proj of an ia32_l_FloattoLL: reads one 32bit half of the
 * 64bit value the fist wrote to the stack slot (high half at offset +4).
 */
static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
	ir_graph *irg = current_ir_graph;
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_node *frame = get_irg_frame(irg);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	dbg_info *dbgi = get_irn_dbg_info(node);
	long pn = get_Proj_proj(node);

	load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
	SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
	set_ia32_use_frame(load);
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_ls_mode(load, mode_Iu);
	/* we need a 64bit stackslot (fist stores 64bit) even though we only load
	 * 32 bit from it with this particular load */
	attr = get_ia32_attr(load);
	attr->data.need_64bit_stackent = 1;
	if (pn == pn_ia32_l_FloattoLL_res_high) {
	add_ia32_am_offs_int(load, 4);
	assert(pn == pn_ia32_l_FloattoLL_res_low);

	proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
/**
 * Transform the Projs of an AddSP.
 * Note the proj renumbering: be_AddSP was lowered to ia32_SubSP
 * (stack grows downwards), so be_AddSP projs map to SubSP projs.
 */
static ir_node *gen_Proj_be_AddSP(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);

	if (proj == pn_be_AddSP_sp) {
	ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
	pn_ia32_SubSP_stack);
	/* the stack pointer result is pinned to esp */
	arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
	} else if(proj == pn_be_AddSP_res) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
	pn_ia32_SubSP_addr);
	} else if (proj == pn_be_AddSP_M) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);

	return new_rd_Unknown(irg, get_irn_mode(node));
/**
 * Transform the Projs of a SubSP.
 * Mirror of gen_Proj_be_AddSP: be_SubSP was lowered to ia32_AddSP.
 */
static ir_node *gen_Proj_be_SubSP(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);

	if (proj == pn_be_SubSP_sp) {
	ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
	pn_ia32_AddSP_stack);
	/* the stack pointer result is pinned to esp */
	arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
	} else if (proj == pn_be_SubSP_M) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);

	return new_rd_Unknown(irg, get_irn_mode(node));
/**
 * Transform and renumber the Projs from a Load, dispatching on the kind of
 * node the Load was transformed into (ia32_Load, Conv with folded load,
 * ia32_xLoad, ia32_vfld, or a source-address-mode consumer).
 */
static ir_node *gen_Proj_Load(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	long proj = get_Proj_proj(node);

	/* loads might be part of source address mode matches, so we don't
	transform the ProjMs yet (with the exception of loads whose result is
	if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
	assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
	/* this is needed, because sometimes we have loops that are only
	reachable through the ProjM */
	be_enqueue_preds(node);
	/* do it in 2 steps, to silence firm verifier */
	res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
	set_Proj_proj(res, pn_ia32_Load_M);

	/* renumber the proj */
	new_pred = be_transform_node(pred);
	if (is_ia32_Load(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
	case pn_Load_X_regular:
	return new_rd_Jmp(dbgi, irg, block);
	case pn_Load_X_except:
	/* This Load might raise an exception. Mark it. */
	set_ia32_exc_label(new_pred, 1);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
	} else if (is_ia32_Conv_I2I(new_pred) ||
	is_ia32_Conv_I2I8Bit(new_pred)) {
	/* the load was folded into a Conv; give the Conv a memory output */
	set_irn_mode(new_pred, mode_T);
	if (proj == pn_Load_res) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
	} else if (proj == pn_Load_M) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
	} else if (is_ia32_xLoad(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
	case pn_Load_X_regular:
	return new_rd_Jmp(dbgi, irg, block);
	case pn_Load_X_except:
	/* This Load might raise an exception. Mark it. */
	set_ia32_exc_label(new_pred, 1);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
	} else if (is_ia32_vfld(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
	case pn_Load_X_regular:
	return new_rd_Jmp(dbgi, irg, block);
	case pn_Load_X_except:
	/* This Load might raise an exception. Mark it. */
	set_ia32_exc_label(new_pred, 1);
	/* NOTE(review): this vfld branch returns pn_ia32_xLoad_X_exc while the
	 * other projs here use pn_ia32_vfld_*; looks like a copy-paste from the
	 * xLoad branch above — confirm whether pn_ia32_vfld_X_exc is meant. */
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);

	/* can happen for ProJMs when source address mode happened for the
	/* however it should not be the result proj, as that would mean the
	load had multiple users and should not have been used for
	if (proj != pn_Load_M) {
	panic("internal error: transformed node not a Load");
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);

	return new_rd_Unknown(irg, get_irn_mode(node));
/**
 * Transform and renumber the Projs from a DivMod like instruction.
 * Div, Mod and DivMod are all lowered to one ia32 Div/IDiv node, which
 * provides both the quotient (div_res) and remainder (mod_res) outputs.
 */
static ir_node *gen_Proj_DivMod(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);
	long proj = get_Proj_proj(node);

	assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));

	/* dispatch on the *original* opcode, since each has its own proj numbers */
	switch (get_irn_opcode(pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
	case pn_Div_X_regular:
	return new_rd_Jmp(dbgi, irg, block);
	case pn_Div_X_except:
	set_ia32_exc_label(new_pred, 1);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
	case pn_Mod_X_except:
	set_ia32_exc_label(new_pred, 1);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
	case pn_DivMod_res_div:
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
	case pn_DivMod_res_mod:
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
	case pn_DivMod_X_regular:
	return new_rd_Jmp(dbgi, irg, block);
	case pn_DivMod_X_except:
	set_ia32_exc_label(new_pred, 1);
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);

	return new_rd_Unknown(irg, mode);
/**
 * Transform and renumber the Projs from a CopyB
 * (lowered to either ia32_CopyB_i or ia32_CopyB).
 */
static ir_node *gen_Proj_CopyB(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);
	long proj = get_Proj_proj(node);

	case pn_CopyB_M_regular:
	if (is_ia32_CopyB_i(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
	} else if (is_ia32_CopyB(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);

	return new_rd_Unknown(irg, mode);
/**
 * Transform and renumber the Projs from a Quot (float division,
 * lowered to either ia32_xDiv (SSE) or ia32_vfdiv (x87)).
 */
static ir_node *gen_Proj_Quot(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *pred = get_Proj_pred(node);
	ir_node *new_pred = be_transform_node(pred);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_mode *mode = get_irn_mode(node);
	long proj = get_Proj_proj(node);

	if (is_ia32_xDiv(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
	} else if (is_ia32_vfdiv(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
	if (is_ia32_xDiv(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
	} else if (is_ia32_vfdiv(new_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);

	return new_rd_Unknown(irg, mode);
/**
 * Transform the Thread Local Storage Proj into an ia32 LdTls node
 * (loads the TLS base pointer).
 */
static ir_node *gen_Proj_tls(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = NULL;
	ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
/** Duplicate a be_Call and mark it as clobbering the flags register. */
static ir_node *gen_be_Call(ir_node *node) {
	ir_node *res = be_duplicate_node(node);
	be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/** Duplicate a be_IncSP and mark it as clobbering the flags register
 * (it is emitted as an add/sub on esp). */
static ir_node *gen_be_IncSP(ir_node *node) {
	ir_node *res = be_duplicate_node(node);
	be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/**
 * Transform the Projs from a be_Call.
 */
static ir_node *gen_Proj_be_Call(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_type *method_type = be_Call_get_type(call);
	int n_res = get_method_n_ress(method_type);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);
	const arch_register_class_t *cls;

	/* The following is kinda tricky: If we're using SSE, then we have to
	 * move the result value of the call in floating point registers to an
	 * xmm register, we therefore construct a GetST0 -> xLoad sequence
	 * after the call, we have to make sure to correctly make the
	 * MemProj and the result Proj use these 2 nodes
	if (proj == pn_be_Call_M_regular) {
	// get new node for result, are we doing the sse load/store hack?
	ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
	ir_node *call_res_new;
	ir_node *call_res_pred = NULL;

	if (call_res != NULL) {
	call_res_new = be_transform_node(call_res);
	call_res_pred = get_Proj_pred(call_res_new);

	/* no hack applied: memory comes straight from the call */
	if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
	return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
	pn_be_Call_M_regular);
	/* hack applied: memory must come from the xLoad of the sequence */
	assert(is_ia32_xLoad(call_res_pred));
	return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
	if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
	&& proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
	ir_node *frame = get_irg_frame(irg);
	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
	ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);

	/* in case there is no memory output: create one to serialize the copy
	call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
	pn_be_Call_M_regular);
	call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
	pn_be_Call_first_res);

	/* store st(0) onto stack */
	fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
	set_ia32_op_type(fstp, ia32_AddrModeD);
	set_ia32_use_frame(fstp);

	/* load into SSE register */
	sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
	set_ia32_op_type(sse_load, ia32_AddrModeS);
	set_ia32_use_frame(sse_load);

	sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,

	/* transform call modes */
	if (mode_is_data(mode)) {
	cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);

	return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
/**
 * Transform the Projs from a Cmp.
 * Reaching this means an earlier lowering pass failed: all mode_b values
 * should have been lowered before the backend transformation runs.
 */
static ir_node *gen_Proj_Cmp(ir_node *node)
	/* this probably means not all mode_b nodes were lowered... */
	panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4835  * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: inspects the Proj's predecessor and forwards to
 * the matching specialized gen_Proj_* transformer. Falls back to duplicating
 * the node when no special handling applies. */
4837 static ir_node *gen_Proj(ir_node *node) {
4838 ir_graph *irg = current_ir_graph;
4839 dbg_info *dbgi = get_irn_dbg_info(node);
4840 ir_node *pred = get_Proj_pred(node);
4841 long proj = get_Proj_proj(node);
4843 if (is_Store(pred)) {
/* only the memory result of a Store is meaningful; any other Proj
 * of a Store is dead and replaced by Bad */
4844 if (proj == pn_Store_M) {
4845 return be_transform_node(pred);
4848 return new_r_Bad(irg);
4850 } else if (is_Load(pred)) {
4851 return gen_Proj_Load(node);
4852 } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
4853 return gen_Proj_DivMod(node);
4854 } else if (is_CopyB(pred)) {
4855 return gen_Proj_CopyB(node);
4856 } else if (is_Quot(pred)) {
4857 return gen_Proj_Quot(node);
4858 } else if (be_is_SubSP(pred)) {
4859 return gen_Proj_be_SubSP(node);
4860 } else if (be_is_AddSP(pred)) {
4861 return gen_Proj_be_AddSP(node);
4862 } else if (be_is_Call(pred)) {
4863 return gen_Proj_be_Call(node);
4864 } else if (is_Cmp(pred)) {
/* gen_Proj_Cmp only panics: direct Proj(Cmp) means mode_b lowering
 * did not run (see gen_Proj_Cmp) */
4865 return gen_Proj_Cmp(node);
4866 } else if (get_irn_op(pred) == op_Start) {
4867 if (proj == pn_Start_X_initial_exec) {
4868 ir_node *block = get_nodes_block(pred);
4871 /* we exchange the ProjX with a jump */
4872 block = be_transform_node(block);
4873 jump = new_rd_Jmp(dbgi, irg, block);
/* the TLS pointer is fetched via the old anchor; handled specially */
4876 if (node == be_get_old_anchor(anchor_tls)) {
4877 return gen_Proj_tls(node);
4879 } else if (is_ia32_l_FloattoLL(pred)) {
4880 return gen_Proj_l_FloattoLL(node);
4882 } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4886 ir_node *new_pred = be_transform_node(pred);
4887 ir_node *block = be_transform_node(get_nodes_block(node));
4888 ir_mode *mode = get_irn_mode(node);
/* values that live in gp registers are renumbered to mode_Iu Projs */
4889 if (mode_needs_gp_reg(mode)) {
4890 ir_node *new_proj = new_r_Proj(irg, block, new_pred, mode_Iu,
4891 get_Proj_proj(node));
4892 #ifdef DEBUG_libfirm
/* keep the original node number for easier debugging/tracing */
4893 new_proj->node_nr = node->node_nr;
/* default: no special treatment needed, just copy the Proj over */
4899 return be_duplicate_node(node);
4903  * Enters all transform functions into the generic pointer
/* Installs one transform callback per Firm opcode into the op's generic
 * function pointer; be_transform_graph later dispatches through these.
 * GEN(x) registers gen_x as the transformer, BAD(x) registers a handler
 * that aborts because node x must not appear at this stage. */
4905 static void register_transformers(void)
4909 /* first clear the generic function pointer for all ops */
4910 clear_irp_opcodes_generic_func();
4912 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4913 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4951 /* transform ops from intrinsic lowering */
4967 GEN(ia32_l_LLtoFloat);
4968 GEN(ia32_l_FloattoLL);
4974 /* we should never see these nodes */
4989 /* handle generic backend nodes */
/* Mulh is an optional opcode; look it up dynamically instead of linking
 * against op_Mulh directly */
4998 op_Mulh = get_op_Mulh();
5007  * Pre-transform all unknown and noreg nodes.
/* Callback invoked by be_transform_graph before the main transform walk:
 * replaces the code generator's cached Unknown/NoReg placeholder nodes
 * (one per register class: gp, vfp, xmm) with their pre-transformed
 * counterparts so later transformers can reference them directly. */
5009 static void ia32_pretransform_node(void *arch_cg) {
5010 ia32_code_gen_t *cg = arch_cg;
5012 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5013 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5014 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5015 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5016 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5017 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5022  * Walker, checks if all ia32 nodes producing more than one result have
5023  * their Projs, otherwise creates new Projs and keeps them alive using a be_Keep node.
/* Rationale: register allocation needs every produced value to have a user;
 * outputs without a Proj would otherwise be invisible to it. */
5025 static void add_missing_keep_walker(ir_node *node, void *data)
5028 unsigned found_projs = 0;
5029 const ir_edge_t *edge;
5030 ir_mode *mode = get_irn_mode(node);
/* only ia32 machine nodes are of interest here */
5035 if(!is_ia32_irn(node))
5038 n_outs = get_ia32_n_res(node);
/* SwitchJmp is excluded from this fix-up */
5041 if(is_ia32_SwitchJmp(node))
/* found_projs is a bitmask, so the result count must fit in an unsigned */
5044 assert(n_outs < (int) sizeof(unsigned) * 8);
/* first pass: record which output numbers already have a Proj */
5045 foreach_out_edge(node, edge) {
5046 ir_node *proj = get_edge_src_irn(edge);
5047 int pn = get_Proj_proj(proj);
5049 assert(get_irn_mode(proj) == mode_M || pn < n_outs);
5050 found_projs |= 1 << pn;
5054 /* are keeps missing? */
5056 for(i = 0; i < n_outs; ++i) {
5059 const arch_register_req_t *req;
5060 const arch_register_class_t *class;
/* this output already has a user, nothing to do */
5062 if(found_projs & (1 << i)) {
5066 req = get_ia32_out_req(node, i);
/* flag results do not need to be kept alive */
5071 if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
5075 block = get_nodes_block(node);
/* create the missing Proj and keep it: reuse one be_Keep per node
 * if possible, otherwise create a new one and schedule it right
 * after the node (when the node is already scheduled) */
5076 in[0] = new_r_Proj(current_ir_graph, block, node,
5077 arch_register_class_mode(class), i);
5078 if(last_keep != NULL) {
5079 be_Keep_add_node(last_keep, class, in[0]);
5081 last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
5082 if(sched_is_scheduled(node)) {
5083 sched_add_after(node, last_keep);
5090  * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Public entry point: runs add_missing_keep_walker over the whole graph of
 * the given code generator. */
5093 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5095 ir_graph *irg = be_get_birg_irg(cg->birg);
5096 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5099 /* do the transformation */
/* Drives the firm -> ia32 transformation for one graph: registers the
 * per-opcode transform callbacks, precomputes the heights information and
 * the set of nodes excluded from address-mode folding, then runs the
 * generic backend transform walk and releases the auxiliary data. */
5100 void ia32_transform_graph(ia32_code_gen_t *cg) {
5102 ir_graph *irg = cg->irg;
5104 register_transformers();
5106 initial_fpcw = NULL;
5108 BE_TIMER_PUSH(t_heights);
5109 heights = heights_new(irg);
5110 BE_TIMER_POP(t_heights);
5111 ia32_calculate_non_address_mode_nodes(cg->birg);
5113 /* the transform phase is not safe for CSE (yet) because several nodes get
5114  * attributes set after their creation */
/* save the CSE flag so it can be restored after the walk */
5115 cse_last = get_opt_cse();
5118 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
5120 set_opt_cse(cse_last);
5122 ia32_free_non_address_mode_nodes();
5123 heights_free(heights);
5127 void ia32_init_transform(void)
5129 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");