2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
98 static ir_node *initial_fpcw = NULL;
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
/**
 * Return true if a mode can be stored in the GP register set.
 * Rejects the x87 control-word mode (mode_fpcw) and modes wider than
 * 32 bits; accepts integer, reference and mode_b (boolean) values.
 * NOTE(review): the bodies of both if-statements and the closing brace
 * are missing from this extract — presumably "return 0;" each; verify
 * against the upstream file.
 */
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
142 if(mode == mode_fpcw)
144 if(get_mode_size_bits(mode) > 32)
146 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
/**
 * Creates a unique ident by adding a number to a tag.
 *
 * @param tag  the tag string; must contain a %d which is replaced with
 *             a monotonically increasing counter
 * @return a fresh ident built from the formatted string
 * NOTE(review): the declaration of the local buffer "str" and the braces
 * are missing from this extract — verify against the upstream file.
 */
155 static ident *unique_id(const char *tag)
157 static unsigned id = 0;
160 snprintf(str, sizeof(str), tag, ++id);
161 return new_id_from_str(str);
/**
 * Get a primitive type for a mode, creating and caching it on first use.
 * The cache is the @p types pmap keyed by mode; newly created types are
 * named "prim_type_<modename>" and get 16-byte alignment (SSE-friendly).
 * NOTE(review): the cache-hit early return, local declarations and the
 * trailing "return res;" are missing from this extract — verify upstream.
 */
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 pmap_entry *e = pmap_find(types, mode);
174 snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 res = new_type_primitive(new_id_from_str(buf), mode);
176 set_type_alignment_bytes(res, 16);
177 pmap_insert(types, mode, res);
/**
 * Creates an ia32 Immediate node in the start block and pins it to the
 * GP no-register (immediates need no real register).
 *
 * @param symconst       if set, create a SymConst immediate
 * @param symconst_sign  sign for the symconst
 * @param val            integer value for the immediate
 * NOTE(review): the "return immediate;" line is missing from this
 * extract — verify against the upstream file.
 */
191 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
193 ir_graph *irg = current_ir_graph;
194 ir_node *start_block = get_irg_start_block(irg);
195 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block,
196 symconst, symconst_sign, val);
197 arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
/**
 * Get an atomic entity that is initialized with a tarval forming a float
 * constant. Entities are cached in isa->tv_ent keyed by the tarval, so
 * each distinct constant is materialized at most once.
 *
 * @param cnst  the Const node representing the constant
 * NOTE(review): the cache-hit early return, several local declarations,
 * intermediate braces and the final "return res;" are missing from this
 * extract — verify against the upstream file.
 */
208 static ir_entity *create_float_const_entity(ir_node *cnst)
210 ia32_isa_t *isa = env_cg->isa;
211 tarval *key = get_Const_tarval(cnst);
212 pmap_entry *e = pmap_find(isa->tv_ent, key);
218 ir_mode *mode = get_tarval_mode(tv);
/* On x87 the load widens anyway, so shrink the entity when the value is
 * exactly representable in a narrower IEEE mode. */
221 if (! ia32_cg_config.use_sse2) {
222 /* try to reduce the mode to produce smaller sized entities */
223 if (mode != mode_F) {
224 if (tarval_ieee754_can_conv_lossless(tv, mode_F)) {
226 tv = tarval_convert_to(tv, mode);
227 } else if (mode != mode_D) {
228 if (tarval_ieee754_can_conv_lossless(tv, mode_D)) {
230 tv = tarval_convert_to(tv, mode);
236 if (mode == get_irn_mode(cnst)) {
237 /* mode was not changed */
238 tp = get_Const_type(cnst);
239 if (tp == firm_unknown_type)
240 tp = get_prim_type(isa->types, mode);
242 tp = get_prim_type(isa->types, mode);
/* local, constant, statically allocated ".LC%u" entity in the global type */
244 res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
246 set_entity_ld_ident(res, get_entity_ident(res));
247 set_entity_visibility(res, visibility_local);
248 set_entity_variability(res, variability_constant);
249 set_entity_allocation(res, allocation_static);
/* the new entity's initializer must live in the const-code irg */
253 rem = current_ir_graph;
254 current_ir_graph = get_const_code_irg();
255 set_atomic_ent_value(res, new_Const_type(tv, tp));
256 current_ir_graph = rem;
258 pmap_insert(isa->tv_ent, key, res);
/** Returns true if @p node is a Const with the value 0. */
266 static int is_Const_0(ir_node *node) {
267 return is_Const(node) && is_Const_null(node);
/** Returns true if @p node is a Const with the value 1. */
270 static int is_Const_1(ir_node *node) {
271 return is_Const(node) && is_Const_one(node);
/** Returns true if @p node is a Const with all bits set (i.e. -1). */
274 static int is_Const_Minus_1(ir_node *node) {
275 return is_Const(node) && is_Const_all_one(node);
/**
 * Returns true if the constant can be created with a simple x87 float
 * instruction (currently only 0.0 via fldz and 1.0 via fld1).
 * NOTE(review): the return statements and closing brace are missing
 * from this extract — verify against the upstream file.
 */
281 static int is_simple_x87_Const(ir_node *node)
283 tarval *tv = get_Const_tarval(node);
285 if (tarval_is_null(tv) || tarval_is_one(tv))
288 /* TODO: match all the other float constants */
/**
 * Returns true if the constant can be created with a simple SSE
 * instruction sequence (0.0, 1.0, or a double whose low 32 bits make it
 * expressible as a 32-bit constant).
 * NOTE(review): several return statements, the use of "val", and the
 * closing brace are missing from this extract — verify upstream.
 */
295 static int is_simple_sse_Const(ir_node *node)
297 tarval *tv = get_Const_tarval(node);
298 ir_mode *mode = get_tarval_mode(tv);
303 if (tarval_is_null(tv) || tarval_is_one(tv))
306 if (mode == mode_D) {
/* assemble the low 32 bits of the double, byte by byte */
307 unsigned val = get_tarval_sub_bits(tv, 0) |
308 (get_tarval_sub_bits(tv, 1) << 8) |
309 (get_tarval_sub_bits(tv, 2) << 16) |
310 (get_tarval_sub_bits(tv, 3) << 24);
312 /* really a 32bit constant */
316 /* TODO: match all the other float constants */
/**
 * Transforms a firm Const into the matching ia32 node.
 *
 * Float constants: with SSE2, special-case 0.0 (xZero), 1.0 (built from
 * xAllOnes + shifts), and 32-bit-representable values (Const + xMovd,
 * optionally xPsllq for doubles); otherwise load from a constant-pool
 * entity (xLoad). On x87, special-case 0.0 (fldz) / 1.0 (fld1), else
 * vfld from a constant-pool entity. Integer constants become ia32_Const.
 * NOTE(review): many interior lines (braces, else-branches, "return res;")
 * are missing from this extract — verify against the upstream file.
 */
323 static ir_node *gen_Const(ir_node *node) {
324 ir_graph *irg = current_ir_graph;
325 ir_node *old_block = get_nodes_block(node);
326 ir_node *block = be_transform_node(old_block);
327 dbg_info *dbgi = get_irn_dbg_info(node);
328 ir_mode *mode = get_irn_mode(node);
330 assert(is_Const(node));
332 if (mode_is_float(mode)) {
334 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
335 ir_node *nomem = new_NoMem();
339 if (ia32_cg_config.use_sse2) {
340 tarval *tv = get_Const_tarval(node);
341 if (tarval_is_null(tv)) {
342 load = new_rd_ia32_xZero(dbgi, irg, block);
343 set_ia32_ls_mode(load, mode);
345 } else if (tarval_is_one(tv)) {
/* build 1.0 by shifting an all-ones pattern: shift counts differ
 * for single (26) vs. double (55) precision */
346 int cnst = mode == mode_F ? 26 : 55;
347 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
348 ir_node *imm2 = create_Immediate(NULL, 0, 2);
349 ir_node *pslld, *psrld;
351 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
352 set_ia32_ls_mode(load, mode);
353 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
354 set_ia32_ls_mode(pslld, mode);
355 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
356 set_ia32_ls_mode(psrld, mode);
358 } else if (mode == mode_F) {
359 /* we can place any 32bit constant by using a movd gp, sse */
360 unsigned val = get_tarval_sub_bits(tv, 0) |
361 (get_tarval_sub_bits(tv, 1) << 8) |
362 (get_tarval_sub_bits(tv, 2) << 16) |
363 (get_tarval_sub_bits(tv, 3) << 24);
364 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
365 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
366 set_ia32_ls_mode(load, mode);
369 if (mode == mode_D) {
370 unsigned val = get_tarval_sub_bits(tv, 0) |
371 (get_tarval_sub_bits(tv, 1) << 8) |
372 (get_tarval_sub_bits(tv, 2) << 16) |
373 (get_tarval_sub_bits(tv, 3) << 24);
375 ir_node *imm32 = create_Immediate(NULL, 0, 32);
376 ir_node *cnst, *psllq;
378 /* fine, lower 32bit are zero, produce 32bit value */
379 val = get_tarval_sub_bits(tv, 4) |
380 (get_tarval_sub_bits(tv, 5) << 8) |
381 (get_tarval_sub_bits(tv, 6) << 16) |
382 (get_tarval_sub_bits(tv, 7) << 24);
383 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
384 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
385 set_ia32_ls_mode(load, mode);
386 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
387 set_ia32_ls_mode(psllq, mode);
/* fallback: load the constant from a pooled entity; rematerializable */
392 floatent = create_float_const_entity(node);
394 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
396 set_ia32_op_type(load, ia32_AddrModeS);
397 set_ia32_am_sc(load, floatent);
398 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
399 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz/fld1 for 0.0/1.0, vfld from an entity otherwise */
402 if (is_Const_null(node)) {
403 load = new_rd_ia32_vfldz(dbgi, irg, block);
405 set_ia32_ls_mode(load, mode);
406 } else if (is_Const_one(node)) {
407 load = new_rd_ia32_vfld1(dbgi, irg, block);
409 set_ia32_ls_mode(load, mode);
411 floatent = create_float_const_entity(node);
413 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
414 set_ia32_op_type(load, ia32_AddrModeS);
415 set_ia32_am_sc(load, floatent);
416 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
417 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
418 /* take the mode from the entity */
419 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
423 /* Const Nodes before the initial IncSP are a bad idea, because
424 * they could be spilled and we have no SP ready at that point yet.
425 * So add a dependency to the initial frame pointer calculation to
426 * avoid that situation.
428 if (get_irg_start_block(irg) == block) {
429 add_irn_dep(load, get_irg_frame(irg));
432 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
/* integer constant: normalize the tarval to mode_Iu and emit ia32_Const */
436 tarval *tv = get_Const_tarval(node);
439 tv = tarval_convert_to(tv, mode_Iu);
441 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
443 panic("couldn't convert constant tarval (%+F)", node);
445 val = get_tarval_long(tv);
447 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
448 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
451 if (get_irg_start_block(irg) == block) {
452 add_irn_dep(cnst, get_irg_frame(irg));
/**
 * Transforms a SymConst into an ia32 node.
 *
 * Float SymConsts become an xLoad (SSE2) or vfld (x87) of mode_E with
 * the entity as address-mode symconst; integer/address SymConsts become
 * an ia32_Const carrying the entity. Only symconst_addr_ent is supported.
 * NOTE(review): interior lines (braces, else keywords, "return cnst;")
 * are missing from this extract — verify against the upstream file.
 */
462 static ir_node *gen_SymConst(ir_node *node) {
463 ir_graph *irg = current_ir_graph;
464 ir_node *old_block = get_nodes_block(node);
465 ir_node *block = be_transform_node(old_block);
466 dbg_info *dbgi = get_irn_dbg_info(node);
467 ir_mode *mode = get_irn_mode(node);
470 if (mode_is_float(mode)) {
471 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
472 ir_node *nomem = new_NoMem();
474 if (ia32_cg_config.use_sse2)
475 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
477 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
478 set_ia32_am_sc(cnst, get_SymConst_entity(node));
479 set_ia32_use_frame(cnst);
483 if(get_SymConst_kind(node) != symconst_addr_ent) {
484 panic("backend only support symconst_addr_ent (at %+F)", node);
486 entity = get_SymConst_entity(node);
487 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
490 /* Const Nodes before the initial IncSP are a bad idea, because
491 * they could be spilled and we have no SP ready at that point yet
493 if (get_irg_start_block(irg) == block) {
494 add_irn_dep(cnst, get_irg_frame(irg));
497 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/**
 * Generates (and caches) an entity for a known FP constant, used for
 * FP Neg and Abs (sign/abs bit masks) and the int-max constant.
 * The "mode" table field selects mode_Iu (0), mode_Lu (1) or mode_F
 * (default); "align" gives the required type alignment in bytes.
 * NOTE(review): struct fields (tp_name, mode, align), local declarations,
 * and several braces are missing from this extract — verify upstream.
 */
503 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
504 static const struct {
506 const char *ent_name;
507 const char *cnst_str;
510 } names [ia32_known_const_max] = {
511 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
512 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
513 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
514 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
515 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
517 static ir_entity *ent_cache[ia32_known_const_max];
519 const char *tp_name, *ent_name, *cnst_str;
527 ent_name = names[kct].ent_name;
528 if (! ent_cache[kct]) {
529 tp_name = names[kct].tp_name;
530 cnst_str = names[kct].cnst_str;
532 switch (names[kct].mode) {
533 case 0: mode = mode_Iu; break;
534 case 1: mode = mode_Lu; break;
535 default: mode = mode_F; break;
537 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
538 tp = new_type_primitive(new_id_from_str(tp_name), mode);
539 /* set the specified alignment */
540 set_type_alignment_bytes(tp, names[kct].align);
/* local, constant, statically allocated entity in the global type */
542 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
544 set_entity_ld_ident(ent, get_entity_ident(ent));
545 set_entity_visibility(ent, visibility_local);
546 set_entity_variability(ent, variability_constant);
547 set_entity_allocation(ent, allocation_static);
/* the new entity's initializer must live in the const-code irg */
551 rem = current_ir_graph;
552 current_ir_graph = get_const_code_irg();
553 cnst = new_Const(mode, tv);
554 current_ir_graph = rem;
556 set_atomic_ent_value(ent, cnst);
558 /* cache the entry */
559 ent_cache[kct] = ent;
562 return ent_cache[kct];
/**
 * Prints the old node name onto the code generator's obstack and returns
 * a pointer to the NUL-terminated string (storage owned by the obstack,
 * valid until the obstack is freed).
 */
569 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
570 ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
572 lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
573 obstack_1grow(isa->name_obst, 0);
574 return obstack_finish(isa->name_obst);
/**
 * Return true if @p node is a Proj(Load) (or a "simple" float Const) that
 * could be folded into another node as a source-address-mode operand.
 * Returns true only if neither @p other nor @p other2 depends on the
 * memory of the Load (for binary operations pass the other input here,
 * for unary operations pass NULL).
 * NOTE(review): the early "return 0/1;" lines and closing braces are
 * missing from this extract — verify against the upstream file.
 */
584 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
585 ir_node *other, ir_node *other2)
587 ir_mode *mode = get_irn_mode(node);
591 /* float constants are always available */
592 if (is_Const(node) && mode_is_float(mode)) {
593 if (ia32_cg_config.use_sse2) {
594 if (is_simple_sse_Const(node))
597 if (is_simple_x87_Const(node))
600 if (get_irn_n_edges(node) > 1)
/* must be Proj(Load, pn_Load_res) in the same block */
607 load = get_Proj_pred(node);
608 pn = get_Proj_proj(node);
609 if(!is_Load(load) || pn != pn_Load_res)
611 if(get_nodes_block(load) != block)
613 /* we only use address mode if we're the only user of the load */
614 if(get_irn_n_edges(node) > 1)
616 /* in some edge cases with address mode we might reach the load normally
617 * and through some AM sequence, if it is already materialized then we
618 * can't create an AM node from it */
619 if(be_is_transformed(node))
622 /* don't do AM if other node inputs depend on the load (via mem-proj) */
623 if(other != NULL && get_nodes_block(other) == block
624 && heights_reachable_in_block(heights, other, load))
626 if(other2 != NULL && get_nodes_block(other2) == block
627 && heights_reachable_in_block(heights, other2, load))
/**
 * Result of operand matching: addressing info plus the matched operand
 * nodes and matching properties (see match_arguments()).
 * NOTE(review): several fields (addr, ls_mode, pinned, mem_proj,
 * new_op1/new_op2) and the closing brace are missing from this extract —
 * verify against the upstream file.
 */
633 typedef struct ia32_address_mode_t ia32_address_mode_t;
634 struct ia32_address_mode_t {
638 ia32_op_type_t op_type;
642 unsigned commutative : 1;
643 unsigned ins_permuted : 1;
/**
 * Fills @p addr with an ia32 address computed from @p ptr and @p mem:
 * decomposes the pointer into base/index/scale/offset, then transforms
 * base, index and mem into the new graph (NoReg for absent base/index).
 */
646 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
648 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
650 /* construct load address */
651 memset(addr, 0, sizeof(addr[0]));
652 ia32_create_address_mode(addr, ptr, /*force=*/0);
654 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
655 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
656 addr->mem = be_transform_node(mem);
/**
 * Fills am->addr for source address mode from @p node: a float Const is
 * turned into a constant-pool entity reference; otherwise node is assumed
 * to be a Proj(Load) and the Load's address/memory are used.
 * NOTE(review): local declarations (load, ptr, mem, new_mem), an early
 * return, addr->mem assignment in the Load branch and braces are missing
 * from this extract — verify against the upstream file.
 */
659 static void build_address(ia32_address_mode_t *am, ir_node *node)
661 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
662 ia32_address_t *addr = &am->addr;
/* float Const: address the pooled entity directly, no memory dependency */
668 if (is_Const(node)) {
669 ir_entity *entity = create_float_const_entity(node);
670 addr->base = noreg_gp;
671 addr->index = noreg_gp;
672 addr->mem = new_NoMem();
673 addr->symconst_ent = entity;
675 am->ls_mode = get_irn_mode(node);
676 am->pinned = op_pin_state_floats;
/* Proj(Load): reuse the Load's pointer, memory, mode and pinned state */
680 load = get_Proj_pred(node);
681 ptr = get_Load_ptr(load);
682 mem = get_Load_mem(load);
683 new_mem = be_transform_node(mem);
684 am->pinned = get_irn_pinned(load);
685 am->ls_mode = get_Load_mode(load);
686 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
688 /* construct load address */
689 ia32_create_address_mode(addr, ptr, /*force=*/0);
691 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
692 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/**
 * Copies the address-mode attributes from @p addr onto the ia32 node:
 * scale, symconst (and its sign), integer offset, and frame entity.
 * NOTE(review): the condition guarding the use_frame/frame_ent pair
 * appears to be missing from this extract — verify upstream.
 */
696 static void set_address(ir_node *node, const ia32_address_t *addr)
698 set_ia32_am_scale(node, addr->scale);
699 set_ia32_am_sc(node, addr->symconst_ent);
700 set_ia32_am_offs_int(node, addr->offset);
701 if(addr->symconst_sign)
702 set_ia32_am_sc_sign(node);
704 set_ia32_use_frame(node);
705 set_ia32_frame_ent(node, addr->frame_entity);
/**
 * Applies all matched address-mode attributes from @p am to the new ia32
 * node: address, operation type, load/store mode, pinned state, and the
 * commutative flag.
 * NOTE(review): the condition guarding set_ia32_commutative and closing
 * braces are missing from this extract — verify upstream.
 */
708 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
710 set_address(node, &am->addr);
712 set_ia32_op_type(node, am->op_type);
713 set_ia32_ls_mode(node, am->ls_mode);
/* only pin the node if the matched load was pinned and we aren't yet */
714 if(am->pinned == op_pin_state_pinned && get_irn_pinned(node) != op_pin_state_pinned) {
715 set_irn_pinned(node, am->pinned);
718 set_ia32_commutative(node);
/**
 * Check if a given node is a Down-Conv, i.e. an integer Conv from a mode
 * with more bits to a mode with fewer bits. Returns true only if the
 * node has no more than 1 user (so skipping it is safe).
 *
 * @param node  the node
 * @return non-zero if node is a Down-Conv
 * NOTE(review): the is_Conv() guard, local declarations and early returns
 * are missing from this extract — verify against the upstream file.
 */
729 static int is_downconv(const ir_node *node)
737 /* we only want to skip the conv when we're the only user
738 * (not optimal but for now...)
740 if(get_irn_n_edges(node) > 1)
743 src_mode = get_irn_mode(get_Conv_op(node));
744 dest_mode = get_irn_mode(node);
745 return mode_needs_gp_reg(src_mode)
746 && mode_needs_gp_reg(dest_mode)
747 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
/**
 * Skip all Down-Conv's on a given node and return the resulting node.
 * NOTE(review): the trailing "return node;" is missing from this
 * extract — verify against the upstream file.
 */
751 ir_node *ia32_skip_downconv(ir_node *node) {
752 while (is_downconv(node))
753 node = get_Conv_op(node);
/**
 * Creates an integer up-conversion (I2I Conv) of @p node; the target
 * mode appears to be chosen by the signedness of node's mode.
 * @p orig_node is kept for debug-name attribution.
 * NOTE(review): the tgt_mode selection, local declarations and braces
 * are missing from this extract — verify against the upstream file.
 */
759 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
761 ir_mode *mode = get_irn_mode(node);
766 if(mode_is_signed(mode)) {
771 block = get_nodes_block(node);
772 dbgi = get_irn_dbg_info(node);
774 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
/**
 * Matches operands of a node into ia32 addressing/operand modes. This
 * covers usage of source address mode, immediates, and operations with
 * non-32-bit modes.
 *
 * The resulting data is filled into the @p am struct. @p block is the
 * block of the node whose arguments are matched. @p op1 and @p op2 are
 * the first and second input that are matched (op1 may be NULL).
 * @p other_op is another unrelated input that is NOT matched, but which
 * is sometimes needed to check whether AM for op1/op2 is legal.
 * @p flags describes the supported modes of the operation in detail.
 * NOTE(review): many interior lines (fallback branches, some assignments,
 * braces) are missing from this extract — verify against upstream.
 */
789 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
790 ir_node *op1, ir_node *op2, ir_node *other_op,
793 ia32_address_t *addr = &am->addr;
794 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
797 ir_mode *mode = get_irn_mode(op2);
799 unsigned commutative;
800 int use_am_and_immediates;
802 int mode_bits = get_mode_size_bits(mode);
804 memset(am, 0, sizeof(am[0]));
/* decode the match flags into local booleans */
806 commutative = (flags & match_commutative) != 0;
807 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
808 use_am = (flags & match_am) != 0;
809 use_immediate = (flags & match_immediate) != 0;
810 assert(!use_am_and_immediates || use_immediate);
813 assert(!commutative || op1 != NULL);
814 assert(use_am || !(flags & match_8bit_am));
815 assert(use_am || !(flags & match_16bit_am));
817 if (mode_bits == 8) {
818 if (!(flags & match_8bit_am))
820 /* we don't automatically add upconvs yet */
821 assert((flags & match_mode_neutral) || (flags & match_8bit));
822 } else if (mode_bits == 16) {
823 if (!(flags & match_16bit_am))
825 /* we don't automatically add upconvs yet */
826 assert((flags & match_mode_neutral) || (flags & match_16bit));
829 /* we can simply skip downconvs for mode neutral nodes: the upper bits
830 * can be random for these operations */
831 if (flags & match_mode_neutral) {
832 op2 = ia32_skip_downconv(op2);
834 op1 = ia32_skip_downconv(op1);
838 /* match immediates. firm nodes are normalized: constants are always on the
841 if (!(flags & match_try_am) && use_immediate) {
842 new_op2 = try_create_Immediate(op2, 0);
/* try folding op2 as a source-address-mode operand */
845 if (new_op2 == NULL &&
846 use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
847 build_address(am, op2);
848 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 if(mode_is_float(mode)) {
850 new_op2 = ia32_new_NoReg_vfp(env_cg);
854 am->op_type = ia32_AddrModeS;
/* commutative: try folding op1 instead (operands get permuted) */
855 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
857 ia32_use_source_address_mode(block, op1, op2, other_op)) {
859 build_address(am, op1);
861 if (mode_is_float(mode)) {
862 noreg = ia32_new_NoReg_vfp(env_cg);
867 if(new_op2 != NULL) {
870 new_op1 = be_transform_node(op2);
872 am->ins_permuted = 1;
874 am->op_type = ia32_AddrModeS;
876 if(flags & match_try_am) {
/* no AM matched: plain register/register (ia32_Normal) operation */
879 am->op_type = ia32_Normal;
883 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
885 new_op2 = be_transform_node(op2);
886 am->op_type = ia32_Normal;
887 am->ls_mode = get_irn_mode(op2);
888 if(flags & match_mode_neutral)
889 am->ls_mode = mode_Iu;
/* fill unused address parts with NoReg/NoMem */
891 if(addr->base == NULL)
892 addr->base = noreg_gp;
893 if(addr->index == NULL)
894 addr->index = noreg_gp;
895 if(addr->mem == NULL)
896 addr->mem = new_NoMem();
898 am->new_op1 = new_op1;
899 am->new_op2 = new_op2;
900 am->commutative = commutative;
/**
 * If the matched operand had a memory Proj, turn @p node into a mode_T
 * node (so the old MemProj can attach to it), register it as the
 * transformed Load, and return a result Proj instead of the node itself.
 * NOTE(review): the early "return node;", local declarations and braces
 * are missing from this extract — verify against the upstream file.
 */
903 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
905 ir_graph *irg = current_ir_graph;
909 if(am->mem_proj == NULL)
912 /* we have to create a mode_T so the old MemProj can attach to us */
913 mode = get_irn_mode(node);
914 load = get_Proj_pred(am->mem_proj);
/* the folded Load is now represented by this node */
916 mark_irn_visited(load);
917 be_set_transformed_node(load, node);
920 set_irn_mode(node, mode_T);
921 return new_rd_Proj(NULL, irg, get_nodes_block(node), node, mode, pn_ia32_res);
/**
 * Construct a standard binary operation; sets AM and immediate if
 * required.
 *
 * @param op1    The first operand
 * @param op2    The second operand
 * @param func   The node constructor function
 * @param flags  The match flags
 * @return The constructed ia32 node.
 * NOTE(review): local declarations and the trailing "return new_node;"
 * are missing from this extract — verify against the upstream file.
 */
935 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
936 construct_binop_func *func, match_flags_t flags)
938 ir_node *block = get_nodes_block(node);
939 ir_node *new_block = be_transform_node(block);
940 ir_graph *irg = current_ir_graph;
941 dbg_info *dbgi = get_irn_dbg_info(node);
943 ia32_address_mode_t am;
944 ia32_address_t *addr = &am.addr;
946 match_arguments(&am, block, op1, op2, NULL, flags);
948 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
949 am.new_op1, am.new_op2);
950 set_am_attributes(new_node, &am);
951 /* we can't use source address mode anymore when using immediates */
952 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
953 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
954 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
956 new_node = fix_mem_proj(new_node, &am);
/* Generic input positions for lowered binary ops with eflags; the
 * compile-time asserts check that l_Adc and l_Sbb use the same layout.
 * NOTE(review): the enum opener and its first member (n_ia32_l_binop_left)
 * are missing from this extract — verify against the upstream file. */
963 n_ia32_l_binop_right,
964 n_ia32_l_binop_eflags
966 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
967 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
968 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
969 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
970 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
971 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
/**
 * Construct a binary operation which also consumes the eflags.
 *
 * @param node   The node to transform
 * @param func   The node constructor function
 * @param flags  The match flags
 * @return The constructed ia32 node
 * NOTE(review): the flags parameter in the signature, local declarations
 * and "return new_node;" are missing from this extract — verify upstream.
 */
981 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
984 ir_node *src_block = get_nodes_block(node);
985 ir_node *block = be_transform_node(src_block);
986 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
987 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
988 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
989 ir_node *new_eflags = be_transform_node(eflags);
990 ir_graph *irg = current_ir_graph;
991 dbg_info *dbgi = get_irn_dbg_info(node);
993 ia32_address_mode_t am;
994 ia32_address_t *addr = &am.addr;
996 match_arguments(&am, src_block, op1, op2, NULL, flags);
998 new_node = func(dbgi, irg, block, addr->base, addr->index,
999 addr->mem, am.new_op1, am.new_op2, new_eflags);
1000 set_am_attributes(new_node, &am);
1001 /* we can't use source address mode anymore when using immediates */
1002 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1003 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1004 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1006 new_node = fix_mem_proj(new_node, &am);
/**
 * Returns the (lazily transformed) initial x87 floating-point control
 * word node; cached in the file-level "initial_fpcw".
 */
1011 static ir_node *get_fpcw(void)
1014 if(initial_fpcw != NULL)
1015 return initial_fpcw;
1017 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
1018 &ia32_fp_cw_regs[REG_FPCW]);
1019 initial_fpcw = be_transform_node(fpcw);
1021 return initial_fpcw;
/**
 * Construct a standard x87 float binary operation (the constructor also
 * receives the FP control word); sets AM and immediate if required.
 *
 * @param op1    The first operand
 * @param op2    The second operand
 * @param func   The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): local declarations, the am-disabling statement after the
 * long-double check and "return new_node;" are missing from this
 * extract — verify against the upstream file.
 */
1032 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1033 construct_binop_float_func *func,
1034 match_flags_t flags)
1036 ir_graph *irg = current_ir_graph;
1037 dbg_info *dbgi = get_irn_dbg_info(node);
1038 ir_node *block = get_nodes_block(node);
1039 ir_node *new_block = be_transform_node(block);
1040 ir_mode *mode = get_irn_mode(node);
1042 ia32_address_mode_t am;
1043 ia32_address_t *addr = &am.addr;
1045 /* cannot use address mode with long double on x87 */
1046 if (get_mode_size_bits(mode) > 64)
1049 match_arguments(&am, block, op1, op2, NULL, flags);
1051 new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
1052 am.new_op1, am.new_op2, get_fpcw());
1053 set_am_attributes(new_node, &am);
1055 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1057 new_node = fix_mem_proj(new_node, &am);
/**
 * Construct a shift/rotate binary operation; sets AM and immediate if
 * required.
 *
 * @param op1    The value to shift
 * @param op2    The shift amount
 * @param func   The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): local declarations and the final "return new_node;" are
 * missing from this extract — verify against the upstream file.
 */
1070 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1071 construct_shift_func *func,
1072 match_flags_t flags)
1074 dbg_info *dbgi = get_irn_dbg_info(node);
1075 ir_graph *irg = current_ir_graph;
1076 ir_node *block = get_nodes_block(node);
1077 ir_node *new_block = be_transform_node(block);
1082 assert(! mode_is_float(get_irn_mode(node)));
1083 assert(flags & match_immediate);
1084 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1086 if(flags & match_mode_neutral) {
1087 op1 = ia32_skip_downconv(op1);
1088 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1089 panic("right shifting of non-32bit values not supported, yet");
1091 new_op1 = be_transform_node(op1);
1093 /* the shift amount can be any mode that is bigger than 5 bits, since all
1094 * other bits are ignored anyway */
1095 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1096 op2 = get_Conv_op(op2);
1097 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1099 new_op2 = create_immediate_or_transform(op2, 0);
1101 new_node = func(dbgi, irg, new_block, new_op1, new_op2);
1102 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1104 /* lowered shift instruction may have a dependency operand, handle it here */
1105 if (get_irn_arity(node) == 3) {
1106 /* we have a dependency */
1107 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1108 add_irn_dep(new_node, new_dep);
/**
 * Construct a standard unary operation; sets AM and immediate if
 * required.
 *
 * @param op     The operand
 * @param func   The node constructor function
 * @return The constructed ia32 node.
 * NOTE(review): local declarations, a closing brace and the trailing
 * "return new_node;" are missing from this extract — verify upstream.
 */
1122 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1123 match_flags_t flags)
1125 ir_graph *irg = current_ir_graph;
1126 dbg_info *dbgi = get_irn_dbg_info(node);
1127 ir_node *block = get_nodes_block(node);
1128 ir_node *new_block = be_transform_node(block);
1132 assert(flags == 0 || flags == match_mode_neutral);
1133 if(flags & match_mode_neutral) {
1134 op = ia32_skip_downconv(op);
1137 new_op = be_transform_node(op);
1138 new_node = func(dbgi, irg, new_block, new_op);
1140 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Builds an ia32 Lea from the given address: transforms base and index
 * (substituting NoReg where absent) and copies the remaining address
 * attributes onto the Lea.
 * NOTE(review): the NULL-checks around the NoReg substitutions, local
 * declarations and "return res;" are missing from this extract — verify
 * against the upstream file.
 */
1145 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1146 ia32_address_t *addr)
1148 ir_graph *irg = current_ir_graph;
1149 ir_node *base = addr->base;
1150 ir_node *index = addr->index;
1154 base = ia32_new_NoReg_gp(env_cg);
1156 base = be_transform_node(base);
1160 index = ia32_new_NoReg_gp(env_cg);
1162 index = be_transform_node(index);
1165 res = new_rd_ia32_Lea(dbgi, irg, block, base, index);
1166 set_address(res, addr);
/**
 * Returns true if the address contains any immediate component:
 * a non-zero offset, a symconst entity, or a frame entity/frame use.
 */
1171 static int am_has_immediates(const ia32_address_t *addr)
1173 return addr->offset != 0 || addr->symconst_ent != NULL
1174 || addr->frame_entity || addr->use_frame;
1178 * Creates an ia32 Add.
1180 * @return the created ia32 Add node
1182 static ir_node *gen_Add(ir_node *node) {
1183 ir_graph *irg = current_ir_graph;
1184 dbg_info *dbgi = get_irn_dbg_info(node);
1185 ir_node *block = get_nodes_block(node);
1186 ir_node *new_block = be_transform_node(block);
1187 ir_node *op1 = get_Add_left(node);
1188 ir_node *op2 = get_Add_right(node);
1189 ir_mode *mode = get_irn_mode(node);
1191 ir_node *add_immediate_op;
1192 ia32_address_t addr;
1193 ia32_address_mode_t am;
/* float adds go to SSE or x87; both are commutative and support source AM */
1195 if (mode_is_float(mode)) {
1196 if (ia32_cg_config.use_sse2)
1197 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1198 match_commutative | match_am);
1200 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1201 match_commutative | match_am);
1204 ia32_mark_non_am(node);
/* upper bits are don't-care for integer add: drop narrowing convs */
1206 op2 = ia32_skip_downconv(op2);
1207 op1 = ia32_skip_downconv(op1);
/* Selection strategy (in order):
1211 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1212 * 1. Add with immediate -> Lea
1213 * 2. Add with possible source address mode -> Add
1214 * 3. Otherwise -> Lea
 */
1216 memset(&addr, 0, sizeof(addr));
1217 ia32_create_address_mode(&addr, node, /*force=*/1);
1218 add_immediate_op = NULL;
/* case 0: everything folded into immediates -> emit a single Const */
1220 if(addr.base == NULL && addr.index == NULL) {
1221 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1222 addr.symconst_sign, addr.offset);
/* dep on the frame keeps the Const scheduled after the prologue */
1223 add_irn_dep(new_node, get_irg_frame(irg));
1224 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1227 /* add with immediate? */
1228 if(addr.index == NULL) {
1229 add_immediate_op = addr.base;
1230 } else if(addr.base == NULL && addr.scale == 0) {
1231 add_immediate_op = addr.index;
1234 if(add_immediate_op != NULL) {
/* x + 0: no immediates at all, the add is a no-op */
1235 if(!am_has_immediates(&addr)) {
1236 #ifdef DEBUG_libfirm
1237 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1240 return be_transform_node(add_immediate_op);
/* case 1: operand + immediate becomes a Lea */
1243 new_node = create_lea_from_address(dbgi, new_block, &addr);
1244 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1248 /* test if we can use source address mode */
1249 match_arguments(&am, block, op1, op2, NULL, match_commutative
1250 | match_mode_neutral | match_am | match_immediate | match_try_am);
1252 /* construct an Add with source address mode */
1253 if (am.op_type == ia32_AddrModeS) {
1254 ia32_address_t *am_addr = &am.addr;
1255 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1256 am_addr->index, am_addr->mem, am.new_op1,
1258 set_am_attributes(new_node, &am);
1259 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute the mem Proj of a folded load to the new node */
1261 new_node = fix_mem_proj(new_node, &am);
1266 /* otherwise construct a lea */
1267 new_node = create_lea_from_address(dbgi, new_block, &addr);
1268 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1273 * Creates an ia32 Mul.
1275 * @return the created ia32 Mul node
1277 static ir_node *gen_Mul(ir_node *node) {
1278 ir_node *op1 = get_Mul_left(node);
1279 ir_node *op2 = get_Mul_right(node);
1280 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE xMul or x87 vfmul, both commutative with source AM */
1282 if (mode_is_float(mode)) {
1283 if (ia32_cg_config.use_sse2)
1284 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1285 match_commutative | match_am);
1287 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1288 match_commutative | match_am);
/* integer multiply: IMul, mode-neutral (only low bits matter),
 * allows AM together with an immediate operand */
1290 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1291 match_commutative | match_am | match_mode_neutral |
1292 match_immediate | match_am_and_immediates);
1296 * Creates an ia32 Mulh.
1297 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1298 * this result while Mul returns the lower 32 bit.
1300 * @return the created ia32 Mulh node
1302 static ir_node *gen_Mulh(ir_node *node)
1304 ir_node *block = get_nodes_block(node);
1305 ir_node *new_block = be_transform_node(block);
1306 ir_graph *irg = current_ir_graph;
1307 dbg_info *dbgi = get_irn_dbg_info(node);
1308 ir_mode *mode = get_irn_mode(node);
1309 ir_node *op1 = get_Mulh_left(node);
1310 ir_node *op2 = get_Mulh_right(node);
1311 ir_node *proj_res_high;
1313 ia32_address_mode_t am;
1314 ia32_address_t *addr = &am.addr;
1316 assert(!mode_is_float(mode) && "Mulh with float not supported");
1317 assert(get_mode_size_bits(mode) == 32);
1319 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* signed -> one-operand IMul, unsigned -> Mul (both produce edx:eax) */
1321 if (mode_is_signed(mode)) {
1322 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1323 addr->index, addr->mem, am.new_op1,
1326 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1327 addr->index, addr->mem, am.new_op1,
1331 set_am_attributes(new_node, &am);
1332 /* we can't use source address mode anymore when using immediates */
1333 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1334 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1335 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1337 assert(get_irn_mode(new_node) == mode_T);
1339 fix_mem_proj(new_node, &am);
/* high-result proj number is identical for both node kinds, so one Proj
 * constant suffices regardless of the branch taken above */
1341 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1342 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1343 mode_Iu, pn_ia32_IMul1OP_res_high);
1345 return proj_res_high;
1351 * Creates an ia32 And.
1353 * @return The created ia32 And node
1355 static ir_node *gen_And(ir_node *node) {
1356 ir_node *op1 = get_And_left(node);
1357 ir_node *op2 = get_And_right(node);
1358 assert(! mode_is_float(get_irn_mode(node)));
1360 /* is it a zero extension? */
1361 if (is_Const(op2)) {
1362 tarval *tv = get_Const_tarval(op2);
1363 long v = get_tarval_long(tv);
/* And with 0xFF/0xFFFF is a zero-extension from 8/16 bit: use a Conv
 * (movzx) instead of an And instruction */
1365 if (v == 0xFF || v == 0xFFFF) {
1366 dbg_info *dbgi = get_irn_dbg_info(node);
1367 ir_node *block = get_nodes_block(node);
1374 assert(v == 0xFFFF);
1377 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1383 return gen_binop(node, op1, op2, new_rd_ia32_And,
1384 match_commutative | match_mode_neutral | match_am
1391 * Creates an ia32 Or.
1393 * @return The created ia32 Or node
1395 static ir_node *gen_Or(ir_node *node) {
1396 ir_node *op1 = get_Or_left(node);
1397 ir_node *op2 = get_Or_right(node);
/* bitwise or on integers only; commutative, AM and immediate capable */
1399 assert (! mode_is_float(get_irn_mode(node)));
1400 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1401 | match_mode_neutral | match_am | match_immediate);
1407 * Creates an ia32 Eor.
1409 * @return The created ia32 Eor node
1411 static ir_node *gen_Eor(ir_node *node) {
1412 ir_node *op1 = get_Eor_left(node);
1413 ir_node *op2 = get_Eor_right(node);
/* firm Eor maps to ia32 Xor; same matching flags as Or */
1415 assert(! mode_is_float(get_irn_mode(node)));
1416 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1417 | match_mode_neutral | match_am | match_immediate);
1422 * Creates an ia32 Sub.
1424 * @return The created ia32 Sub node
1426 static ir_node *gen_Sub(ir_node *node) {
1427 ir_node *op1 = get_Sub_left(node);
1428 ir_node *op2 = get_Sub_right(node);
1429 ir_mode *mode = get_irn_mode(node);
/* float subtract: SSE xSub or x87 vfsub (not commutative, so no
 * match_commutative here) */
1431 if (mode_is_float(mode)) {
1432 if (ia32_cg_config.use_sse2)
1433 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1435 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
/* Sub with a constant should have been normalized to Add earlier */
1440 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1444 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1445 | match_am | match_immediate);
1449 * Generates an ia32 DivMod with additional infrastructure for the
1450 * register allocator if needed.
1452 static ir_node *create_Div(ir_node *node)
1454 ir_graph *irg = current_ir_graph;
1455 dbg_info *dbgi = get_irn_dbg_info(node);
1456 ir_node *block = get_nodes_block(node);
1457 ir_node *new_block = be_transform_node(block);
1464 ir_node *sign_extension;
1465 ia32_address_mode_t am;
1466 ia32_address_t *addr = &am.addr;
1468 /* the upper bits have random contents for smaller modes */
/* handles Div, Mod and DivMod uniformly; extract operands per opcode */
1469 switch (get_irn_opcode(node)) {
1471 op1 = get_Div_left(node);
1472 op2 = get_Div_right(node);
1473 mem = get_Div_mem(node);
1474 mode = get_Div_resmode(node);
1477 op1 = get_Mod_left(node);
1478 op2 = get_Mod_right(node);
1479 mem = get_Mod_mem(node);
1480 mode = get_Mod_resmode(node);
1483 op1 = get_DivMod_left(node);
1484 op2 = get_DivMod_right(node);
1485 mem = get_DivMod_mem(node);
1486 mode = get_DivMod_resmode(node);
1489 panic("invalid divmod node %+F", node);
1492 match_arguments(&am, block, op1, op2, NULL, match_am);
1494 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1495 is the memory of the consumed address. We can have only the second op as address
1496 in Div nodes, so check only op2. */
1497 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1498 new_mem = be_transform_node(mem);
/* both the Div's own memory and the AM memory are live: join with Sync */
1499 if(!is_NoMem(addr->mem)) {
1503 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1506 new_mem = addr->mem;
/* signed division needs edx:eax sign-extended (cltd); unsigned needs
 * edx zeroed via a Const 0 */
1509 if (mode_is_signed(mode)) {
1510 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1511 add_irn_dep(produceval, get_irg_frame(irg));
1512 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1515 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1516 addr->index, new_mem, am.new_op2,
1517 am.new_op1, sign_extension);
1519 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1520 add_irn_dep(sign_extension, get_irg_frame(irg));
1522 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1523 addr->index, new_mem, am.new_op2,
1524 am.new_op1, sign_extension);
/* keep the original pinned state (division can trap) */
1527 set_irn_pinned(new_node, get_irn_pinned(node));
1529 set_am_attributes(new_node, &am);
1530 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1532 new_node = fix_mem_proj(new_node, &am);
/* Mod is handled by the common Div/Mod/DivMod lowering. */
1538 static ir_node *gen_Mod(ir_node *node) {
1539 return create_Div(node);
/* Div is handled by the common Div/Mod/DivMod lowering. */
1542 static ir_node *gen_Div(ir_node *node) {
1543 return create_Div(node);
/* DivMod is handled by the common Div/Mod/DivMod lowering. */
1546 static ir_node *gen_DivMod(ir_node *node) {
1547 return create_Div(node);
1553 * Creates an ia32 floating Div.
1555 * @return The created ia32 xDiv node
1557 static ir_node *gen_Quot(ir_node *node)
1559 ir_node *op1 = get_Quot_left(node);
1560 ir_node *op2 = get_Quot_right(node);
/* float division: SSE xDiv when available, otherwise x87 vfdiv */
1562 if (ia32_cg_config.use_sse2) {
1563 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1565 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1571 * Creates an ia32 Shl.
1573 * @return The created ia32 Shl node
1575 static ir_node *gen_Shl(ir_node *node) {
1576 ir_node *left = get_Shl_left(node);
1577 ir_node *right = get_Shl_right(node);
/* left shift discards high bits anyway, so it is mode-neutral */
1579 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1580 match_mode_neutral | match_immediate);
1584 * Creates an ia32 Shr.
1586 * @return The created ia32 Shr node
1588 static ir_node *gen_Shr(ir_node *node) {
1589 ir_node *left = get_Shr_left(node);
1590 ir_node *right = get_Shr_right(node);
/* logical right shift: upper bits matter, so not mode-neutral */
1592 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1598 * Creates an ia32 Sar.
1600 * @return The created ia32 Shrs node
1602 static ir_node *gen_Shrs(ir_node *node) {
1603 ir_node *left = get_Shrs_left(node);
1604 ir_node *right = get_Shrs_right(node);
1605 ir_mode *mode = get_irn_mode(node);
/* Shrs by a constant on a 32-bit signed value may be a known pattern */
1607 if(is_Const(right) && mode == mode_Is) {
1608 tarval *tv = get_Const_tarval(right);
1609 long val = get_tarval_long(tv);
1611 /* this is a sign extension */
/* (presumably val == 31 here; the check is in an elided line — a shift by
 * 31 broadcasts the sign bit, which Cltd produces directly) */
1612 ir_graph *irg = current_ir_graph;
1613 dbg_info *dbgi = get_irn_dbg_info(node);
1614 ir_node *block = be_transform_node(get_nodes_block(node));
1616 ir_node *new_op = be_transform_node(op);
1617 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1618 add_irn_dep(pval, get_irg_frame(irg));
1620 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1624 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, c), c) with c == 16 or 24 sign-extends the low 16/8 bits:
 * emit a Conv (movsx) instead of two shifts */
1625 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1626 ir_node *shl_left = get_Shl_left(left);
1627 ir_node *shl_right = get_Shl_right(left);
1628 if(is_Const(shl_right)) {
1629 tarval *tv1 = get_Const_tarval(right);
1630 tarval *tv2 = get_Const_tarval(shl_right);
1631 if(tv1 == tv2 && tarval_is_long(tv1)) {
1632 long val = get_tarval_long(tv1);
1633 if(val == 16 || val == 24) {
1634 dbg_info *dbgi = get_irn_dbg_info(node);
1635 ir_node *block = get_nodes_block(node);
1645 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic case: arithmetic right shift */
1654 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1660 * Creates an ia32 RotL.
1662 * @param op1 The first operator
1663 * @param op2 The second operator
1664 * @return The created ia32 RotL node
1666 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1667 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1673 * Creates an ia32 RotR.
1674 * NOTE: There is no RotR with immediate because this would always be a RotL
1675 * "imm-mode_size_bits" which can be pre-calculated.
1677 * @param op1 The first operator
1678 * @param op2 The second operator
1679 * @return The created ia32 RotR node
1681 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1682 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1688 * Creates an ia32 RotR or RotL (depending on the found pattern).
1690 * @return The created ia32 RotL or RotR node
1692 static ir_node *gen_Rot(ir_node *node) {
1693 ir_node *rotate = NULL;
1694 ir_node *op1 = get_Rot_left(node);
1695 ir_node *op2 = get_Rot_right(node);
1697 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1698 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1699 that means we can create a RotR instead of an Add and a RotL */
1701 if (get_irn_op(op2) == op_Add) {
1703 ir_node *left = get_Add_left(add);
1704 ir_node *right = get_Add_right(add);
1705 if (is_Const(right)) {
1706 tarval *tv = get_Const_tarval(right);
1707 ir_mode *mode = get_irn_mode(node);
1708 long bits = get_mode_size_bits(mode);
/* pattern: RotL(x, Minus(e) + bits) == RotR(x, e) */
1710 if (get_irn_op(left) == op_Minus &&
1711 tarval_is_long(tv) &&
1712 get_tarval_long(tv) == bits &&
1715 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1716 rotate = gen_RotR(node, op1, get_Minus_op(left));
/* no RotR pattern matched: fall back to a plain RotL */
1721 if (rotate == NULL) {
1722 rotate = gen_RotL(node, op1, op2);
1731 * Transforms a Minus node.
1733 * @return The created ia32 Minus node
1735 static ir_node *gen_Minus(ir_node *node)
1737 ir_node *op = get_Minus_op(node);
1738 ir_node *block = be_transform_node(get_nodes_block(node));
1739 ir_graph *irg = current_ir_graph;
1740 dbg_info *dbgi = get_irn_dbg_info(node);
1741 ir_mode *mode = get_irn_mode(node);
1746 if (mode_is_float(mode)) {
1747 ir_node *new_op = be_transform_node(op);
1748 if (ia32_cg_config.use_sse2) {
1749 /* TODO: non-optimal... if we have many xXors, then we should
1750 * rather create a load for the const and use that instead of
1751 * several AM nodes... */
/* SSE negate: xor with the sign-bit constant (SSIGN/DSIGN) loaded
 * via address mode */
1752 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1753 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1754 ir_node *nomem = new_rd_NoMem(irg);
1756 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1757 nomem, new_op, noreg_xmm);
1759 size = get_mode_size_bits(mode);
1760 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1762 set_ia32_am_sc(new_node, ent);
1763 set_ia32_op_type(new_node, ia32_AddrModeS);
1764 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated change-sign instruction */
1766 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
/* integer negate */
1769 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1772 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1778 * Transforms a Not node.
1780 * @return The created ia32 Not node
1782 static ir_node *gen_Not(ir_node *node) {
1783 ir_node *op = get_Not_op(node);
1785 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1786 assert (! mode_is_float(get_irn_mode(node)));
/* bitwise complement; upper bits don't matter */
1788 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1794 * Transforms an Abs node.
1796 * @return The created ia32 Abs node
1798 static ir_node *gen_Abs(ir_node *node)
1800 ir_node *block = get_nodes_block(node);
1801 ir_node *new_block = be_transform_node(block);
1802 ir_node *op = get_Abs_op(node);
1803 ir_graph *irg = current_ir_graph;
1804 dbg_info *dbgi = get_irn_dbg_info(node);
1805 ir_mode *mode = get_irn_mode(node);
1806 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1807 ir_node *nomem = new_NoMem();
1813 if (mode_is_float(mode)) {
1814 new_op = be_transform_node(op);
1816 if (ia32_cg_config.use_sse2) {
/* SSE abs: and with the all-but-sign-bit mask constant (SABS/DABS) */
1817 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1818 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1819 nomem, new_op, noreg_fp);
1821 size = get_mode_size_bits(mode);
1822 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1824 set_ia32_am_sc(new_node, ent);
1826 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1828 set_ia32_op_type(new_node, ia32_AddrModeS);
1829 set_ia32_ls_mode(new_node, mode);
/* x87 has a dedicated abs instruction */
1831 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1832 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* integer abs via the branch-free idiom:
 * s = x >> 31 (Cltd); result = (x ^ s) - s */
1835 ir_node *xor, *pval, *sign_extension;
1837 if (get_mode_size_bits(mode) == 32) {
1838 new_op = be_transform_node(op);
/* widen smaller modes to 32 bit first */
1840 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1843 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1844 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1847 add_irn_dep(pval, get_irg_frame(irg));
1848 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1850 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1851 nomem, new_op, sign_extension);
1852 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1854 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1855 nomem, xor, sign_extension);
1856 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Produce a flags-mode value for a mode_b selector and report the pn_Cmp
 * condition to test in *pnc_out.  A Proj of a Cmp is transformed directly;
 * any other mode_b value is compared against 0 with a Test. */
1862 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1864 ir_graph *irg = current_ir_graph;
1872 /* we have a Cmp as input */
1874 ir_node *pred = get_Proj_pred(node);
1876 flags = be_transform_node(pred);
/* the Proj number encodes the comparison relation */
1877 *pnc_out = get_Proj_proj(node);
1882 /* a mode_b value, we have to compare it against 0 */
1883 dbgi = get_irn_dbg_info(node);
1884 new_block = be_transform_node(get_nodes_block(node));
1885 new_op = be_transform_node(node);
1886 noreg = ia32_new_NoReg_gp(env_cg);
1887 nomem = new_NoMem();
/* Test op,op sets ZF according to whether the value is zero */
1888 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1889 new_op, new_op, 0, 0);
1890 *pnc_out = pn_Cmp_Lg;
1895 * Transforms a Load.
1897 * @return the created ia32 Load node
1899 static ir_node *gen_Load(ir_node *node) {
1900 ir_node *old_block = get_nodes_block(node);
1901 ir_node *block = be_transform_node(old_block);
1902 ir_node *ptr = get_Load_ptr(node);
1903 ir_node *mem = get_Load_mem(node);
1904 ir_node *new_mem = be_transform_node(mem);
1907 ir_graph *irg = current_ir_graph;
1908 dbg_info *dbgi = get_irn_dbg_info(node);
1909 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1910 ir_mode *mode = get_Load_mode(node);
1913 ia32_address_t addr;
1915 /* construct load address */
1916 memset(&addr, 0, sizeof(addr));
1917 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1924 base = be_transform_node(base);
1930 index = be_transform_node(index);
/* pick the load node kind by result mode: xmm, x87 or gp */
1933 if (mode_is_float(mode)) {
1934 if (ia32_cg_config.use_sse2) {
1935 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1937 res_mode = mode_xmm;
1939 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1941 res_mode = mode_vfp;
1944 assert(mode != mode_b);
1946 /* create a conv node with address mode for smaller modes */
1947 if(get_mode_size_bits(mode) < 32) {
1948 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1949 new_mem, noreg, mode);
1951 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1956 set_irn_pinned(new_node, get_irn_pinned(node));
1957 set_ia32_op_type(new_node, ia32_AddrModeS);
1958 set_ia32_ls_mode(new_node, mode);
1959 set_address(new_node, &addr);
/* unpinned loads may be rematerialized by the spiller */
1961 if(get_irn_pinned(node) == op_pin_state_floats) {
1962 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1965 /* make sure we are scheduled behind the initial IncSP/Barrier
1966 * to avoid spills being placed before it
 */
1968 if (block == get_irg_start_block(irg)) {
1969 add_irn_dep(new_node, get_irg_frame(irg));
1972 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Decide whether a Store(op(Load)) chain can be folded into a single
 * destination-address-mode instruction.  node is the loaded value (a Proj),
 * mem/ptr belong to the store, other is the second operand (may be NULL). */
1977 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1978 ir_node *ptr, ir_node *other)
1985 /* we only use address mode if we're the only user of the load */
1986 if(get_irn_n_edges(node) > 1)
1989 load = get_Proj_pred(node);
/* load must be in the same block as the store */
1992 if(get_nodes_block(load) != block)
1995 /* Store should be attached to the load */
1996 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
1998 /* store should have the same pointer as the load */
1999 if(get_Load_ptr(load) != ptr)
2002 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2003 if(other != NULL && get_nodes_block(other) == block
2004 && heights_reachable_in_block(heights, other, load))
/* Build a destination-address-mode binary op (op [mem], reg/imm) for node,
 * folding the matching Load into the memory operand.  Returns NULL if
 * neither operand qualifies for destination AM. */
2010 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2011 ir_node *mem, ir_node *ptr, ir_mode *mode,
2012 construct_binop_dest_func *func,
2013 construct_binop_dest_func *func8bit,
2014 match_flags_t flags)
2016 ir_node *src_block = get_nodes_block(node);
2018 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2019 ir_graph *irg = current_ir_graph;
2024 ia32_address_mode_t am;
2025 ia32_address_t *addr = &am.addr;
2026 memset(&am, 0, sizeof(am));
2028 assert(flags & match_dest_am);
2029 assert(flags & match_immediate); /* there is no destam node without... */
2030 commutative = (flags & match_commutative) != 0;
/* try left operand as the memory side; if commutative, also the right */
2032 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2033 build_address(&am, op1);
2034 new_op = create_immediate_or_transform(op2, 0);
2035 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2036 build_address(&am, op2);
2037 new_op = create_immediate_or_transform(op1, 0);
/* fill missing address parts with NoReg/NoMem placeholders */
2042 if(addr->base == NULL)
2043 addr->base = noreg_gp;
2044 if(addr->index == NULL)
2045 addr->index = noreg_gp;
2046 if(addr->mem == NULL)
2047 addr->mem = new_NoMem();
2049 dbgi = get_irn_dbg_info(node);
2050 block = be_transform_node(src_block);
/* 8-bit ops need the dedicated 8-bit constructor (different register
 * requirements) */
2051 if(get_mode_size_bits(mode) == 8) {
2052 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2055 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
2058 set_address(new_node, addr);
2059 set_ia32_op_type(new_node, ia32_AddrModeD);
2060 set_ia32_ls_mode(new_node, mode);
2061 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Build a destination-address-mode unary op (op [mem]) for node, folding
 * the matching Load into the memory operand.  Returns NULL when the
 * operand does not qualify for destination AM. */
2066 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2067 ir_node *ptr, ir_mode *mode,
2068 construct_unop_dest_func *func)
2070 ir_graph *irg = current_ir_graph;
2071 ir_node *src_block = get_nodes_block(node);
2075 ia32_address_mode_t am;
2076 ia32_address_t *addr = &am.addr;
2077 memset(&am, 0, sizeof(am));
2079 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2082 build_address(&am, op);
2084 dbgi = get_irn_dbg_info(node);
2085 block = be_transform_node(src_block);
2086 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2087 set_address(new_node, addr);
2088 set_ia32_op_type(new_node, ia32_AddrModeD);
2089 set_ia32_ls_mode(new_node, mode);
2090 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to turn Store(Psi(cond, 1, 0)) / Store(Psi(cond, 0, 1)) into a
 * SetMem (setcc to memory).  Only applicable to 8-bit stores; returns
 * NULL when the pattern does not match. */
2095 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2096 ir_mode *mode = get_irn_mode(node);
2097 ir_node *psi_true = get_Psi_val(node, 0);
2098 ir_node *psi_default = get_Psi_default(node);
2109 ia32_address_t addr;
/* setcc writes a single byte */
2111 if(get_mode_size_bits(mode) != 8)
/* 1/0 -> plain setcc; 0/1 -> setcc with negated condition */
2114 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2116 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2122 build_address_ptr(&addr, ptr, mem);
2124 irg = current_ir_graph;
2125 dbgi = get_irn_dbg_info(node);
2126 block = get_nodes_block(node);
2127 new_block = be_transform_node(block);
2128 cond = get_Psi_cond(node, 0);
2129 flags = get_flags_node(cond, &pnc);
2130 new_mem = be_transform_node(mem);
2131 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2132 addr.index, addr.mem, flags, pnc, negated);
2133 set_address(new_node, &addr);
2134 set_ia32_op_type(new_node, ia32_AddrModeD);
2135 set_ia32_ls_mode(new_node, mode);
2136 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to fold Store(op(Load(ptr), x), ptr) into one read-modify-write
 * instruction (AddMem, SubMem, ...).  Dispatches on the opcode of the
 * stored value; returns NULL when no destination-AM form applies. */
2141 static ir_node *try_create_dest_am(ir_node *node) {
2142 ir_node *val = get_Store_value(node);
2143 ir_node *mem = get_Store_mem(node);
2144 ir_node *ptr = get_Store_ptr(node);
2145 ir_mode *mode = get_irn_mode(val);
2146 unsigned bits = get_mode_size_bits(mode);
2151 /* handle only GP modes for now... */
2152 if(!mode_needs_gp_reg(mode))
2156 /* store must be the only user of the val node */
2157 if(get_irn_n_edges(val) > 1)
2159 /* skip pointless convs */
2161 ir_node *conv_op = get_Conv_op(val);
2162 ir_mode *pred_mode = get_irn_mode(conv_op);
/* a conv that does not lose bits (or comes from mode_b) is a no-op here */
2163 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2171 /* value must be in the same block */
2172 if(get_nodes_block(node) != get_nodes_block(val))
2175 switch(get_irn_opcode(val)) {
2177 op1 = get_Add_left(val);
2178 op2 = get_Add_right(val);
/* add of +/-1 becomes inc/dec on memory */
2179 if(is_Const_1(op2)) {
2180 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2181 new_rd_ia32_IncMem);
2183 } else if(is_Const_Minus_1(op2)) {
2184 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2185 new_rd_ia32_DecMem);
2188 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2189 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2190 match_dest_am | match_commutative |
2194 op1 = get_Sub_left(val);
2195 op2 = get_Sub_right(val);
/* sub with const should have been normalized to add earlier */
2197 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2200 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2201 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2202 match_dest_am | match_immediate |
2206 op1 = get_And_left(val);
2207 op2 = get_And_right(val);
2208 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2209 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2210 match_dest_am | match_commutative |
2214 op1 = get_Or_left(val);
2215 op2 = get_Or_right(val);
2216 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2217 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2218 match_dest_am | match_commutative |
2222 op1 = get_Eor_left(val);
2223 op2 = get_Eor_right(val);
2224 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2225 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2226 match_dest_am | match_commutative |
/* shifts have no separate 8-bit constructor: same func passed twice */
2230 op1 = get_Shl_left(val);
2231 op2 = get_Shl_right(val);
2232 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2233 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2234 match_dest_am | match_immediate);
2237 op1 = get_Shr_left(val);
2238 op2 = get_Shr_right(val);
2239 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2240 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2241 match_dest_am | match_immediate);
2244 op1 = get_Shrs_left(val);
2245 op2 = get_Shrs_right(val);
2246 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2247 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2248 match_dest_am | match_immediate);
2251 op1 = get_Rot_left(val);
2252 op2 = get_Rot_right(val);
2253 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2254 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2255 match_dest_am | match_immediate);
2257 /* TODO: match ROR patterns... */
2259 new_node = try_create_SetMem(val, ptr, mem);
2262 op1 = get_Minus_op(val);
2263 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2266 /* should be lowered already */
2267 assert(mode != mode_b);
2268 op1 = get_Not_op(val);
2269 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* a pinned store must keep its folded replacement pinned, too */
2275 if(new_node != NULL) {
2276 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2277 get_irn_pinned(node) == op_pin_state_pinned) {
2278 set_irn_pinned(new_node, op_pin_state_pinned);
/* Return non-zero if node is a Conv from a float mode to a 32-bit
 * gp-register mode (candidate for an fist store). */
2285 static int is_float_to_int32_conv(const ir_node *node)
2287 ir_mode *mode = get_irn_mode(node);
2291 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2296 conv_op = get_Conv_op(node);
2297 conv_mode = get_irn_mode(conv_op);
2299 if(!mode_is_float(conv_mode))
2306 * Transforms a Store.
2308 * @return the created ia32 Store node
2310 static ir_node *gen_Store(ir_node *node)
2312 ir_node *block = get_nodes_block(node);
2313 ir_node *new_block = be_transform_node(block);
2314 ir_node *ptr = get_Store_ptr(node);
2315 ir_node *val = get_Store_value(node);
2316 ir_node *mem = get_Store_mem(node);
2317 ir_graph *irg = current_ir_graph;
2318 dbg_info *dbgi = get_irn_dbg_info(node);
2319 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2320 ir_mode *mode = get_irn_mode(val);
2323 ia32_address_t addr;
2325 /* check for destination address mode */
2326 new_node = try_create_dest_am(node);
2327 if(new_node != NULL)
2330 /* construct store address */
2331 memset(&addr, 0, sizeof(addr));
2332 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2334 if(addr.base == NULL) {
2337 addr.base = be_transform_node(addr.base);
2340 if(addr.index == NULL) {
2343 addr.index = be_transform_node(addr.index);
2345 addr.mem = be_transform_node(mem);
2347 if (mode_is_float(mode)) {
2348 /* convs (and strict-convs) before stores are unnecessary if the mode
 */
2350 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2351 val = get_Conv_op(val);
2353 new_val = be_transform_node(val);
2354 if (ia32_cg_config.use_sse2) {
2355 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2356 addr.index, addr.mem, new_val);
2358 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2359 addr.index, addr.mem, new_val, mode);
/* float->int32 conv followed by a store: combine into fist (store
 * with truncation rounding mode) */
2361 } else if(is_float_to_int32_conv(val)) {
2362 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2363 val = get_Conv_op(val);
2365 /* convs (and strict-convs) before stores are unnecessary if the mode
 */
2367 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2368 val = get_Conv_op(val);
2370 new_val = be_transform_node(val);
2372 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2373 addr.index, addr.mem, new_val, trunc_mode);
/* integer store; 8-bit stores need the dedicated constructor */
2375 new_val = create_immediate_or_transform(val, 0);
2376 assert(mode != mode_b);
2378 if (get_mode_size_bits(mode) == 8) {
2379 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2380 addr.index, addr.mem, new_val);
2382 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2383 addr.index, addr.mem, new_val);
2387 set_irn_pinned(new_node, get_irn_pinned(node));
2388 set_ia32_op_type(new_node, ia32_AddrModeD);
2389 set_ia32_ls_mode(new_node, mode);
2391 set_address(new_node, &addr);
2392 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Transform a Cond with a non-mode_b (integer) selector into a
 * SwitchJmp (jump table).  The selector is rebased to start at 0 when
 * the smallest case value is non-zero. */
2397 static ir_node *create_Switch(ir_node *node)
2399 ir_graph *irg = current_ir_graph;
2400 dbg_info *dbgi = get_irn_dbg_info(node);
2401 ir_node *block = be_transform_node(get_nodes_block(node));
2402 ir_node *sel = get_Cond_selector(node);
2403 ir_node *new_sel = be_transform_node(sel);
2404 int switch_min = INT_MAX;
2405 int switch_max = INT_MIN;
2406 long default_pn = get_Cond_defaultProj(node);
2408 const ir_edge_t *edge;
2410 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2412 /* determine the smallest switch case value */
2413 foreach_out_edge(node, edge) {
2414 ir_node *proj = get_edge_src_irn(edge);
2415 long pn = get_Proj_proj(proj);
/* default proj does not contribute to the value range */
2416 if(pn == default_pn)
/* refuse absurdly large jump tables */
2425 if((unsigned) (switch_max - switch_min) > 256000) {
2426 panic("Size of switch %+F bigger than 256000", node);
2429 if (switch_min != 0) {
2430 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2432 /* if smallest switch case is not 0 we need an additional sub */
2433 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2434 add_ia32_am_offs_int(new_sel, -switch_min);
2435 set_ia32_op_type(new_sel, ia32_AddrModeS);
2437 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2440 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2441 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2447 * Transform a Cond node.
2449 static ir_node *gen_Cond(ir_node *node) {
2450 ir_node *block = get_nodes_block(node);
2451 ir_node *new_block = be_transform_node(block);
2452 ir_graph *irg = current_ir_graph;
2453 dbg_info *dbgi = get_irn_dbg_info(node);
2454 ir_node *sel = get_Cond_selector(node);
2455 ir_mode *sel_mode = get_irn_mode(sel);
2456 ir_node *flags = NULL;
/* integer selector means a switch statement */
2460 if (sel_mode != mode_b) {
2461 return create_Switch(node);
2464 /* we get flags from a cmp */
2465 flags = get_flags_node(sel, &pnc);
/* conditional jump testing the flags with condition pnc */
2467 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2468 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2476 * Transforms a CopyB node.
2478 * @return The transformed node.
2480 static ir_node *gen_CopyB(ir_node *node) {
2481 ir_node *block = be_transform_node(get_nodes_block(node));
2482 ir_node *src = get_CopyB_src(node);
2483 ir_node *new_src = be_transform_node(src);
2484 ir_node *dst = get_CopyB_dst(node);
2485 ir_node *new_dst = be_transform_node(dst);
2486 ir_node *mem = get_CopyB_mem(node);
2487 ir_node *new_mem = be_transform_node(mem);
2488 ir_node *res = NULL;
2489 ir_graph *irg = current_ir_graph;
2490 dbg_info *dbgi = get_irn_dbg_info(node);
2491 int size = get_type_size_bytes(get_CopyB_type(node));
2494 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2495 /* then we need the size explicitly in ECX. */
2496 if (size >= 32 * 4) {
2497 rem = size & 0x3; /* size % 4 */
/* word count goes into a Const feeding ECX of the CopyB */
2500 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
2501 add_irn_dep(res, get_irg_frame(irg));
2503 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
/* small copies should have been lowered to loads/stores beforehand */
2506 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
/* small/medium copy: immediate-size variant without ECX */
2509 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2512 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/**
 * Transform a be_Copy: duplicate the node and normalise all GP-register
 * values to mode_Iu so integer copies carry a uniform mode.
 */
2517 static ir_node *gen_be_Copy(ir_node *node)
2519 ir_node *new_node = be_duplicate_node(node);
2520 ir_mode *mode = get_irn_mode(new_node);
2522 if (mode_needs_gp_reg(mode)) {
2523 set_irn_mode(new_node, mode_Iu);
/**
 * Create an x87 float compare for a Cmp node. Three strategies are used
 * depending on CPU capabilities: fucomi (writes eflags directly), ftst
 * (compare against 0 without a second operand), or fucom+fnstsw followed
 * by a Sahf to move the FPU status word into eflags.
 */
2529 static ir_node *create_Fucom(ir_node *node)
2531 ir_graph *irg = current_ir_graph;
2532 dbg_info *dbgi = get_irn_dbg_info(node);
2533 ir_node *block = get_nodes_block(node);
2534 ir_node *new_block = be_transform_node(block);
2535 ir_node *left = get_Cmp_left(node);
2536 ir_node *new_left = be_transform_node(left);
2537 ir_node *right = get_Cmp_right(node);
/* preferred path: fucomi sets eflags itself, no fnstsw/sahf needed */
2541 if(ia32_cg_config.use_fucomi) {
2542 new_right = be_transform_node(right);
2543 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2545 set_ia32_commutative(new_node);
2546 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* ftst compares against 0.0 implicitly, so the right operand is dropped */
2548 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2549 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2552 new_right = be_transform_node(right);
2553 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2557 set_ia32_commutative(new_node);
2559 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* fnstsw left the status word in ax; sahf copies ah into eflags */
2561 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2562 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/**
 * Create an SSE2 Ucomi compare for a Cmp node; the compare may fold one
 * operand as a memory operand (source address mode).
 */
2568 static ir_node *create_Ucomi(ir_node *node)
2570 ir_graph *irg = current_ir_graph;
2571 dbg_info *dbgi = get_irn_dbg_info(node);
2572 ir_node *src_block = get_nodes_block(node);
2573 ir_node *new_block = be_transform_node(src_block);
2574 ir_node *left = get_Cmp_left(node);
2575 ir_node *right = get_Cmp_right(node);
2577 ia32_address_mode_t am;
2578 ia32_address_t *addr = &am.addr;
/* ucomiss/ucomisd is commutative and accepts a memory source operand */
2580 match_arguments(&am, src_block, left, right, NULL,
2581 match_commutative | match_am);
2583 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2584 addr->mem, am.new_op1, am.new_op2,
2586 set_am_attributes(new_node, &am);
2588 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute memory Projs if a load was folded into the compare */
2590 new_node = fix_mem_proj(new_node, &am);
2596 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2597 * to fold an and into a test node
2599 static int can_fold_test_and(ir_node *node)
2601 const ir_edge_t *edge;
2603 /** we can only have eq and lg projs */
/* walk all users: folding And into Test only keeps (in)equality results */
2604 foreach_out_edge(node, edge) {
2605 ir_node *proj = get_edge_src_irn(edge);
2606 pn_Cmp pnc = get_Proj_proj(proj);
2607 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/**
 * Transform a Cmp node. Float compares are delegated to Ucomi (SSE2) or
 * Fucom (x87). Integer compares are lowered either to a Test instruction
 * (compare against 0, possibly folding an And) or to a regular Cmp,
 * with 8-bit variants where the compared mode is 8 bits wide.
 */
2614 static ir_node *gen_Cmp(ir_node *node)
2616 ir_graph *irg = current_ir_graph;
2617 dbg_info *dbgi = get_irn_dbg_info(node);
2618 ir_node *block = get_nodes_block(node);
2619 ir_node *new_block = be_transform_node(block);
2620 ir_node *left = get_Cmp_left(node);
2621 ir_node *right = get_Cmp_right(node);
2622 ir_mode *cmp_mode = get_irn_mode(left);
2624 ia32_address_mode_t am;
2625 ia32_address_t *addr = &am.addr;
2628 if(mode_is_float(cmp_mode)) {
2629 if (ia32_cg_config.use_sse2) {
2630 return create_Ucomi(node);
2632 return create_Fucom(node);
2636 assert(mode_needs_gp_reg(cmp_mode));
2638 /* we prefer the Test instruction where possible except cases where
2639 * we can use SourceAM */
2640 cmp_unsigned = !mode_is_signed(cmp_mode);
2641 if (is_Const_0(right)) {
/* And whose only user is this Cmp, and all Cmp users are Eq/Lg:
 * fold the And directly into a Test of its two operands */
2643 get_irn_n_edges(left) == 1 &&
2644 can_fold_test_and(node)) {
2645 /* Test(and_left, and_right) */
2646 ir_node *and_left = get_And_left(left);
2647 ir_node *and_right = get_And_right(left);
2648 ir_mode *mode = get_irn_mode(and_left);
2650 match_arguments(&am, block, and_left, and_right, NULL,
2652 match_am | match_8bit_am | match_16bit_am |
2653 match_am_and_immediates | match_immediate |
2654 match_8bit | match_16bit);
2655 if (get_mode_size_bits(mode) == 8) {
2656 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2657 addr->index, addr->mem, am.new_op1,
2658 am.new_op2, am.ins_permuted,
2661 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2662 addr->index, addr->mem, am.new_op1,
2663 am.new_op2, am.ins_permuted, cmp_unsigned);
/* no foldable And: try to give the left operand a source address mode */
2666 match_arguments(&am, block, NULL, left, NULL,
2667 match_am | match_8bit_am | match_16bit_am |
2668 match_8bit | match_16bit);
2669 if (am.op_type == ia32_AddrModeS) {
/* operand comes from memory: use Cmp against an immediate 0 */
2671 ir_node *imm_zero = try_create_Immediate(right, 0);
2672 if (get_mode_size_bits(cmp_mode) == 8) {
2673 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2674 addr->index, addr->mem, am.new_op2,
2675 imm_zero, am.ins_permuted,
2678 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2679 addr->index, addr->mem, am.new_op2,
2680 imm_zero, am.ins_permuted, cmp_unsigned);
2683 /* Test(left, left) */
2684 if (get_mode_size_bits(cmp_mode) == 8) {
2685 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2686 addr->index, addr->mem, am.new_op2,
2687 am.new_op2, am.ins_permuted,
2690 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2691 addr->index, addr->mem, am.new_op2,
2692 am.new_op2, am.ins_permuted,
2698 /* Cmp(left, right) */
2699 match_arguments(&am, block, left, right, NULL,
2700 match_commutative | match_am | match_8bit_am |
2701 match_16bit_am | match_am_and_immediates |
2702 match_immediate | match_8bit | match_16bit);
2703 if (get_mode_size_bits(cmp_mode) == 8) {
2704 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2705 addr->index, addr->mem, am.new_op1,
2706 am.new_op2, am.ins_permuted,
2709 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2710 addr->index, addr->mem, am.new_op1,
2711 am.new_op2, am.ins_permuted, cmp_unsigned);
2714 set_am_attributes(new_node, &am);
2715 assert(cmp_mode != NULL);
2716 set_ia32_ls_mode(new_node, cmp_mode);
2718 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2720 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a CMov for a Psi node whose value is a GP-register value.
 * Requires CMOV support (asserted); one operand may be folded from memory.
 */
2725 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2728 ir_graph *irg = current_ir_graph;
2729 dbg_info *dbgi = get_irn_dbg_info(node);
2730 ir_node *block = get_nodes_block(node);
2731 ir_node *new_block = be_transform_node(block);
2732 ir_node *val_true = get_Psi_val(node, 0);
2733 ir_node *val_false = get_Psi_default(node);
2735 match_flags_t match_flags;
2736 ia32_address_mode_t am;
2737 ia32_address_t *addr;
2739 assert(ia32_cg_config.use_cmov);
2740 assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2744 match_flags = match_commutative | match_am | match_16bit_am |
/* note argument order: the false value goes first (op1), true second */
2747 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2749 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2750 addr->mem, am.new_op1, am.new_op2, new_flags,
2751 am.ins_permuted, pnc);
2752 set_am_attributes(new_node, &am);
2754 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2756 new_node = fix_mem_proj(new_node, &am);
/**
 * Create a Setcc that materialises a flag as a 0/1 value, widened up to
 * the mode of orig_node when that mode is wider than the 8-bit Set result.
 */
2763 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2764 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2767 ir_graph *irg = current_ir_graph;
2768 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2769 ir_node *nomem = new_NoMem();
2770 ir_mode *mode = get_irn_mode(orig_node);
2773 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2774 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2776 /* we might need to conv the result up */
/* Set only writes an 8-bit register; zero-extend (mode_Bu) to full width */
2777 if(get_mode_size_bits(mode) > 8) {
2778 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2779 nomem, new_node, mode_Bu);
2780 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2787 * Transforms a Psi node into CMov.
2789 * @return The transformed node.
2791 static ir_node *gen_Psi(ir_node *node)
2793 dbg_info *dbgi = get_irn_dbg_info(node);
2794 ir_node *block = get_nodes_block(node);
2795 ir_node *new_block = be_transform_node(block);
2796 ir_node *psi_true = get_Psi_val(node, 0);
2797 ir_node *psi_default = get_Psi_default(node);
2798 ir_node *cond = get_Psi_cond(node, 0);
2799 ir_node *flags = NULL;
/* only single-condition, GP-register Psis are handled here */
2803 assert(get_Psi_n_conds(node) == 1);
2804 assert(get_irn_mode(cond) == mode_b);
2805 assert(mode_needs_gp_reg(get_irn_mode(node)));
2807 flags = get_flags_node(cond, &pnc);
/* Psi(c, 1, 0) / Psi(c, 0, 1) become a plain Set (last arg flips the
 * condition — presumably ins_permuted; confirm against create_set_32bit) */
2809 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2810 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2811 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2812 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2814 new_node = create_CMov(node, cond, flags, pnc);
2821 * Create a conversion from x87 state register to general purpose.
2823 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2824 ir_node *block = be_transform_node(get_nodes_block(node));
2825 ir_node *op = get_Conv_op(node);
2826 ir_node *new_op = be_transform_node(op);
2827 ia32_code_gen_t *cg = env_cg;
2828 ir_graph *irg = current_ir_graph;
2829 dbg_info *dbgi = get_irn_dbg_info(node);
2830 ir_node *noreg = ia32_new_NoReg_gp(cg);
/* fist rounds per the FPU control word; force truncation mode for C
 * float->int semantics */
2831 ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2832 ir_mode *mode = get_irn_mode(node);
2833 ir_node *fist, *load;
/* store the x87 value as an integer into the frame ... */
2836 fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2837 new_NoMem(), new_op, trunc_mode);
2839 set_irn_pinned(fist, op_pin_state_floats);
2840 set_ia32_use_frame(fist);
2841 set_ia32_op_type(fist, ia32_AddrModeD);
2843 assert(get_mode_size_bits(mode) <= 32);
2844 /* exception we can only store signed 32 bit integers, so for unsigned
2845 we store a 64bit (signed) integer and load the lower bits */
2846 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2847 set_ia32_ls_mode(fist, mode_Ls);
2849 set_ia32_ls_mode(fist, mode_Is);
2851 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* ... and load it back into a GP register (lower 32 bits in the 64-bit case) */
2854 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2856 set_irn_pinned(load, op_pin_state_floats);
2857 set_ia32_use_frame(load);
2858 set_ia32_op_type(load, ia32_AddrModeS);
2859 set_ia32_ls_mode(load, mode_Is);
/* tell the stack-entity allocator how large the spill slot must be */
2860 if(get_ia32_ls_mode(fist) == mode_Ls) {
2861 ia32_attr_t *attr = get_ia32_attr(load);
2862 attr->data.need_64bit_stackent = 1;
2864 ia32_attr_t *attr = get_ia32_attr(load);
2865 attr->data.need_32bit_stackent = 1;
2867 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2869 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2873 * Creates an x87 strict Conv by placing a Store and a Load
2875 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2877 ir_node *block = get_nodes_block(node);
2878 ir_graph *irg = current_ir_graph;
2879 dbg_info *dbgi = get_irn_dbg_info(node);
2880 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2881 ir_node *nomem = new_NoMem();
2882 ir_node *frame = get_irg_frame(irg);
2883 ir_node *store, *load;
/* the x87 stack always holds extended precision; a round trip through
 * memory forces rounding to the target precision */
2886 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
2888 set_ia32_use_frame(store);
2889 set_ia32_op_type(store, ia32_AddrModeD);
2890 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2892 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
2894 set_ia32_use_frame(load);
2895 set_ia32_op_type(load, ia32_AddrModeS);
2896 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
2898 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
2903 * Create a conversion from general purpose to x87 register
2905 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
2906 ir_node *src_block = get_nodes_block(node);
2907 ir_node *block = be_transform_node(src_block);
2908 ir_graph *irg = current_ir_graph;
2909 dbg_info *dbgi = get_irn_dbg_info(node);
2910 ir_node *op = get_Conv_op(node);
2911 ir_node *new_op = NULL;
2915 ir_mode *store_mode;
2921 /* fild can use source AM if the operand is a signed 32bit integer */
2922 if (src_mode == mode_Is) {
2923 ia32_address_mode_t am;
2925 match_arguments(&am, src_block, NULL, op, NULL,
2926 match_am | match_try_am);
2927 if (am.op_type == ia32_AddrModeS) {
/* operand already lives in memory: fild can load it directly */
2928 ia32_address_t *addr = &am.addr;
2930 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
2931 addr->index, addr->mem);
2932 new_node = new_r_Proj(irg, block, fild, mode_vfp,
2935 set_am_attributes(fild, &am);
2936 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
2938 fix_mem_proj(fild, &am);
/* general path: spill the integer into the frame and fild from there */
2943 if(new_op == NULL) {
2944 new_op = be_transform_node(op);
2947 noreg = ia32_new_NoReg_gp(env_cg);
2948 nomem = new_NoMem();
2949 mode = get_irn_mode(op);
2951 /* first convert to 32 bit signed if necessary */
2952 src_bits = get_mode_size_bits(src_mode);
2953 if (src_bits == 8) {
2954 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
2956 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2958 } else if (src_bits < 32) {
2959 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
2961 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
2965 assert(get_mode_size_bits(mode) == 32);
2968 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
2971 set_ia32_use_frame(store);
2972 set_ia32_op_type(store, ia32_AddrModeD);
2973 set_ia32_ls_mode(store, mode_Iu);
2975 /* exception for 32bit unsigned, do a 64bit spill+load */
2976 if(!mode_is_signed(mode)) {
/* fild only reads signed values: write a zero high word so the 64-bit
 * value is a non-negative signed integer equal to the unsigned input */
2979 ir_node *zero_const = create_Immediate(NULL, 0, 0);
2981 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
2982 get_irg_frame(irg), noreg, nomem,
2985 set_ia32_use_frame(zero_store);
2986 set_ia32_op_type(zero_store, ia32_AddrModeD);
2987 add_ia32_am_offs_int(zero_store, 4);
2988 set_ia32_ls_mode(zero_store, mode_Iu);
/* both stores must be complete before the fild reads the slot */
2993 store = new_rd_Sync(dbgi, irg, block, 2, in);
2994 store_mode = mode_Ls;
2996 store_mode = mode_Is;
3000 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3002 set_ia32_use_frame(fild);
3003 set_ia32_op_type(fild, ia32_AddrModeS);
3004 set_ia32_ls_mode(fild, store_mode);
3006 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3012 * Create a conversion from one integer mode into another one
3014 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3015 dbg_info *dbgi, ir_node *block, ir_node *op,
3018 ir_graph *irg = current_ir_graph;
3019 int src_bits = get_mode_size_bits(src_mode);
3020 int tgt_bits = get_mode_size_bits(tgt_mode);
3021 ir_node *new_block = be_transform_node(block);
3023 ir_mode *smaller_mode;
3025 ia32_address_mode_t am;
3026 ia32_address_t *addr = &am.addr;
/* the conversion is fully described by the smaller of the two modes:
 * widening extends from it, narrowing truncates to it */
3029 if (src_bits < tgt_bits) {
3030 smaller_mode = src_mode;
3031 smaller_bits = src_bits;
3033 smaller_mode = tgt_mode;
3034 smaller_bits = tgt_bits;
3037 #ifdef DEBUG_libfirm
3039 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3044 match_arguments(&am, block, NULL, op, NULL,
3045 match_8bit | match_16bit |
3046 match_am | match_8bit_am | match_16bit_am);
3047 if (smaller_bits == 8) {
3048 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3049 addr->index, addr->mem, am.new_op2,
3052 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3053 addr->index, addr->mem, am.new_op2,
3056 set_am_attributes(new_node, &am);
3057 /* match_arguments assume that out-mode = in-mode, this isn't true here
3059 set_ia32_ls_mode(new_node, smaller_mode);
3060 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3061 new_node = fix_mem_proj(new_node, &am);
3066 * Transforms a Conv node.
3068 * @return The created ia32 Conv node
3070 static ir_node *gen_Conv(ir_node *node) {
3071 ir_node *block = get_nodes_block(node);
3072 ir_node *new_block = be_transform_node(block);
3073 ir_node *op = get_Conv_op(node);
3074 ir_node *new_op = NULL;
3075 ir_graph *irg = current_ir_graph;
3076 dbg_info *dbgi = get_irn_dbg_info(node);
3077 ir_mode *src_mode = get_irn_mode(op);
3078 ir_mode *tgt_mode = get_irn_mode(node);
3079 int src_bits = get_mode_size_bits(src_mode);
3080 int tgt_bits = get_mode_size_bits(tgt_mode);
3081 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3082 ir_node *nomem = new_rd_NoMem(irg);
3083 ir_node *res = NULL;
3085 if (src_mode == mode_b) {
3086 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3087 /* nothing to do, we already model bools as 0/1 ints */
3088 return be_transform_node(op);
/* same-mode Convs are no-ops except strict x87 rounding */
3091 if (src_mode == tgt_mode) {
3092 if (get_Conv_strict(node)) {
3093 if (ia32_cg_config.use_sse2) {
3094 /* when we are in SSE mode, we can kill all strict no-op conversion */
3095 return be_transform_node(op);
3098 /* this should be optimized already, but who knows... */
3099 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3100 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3101 return be_transform_node(op);
3105 if (mode_is_float(src_mode)) {
3106 new_op = be_transform_node(op);
3107 /* we convert from float ... */
3108 if (mode_is_float(tgt_mode)) {
/* non-strict E->D narrowing is value-preserving on x87, so drop it */
3109 if(src_mode == mode_E && tgt_mode == mode_D
3110 && !get_Conv_strict(node)) {
3111 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3116 if (ia32_cg_config.use_sse2) {
3117 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3118 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3120 set_ia32_ls_mode(res, tgt_mode);
3122 if(get_Conv_strict(node)) {
/* x87 strict conversion needs an explicit store/load round trip */
3123 res = gen_x87_strict_conv(tgt_mode, new_op);
3124 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3127 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3132 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3133 if (ia32_cg_config.use_sse2) {
3134 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3136 set_ia32_ls_mode(res, src_mode);
3138 return gen_x87_fp_to_gp(node);
3142 /* we convert from int ... */
3143 if (mode_is_float(tgt_mode)) {
3145 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3146 if (ia32_cg_config.use_sse2) {
3147 new_op = be_transform_node(op);
3148 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3150 set_ia32_ls_mode(res, tgt_mode);
3152 res = gen_x87_gp_to_fp(node, src_mode);
3153 if(get_Conv_strict(node)) {
3154 res = gen_x87_strict_conv(tgt_mode, res);
3155 SET_IA32_ORIG_NODE(get_Proj_pred(res),
3156 ia32_get_old_node_name(env_cg, node));
3160 } else if(tgt_mode == mode_b) {
3161 /* mode_b lowering already took care that we only have 0/1 values */
3162 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3163 src_mode, tgt_mode));
3164 return be_transform_node(op);
/* int->int of equal width is a no-op regardless of signedness */
3167 if (src_bits == tgt_bits) {
3168 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3169 src_mode, tgt_mode));
3170 return be_transform_node(op);
3173 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/**
 * Check whether the value satisfies the range implied by a gcc-style
 * x86 immediate constraint letter (the case labels encode the letters;
 * see gcc's machine constraint documentation for the exact meanings).
 */
3181 static int check_immediate_constraint(long val, char immediate_constraint_type)
3183 switch (immediate_constraint_type) {
3187 return val >= 0 && val <= 32;
3189 return val >= 0 && val <= 63;
3191 return val >= -128 && val <= 127;
3193 return val == 0xff || val == 0xffff;
3195 return val >= 0 && val <= 3;
3197 return val >= 0 && val <= 255;
3199 return val >= 0 && val <= 127;
3203 panic("Invalid immediate constraint found");
/**
 * Try to express a node as an ia32 Immediate. Handles Const, SymConst
 * and (possibly negated) Const+SymConst combinations via Add/Sub.
 * Returns NULL when the node cannot be encoded as an immediate that
 * satisfies the given constraint letter (0 = unconstrained).
 */
3207 static ir_node *try_create_Immediate(ir_node *node,
3208 char immediate_constraint_type)
3211 tarval *offset = NULL;
3212 int offset_sign = 0;
3214 ir_entity *symconst_ent = NULL;
3215 int symconst_sign = 0;
3217 ir_node *cnst = NULL;
3218 ir_node *symconst = NULL;
3221 mode = get_irn_mode(node);
/* immediates exist only for integer and pointer values */
3222 if(!mode_is_int(mode) && !mode_is_reference(mode)) {
/* a leading Minus flips the sign of everything below it */
3226 if(is_Minus(node)) {
3228 node = get_Minus_op(node);
3231 if(is_Const(node)) {
3234 offset_sign = minus;
3235 } else if(is_SymConst(node)) {
3238 symconst_sign = minus;
/* symconst +/- constant: record both parts with their effective signs */
3239 } else if(is_Add(node)) {
3240 ir_node *left = get_Add_left(node);
3241 ir_node *right = get_Add_right(node);
3242 if(is_Const(left) && is_SymConst(right)) {
3245 symconst_sign = minus;
3246 offset_sign = minus;
3247 } else if(is_SymConst(left) && is_Const(right)) {
3250 symconst_sign = minus;
3251 offset_sign = minus;
3253 } else if(is_Sub(node)) {
3254 ir_node *left = get_Sub_left(node);
3255 ir_node *right = get_Sub_right(node);
3256 if(is_Const(left) && is_SymConst(right)) {
3259 symconst_sign = !minus;
3260 offset_sign = minus;
3261 } else if(is_SymConst(left) && is_Const(right)) {
3264 symconst_sign = minus;
3265 offset_sign = !minus;
3272 offset = get_Const_tarval(cnst);
3273 if(tarval_is_long(offset)) {
3274 val = get_tarval_long(offset);
3276 ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
3281 if(!check_immediate_constraint(val, immediate_constraint_type))
3284 if(symconst != NULL) {
3285 if(immediate_constraint_type != 0) {
3286 /* we need full 32bits for symconsts */
3290 /* unfortunately the assembler/linker doesn't support -symconst */
3294 if(get_SymConst_kind(symconst) != symconst_addr_ent)
3296 symconst_ent = get_SymConst_entity(symconst);
3298 if(cnst == NULL && symconst == NULL)
3301 if(offset_sign && offset != NULL) {
3302 offset = tarval_neg(offset);
3305 new_node = create_Immediate(symconst_ent, symconst_sign, val);
/**
 * Encode the node as an Immediate if possible, otherwise transform it
 * into a regular (register) operand.
 */
3310 static ir_node *create_immediate_or_transform(ir_node *node,
3311 char immediate_constraint_type)
3313 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3314 if (new_node == NULL) {
3315 new_node = be_transform_node(node);
/* register requirement for operands that need no register at all
 * (memory constraints, the asm mem pseudo-output, ...) */
3320 static const arch_register_req_t no_register_req = {
3321 arch_register_req_type_none,
3322 NULL, /* regclass */
3323 NULL, /* limit bitset */
3325 0 /* different pos */
3329 * An assembler constraint.
3331 typedef struct constraint_t constraint_t;
3332 struct constraint_t {
3335 const arch_register_req_t **out_reqs; /* output requirement array (shared with gen_ASM) */
3337 const arch_register_req_t *req; /* requirement parsed for the current operand */
3338 unsigned immediate_possible; /* operand may be encoded as an immediate */
3339 char immediate_type; /* constraint letter for immediate range check, 0 = any */
/**
 * Parse one gcc-style asm operand constraint string into an
 * arch_register_req_t (stored in constraint->req). Single-register
 * letters accumulate a "limited" bitmask; digits express gcc "same as
 * output n" constraints; immediate letters only mark the operand as
 * immediate-encodable.
 */
3342 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3344 int immediate_possible = 0;
3345 char immediate_type = 0;
3346 unsigned limited = 0;
3347 const arch_register_class_t *cls = NULL;
3348 ir_graph *irg = current_ir_graph;
3349 struct obstack *obst = get_irg_obstack(irg);
3350 arch_register_req_t *req;
3351 unsigned *limited_ptr = NULL;
3355 /* TODO: replace all the asserts with nice error messages */
3358 /* a memory constraint: no need to do anything in backend about it
3359 * (the dependencies are already respected by the memory edge of
3361 constraint->req = &no_register_req;
/* single-register letters: pin the operand to one GP register; multiple
 * such letters in one constraint OR into the same limited bitmask */
3373 assert(cls == NULL ||
3374 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3375 cls = &ia32_reg_classes[CLASS_ia32_gp];
3376 limited |= 1 << REG_EAX;
3379 assert(cls == NULL ||
3380 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3381 cls = &ia32_reg_classes[CLASS_ia32_gp];
3382 limited |= 1 << REG_EBX;
3385 assert(cls == NULL ||
3386 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3387 cls = &ia32_reg_classes[CLASS_ia32_gp];
3388 limited |= 1 << REG_ECX;
3391 assert(cls == NULL ||
3392 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3393 cls = &ia32_reg_classes[CLASS_ia32_gp];
3394 limited |= 1 << REG_EDX;
3397 assert(cls == NULL ||
3398 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3399 cls = &ia32_reg_classes[CLASS_ia32_gp];
3400 limited |= 1 << REG_EDI;
3403 assert(cls == NULL ||
3404 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3405 cls = &ia32_reg_classes[CLASS_ia32_gp];
3406 limited |= 1 << REG_ESI;
3409 case 'q': /* q means lower part of the regs only, this makes no
3410 * difference to Q for us (we only assign whole registers) */
3411 assert(cls == NULL ||
3412 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3413 cls = &ia32_reg_classes[CLASS_ia32_gp];
3414 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3418 assert(cls == NULL ||
3419 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3420 cls = &ia32_reg_classes[CLASS_ia32_gp];
3421 limited |= 1 << REG_EAX | 1 << REG_EDX;
3424 assert(cls == NULL ||
3425 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3426 cls = &ia32_reg_classes[CLASS_ia32_gp];
3427 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3428 1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
/* class-only constraints: any register of the class will do */
3435 assert(cls == NULL);
3436 cls = &ia32_reg_classes[CLASS_ia32_gp];
3442 /* TODO: mark values so the x87 simulator knows about t and u */
3443 assert(cls == NULL);
3444 cls = &ia32_reg_classes[CLASS_ia32_vfp];
3449 assert(cls == NULL);
3450 /* TODO: check that sse2 is supported */
3451 cls = &ia32_reg_classes[CLASS_ia32_xmm];
/* immediate constraints: remember the letter for range checking later */
3461 assert(!immediate_possible);
3462 immediate_possible = 1;
3463 immediate_type = *c;
3467 assert(!immediate_possible);
3468 immediate_possible = 1;
3472 assert(!immediate_possible && cls == NULL);
3473 immediate_possible = 1;
3474 cls = &ia32_reg_classes[CLASS_ia32_gp];
/* digit: gcc "matching constraint" — same register as output #same_as */
3487 assert(constraint->is_in && "can only specify same constraint "
3490 sscanf(c, "%d%n", &same_as, &p);
3498 /* memory constraint no need to do anything in backend about it
3499 * (the dependencies are already respected by the memory edge of
3501 constraint->req = &no_register_req;
3504 case 'E': /* no float consts yet */
3505 case 'F': /* no float consts yet */
3506 case 's': /* makes no sense on x86 */
3507 case 'X': /* we can't support that in firm */
3510 case '<': /* no autodecrement on x86 */
3511 case '>': /* no autoincrement on x86 */
3512 case 'C': /* sse constant not supported yet */
3513 case 'G': /* 80387 constant not supported yet */
3514 case 'y': /* we don't support mmx registers yet */
3515 case 'Z': /* not available in 32 bit mode */
3516 case 'e': /* not available in 32 bit mode */
3517 panic("unsupported asm constraint '%c' found in (%+F)",
3518 *c, current_ir_graph);
3521 panic("unknown asm constraint '%c' found in (%+F)", *c,
3529 const arch_register_req_t *other_constr;
3531 assert(cls == NULL && "same as and register constraint not supported");
3532 assert(!immediate_possible && "same as and immediate constraint not "
3534 assert(same_as < constraint->n_outs && "wrong constraint number in "
3535 "same_as constraint");
3537 other_constr = constraint->out_reqs[same_as];
3539 req = obstack_alloc(obst, sizeof(req[0]));
3540 req->cls = other_constr->cls;
3541 req->type = arch_register_req_type_should_be_same;
3542 req->limited = NULL;
3543 req->other_same = 1U << pos;
3544 req->other_different = 0;
3546 /* switch constraints. This is because in firm we have same_as
3547 * constraints on the output constraints while in the gcc asm syntax
3548 * they are specified on the input constraints */
3549 constraint->req = other_constr;
3550 constraint->out_reqs[same_as] = req;
3551 constraint->immediate_possible = 0;
/* immediate-only constraints still need a fallback register class in
 * case the operand turns out not to be encodable */
3555 if(immediate_possible && cls == NULL) {
3556 cls = &ia32_reg_classes[CLASS_ia32_gp];
3558 assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3559 assert(cls != NULL);
3561 if(immediate_possible) {
3562 assert(constraint->is_in
3563 && "immediate make no sense for output constraints");
3565 /* todo: check types (no float input on 'r' constrained in and such... */
/* a limited req carries its bitset right behind the struct */
3568 req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3569 limited_ptr = (unsigned*) (req+1);
3571 req = obstack_alloc(obst, sizeof(req[0]));
3573 memset(req, 0, sizeof(req[0]));
3576 req->type = arch_register_req_type_limited;
3577 *limited_ptr = limited;
3578 req->limited = limited_ptr;
3580 req->type = arch_register_req_type_normal;
3584 constraint->req = req;
3585 constraint->immediate_possible = immediate_possible;
3586 constraint->immediate_type = immediate_type;
/**
 * Parse one asm clobber name into a register requirement limited to
 * exactly that register (so the register allocator keeps it free across
 * the asm). GP registers also match without their leading 'e'.
 */
3589 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3590 const char *clobber)
3592 ir_graph *irg = get_irn_irg(node);
3593 struct obstack *obst = get_irg_obstack(irg);
3594 const arch_register_t *reg = NULL;
3597 arch_register_req_t *req;
3598 const arch_register_class_t *cls;
3603 /* TODO: construct a hashmap instead of doing linear search for clobber
3605 for(c = 0; c < N_CLASSES; ++c) {
3606 cls = & ia32_reg_classes[c];
3607 for(r = 0; r < cls->n_regs; ++r) {
3608 const arch_register_t *temp_reg = arch_register_for_index(cls, r);
/* name+1 lets "ax" match "eax" etc. for GP registers */
3609 if(strcmp(temp_reg->name, clobber) == 0
3610 || (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3619 panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* the limited bitset is a single unsigned, so index must fit in 32 bits */
3623 assert(reg->index < 32);
3625 limited = obstack_alloc(obst, sizeof(limited[0]));
3626 *limited = 1 << reg->index;
3628 req = obstack_alloc(obst, sizeof(req[0]));
3629 memset(req, 0, sizeof(req[0]));
3630 req->type = arch_register_req_type_limited;
3632 req->limited = limited;
3634 constraint->req = req;
3635 constraint->immediate_possible = 0;
3636 constraint->immediate_type = 0;
/**
 * Return whether an asm constraint string contains a memory constraint
 * (scans the constraint characters).
 */
3639 static int is_memory_op(const ir_asm_constraint *constraint)
3641 ident *id = constraint->constraint;
3642 const char *str = get_id_str(id);
3645 for(c = str; *c != '\0'; ++c) {
3654 * generates code for a ASM node
3656 static ir_node *gen_ASM(ir_node *node)
3659 ir_graph *irg = current_ir_graph;
3660 ir_node *block = get_nodes_block(node);
3661 ir_node *new_block = be_transform_node(block);
3662 dbg_info *dbgi = get_irn_dbg_info(node);
3666 int n_out_constraints;
3668 const arch_register_req_t **out_reg_reqs;
3669 const arch_register_req_t **in_reg_reqs;
3670 ia32_asm_reg_t *register_map;
3671 unsigned reg_map_size = 0;
3672 struct obstack *obst;
3673 const ir_asm_constraint *in_constraints;
3674 const ir_asm_constraint *out_constraints;
3676 constraint_t parsed_constraint;
3678 arity = get_irn_arity(node);
3679 in = alloca(arity * sizeof(in[0]));
3680 memset(in, 0, arity * sizeof(in[0]));
3682 n_out_constraints = get_ASM_n_output_constraints(node);
3683 n_clobbers = get_ASM_n_clobbers(node);
3684 out_arity = n_out_constraints + n_clobbers;
3685 /* hack to keep space for mem proj */
3689 in_constraints = get_ASM_input_constraints(node);
3690 out_constraints = get_ASM_output_constraints(node);
3691 clobbers = get_ASM_clobbers(node);
3693 /* construct output constraints */
3694 obst = get_irg_obstack(irg);
3695 out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3696 parsed_constraint.out_reqs = out_reg_reqs;
3697 parsed_constraint.n_outs = n_out_constraints;
3698 parsed_constraint.is_in = 0;
3700 for(i = 0; i < out_arity; ++i) {
3703 if(i < n_out_constraints) {
3704 const ir_asm_constraint *constraint = &out_constraints[i];
3705 c = get_id_str(constraint->constraint);
3706 parse_asm_constraint(i, &parsed_constraint, c);
3708 if(constraint->pos > reg_map_size)
3709 reg_map_size = constraint->pos;
3711 out_reg_reqs[i] = parsed_constraint.req;
3712 } else if(i < out_arity - 1) {
3713 ident *glob_id = clobbers [i - n_out_constraints];
3714 assert(glob_id != NULL);
3715 c = get_id_str(glob_id);
3716 parse_clobber(node, i, &parsed_constraint, c);
/* clobber reqs are written one slot up, leaving the reserved mem-proj
 * slot at index n_out_constraints free */
3718 out_reg_reqs[i+1] = parsed_constraint.req;
/* the reserved mem-proj output needs no register */
3722 out_reg_reqs[n_out_constraints] = &no_register_req;
3724 /* construct input constraints */
3725 in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3726 parsed_constraint.is_in = 1;
3727 for(i = 0; i < arity; ++i) {
3728 const ir_asm_constraint *constraint = &in_constraints[i];
3729 ident *constr_id = constraint->constraint;
3730 const char *c = get_id_str(constr_id);
3732 parse_asm_constraint(i, &parsed_constraint, c);
3733 in_reg_reqs[i] = parsed_constraint.req;
3735 if(constraint->pos > reg_map_size)
3736 reg_map_size = constraint->pos;
/* inputs that allow immediates: try to encode the operand directly */
3738 if(parsed_constraint.immediate_possible) {
3739 ir_node *pred = get_irn_n(node, i);
3740 char imm_type = parsed_constraint.immediate_type;
3741 ir_node *immediate = try_create_Immediate(pred, imm_type);
3743 if(immediate != NULL) {
/* map template operand positions (%0, %1, ...) to in/out slots */
3750 register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3751 memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
3753 for(i = 0; i < n_out_constraints; ++i) {
3754 const ir_asm_constraint *constraint = &out_constraints[i];
3755 unsigned pos = constraint->pos;
3757 assert(pos < reg_map_size);
3758 register_map[pos].use_input = 0;
3759 register_map[pos].valid = 1;
3760 register_map[pos].memory = is_memory_op(constraint);
3761 register_map[pos].inout_pos = i;
3762 register_map[pos].mode = constraint->mode;
3765 /* transform inputs */
3766 for(i = 0; i < arity; ++i) {
3767 const ir_asm_constraint *constraint = &in_constraints[i];
3768 unsigned pos = constraint->pos;
3769 ir_node *pred = get_irn_n(node, i);
3770 ir_node *transformed;
3772 assert(pos < reg_map_size);
3773 register_map[pos].use_input = 1;
3774 register_map[pos].valid = 1;
3775 register_map[pos].memory = is_memory_op(constraint);
3776 register_map[pos].inout_pos = i;
3777 register_map[pos].mode = constraint->mode;
3782 transformed = be_transform_node(pred);
3783 in[i] = transformed;
3786 new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3787 get_ASM_text(node), register_map);
3789 set_ia32_out_req_all(new_node, out_reg_reqs);
3790 set_ia32_in_req_all(new_node, in_reg_reqs);
3792 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3798 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transform a be_FrameAddr into an ia32 Lea that is relative to the
 * (transformed) frame pointer and tagged with the node's frame entity.
 * NOTE(review): the declaration of new_node and the final return are in
 * lines elided from this excerpt.
 */
3800 static ir_node *gen_be_FrameAddr(ir_node *node) {
3801 ir_node *block = be_transform_node(get_nodes_block(node));
3802 ir_node *op = be_get_FrameAddr_frame(node);
3803 ir_node *new_op = be_transform_node(op);
3804 ir_graph *irg = current_ir_graph;
3805 dbg_info *dbgi = get_irn_dbg_info(node);
3806 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
/* base = transformed frame pointer, index = NoReg; offset comes from the
 * frame entity resolved later by the frame-layout phase */
3809 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3810 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3811 set_ia32_use_frame(new_node);
3813 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3819 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transform a be_Return.  In the SSE2 case with a single primitive float
 * result, the value sits in an xmm register but the ABI returns floats on
 * the x87 stack: store the xmm value to the frame, reload it as vfp, and
 * rebuild the Barrier so Return picks up the reloaded value.  All other
 * cases are handled by plain duplication.
 * NOTE(review): several declarations (res_type, mode, dbgi, block, noreg,
 * in, new_in) and parts of the barrier-rebuild loop are in elided lines.
 */
3821 static ir_node *gen_be_Return(ir_node *node) {
3822 ir_graph *irg = current_ir_graph;
3823 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3824 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3825 ir_entity *ent = get_irg_entity(irg);
3826 ir_type *tp = get_entity_type(ent);
3831 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3832 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3835 int pn_ret_val, pn_ret_mem, arity, i;
3837 assert(ret_val != NULL);
/* fast path: no return value or no SSE2 -> nothing special to do */
3838 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3839 return be_duplicate_node(node);
3842 res_type = get_method_res_type(tp, 0);
3844 if (! is_Primitive_type(res_type)) {
3845 return be_duplicate_node(node);
3848 mode = get_type_mode(res_type);
/* only float results need the xmm -> x87 copy */
3849 if (! mode_is_float(mode)) {
3850 return be_duplicate_node(node);
3853 assert(get_method_n_ress(tp) == 1);
3855 pn_ret_val = get_Proj_proj(ret_val);
3856 pn_ret_mem = get_Proj_proj(ret_mem);
3858 /* get the Barrier */
3859 barrier = get_Proj_pred(ret_val);
3861 /* get result input of the Barrier */
3862 ret_val = get_irn_n(barrier, pn_ret_val);
3863 new_ret_val = be_transform_node(ret_val);
3865 /* get memory input of the Barrier */
3866 ret_mem = get_irn_n(barrier, pn_ret_mem);
3867 new_ret_mem = be_transform_node(ret_mem);
3869 frame = get_irg_frame(irg);
3871 dbgi = get_irn_dbg_info(barrier);
3872 block = be_transform_node(get_nodes_block(barrier));
3874 noreg = ia32_new_NoReg_gp(env_cg);
3876 /* store xmm0 onto stack */
3877 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3878 new_ret_mem, new_ret_val);
3879 set_ia32_ls_mode(sse_store, mode);
3880 set_ia32_op_type(sse_store, ia32_AddrModeD);
3881 set_ia32_use_frame(sse_store);
3883 /* load into x87 register */
3884 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3885 set_ia32_op_type(fld, ia32_AddrModeS);
3886 set_ia32_use_frame(fld);
3888 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3889 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3891 /* create a new barrier */
3892 arity = get_irn_arity(barrier);
3893 in = alloca(arity * sizeof(in[0]));
3894 for (i = 0; i < arity; ++i) {
/* NOTE(review): the assignments for the value/memory slots (presumably
 * fld and mproj) are in elided lines */
3897 if (i == pn_ret_val) {
3899 } else if (i == pn_ret_mem) {
3902 ir_node *in = get_irn_n(barrier, i);
3903 new_in = be_transform_node(in);
3908 new_barrier = new_ir_node(dbgi, irg, block,
3909 get_irn_op(barrier), get_irn_mode(barrier),
3911 copy_node_attr(barrier, new_barrier);
3912 be_duplicate_deps(barrier, new_barrier);
/* register the rebuilt barrier so be_duplicate_node below sees it */
3913 be_set_transformed_node(barrier, new_barrier);
3914 mark_irn_visited(barrier);
3916 /* transform normally */
3917 return be_duplicate_node(node);
3921 * Transform a be_AddSP into an ia32_SubSP.
/**
 * Transform a be_AddSP into an ia32_SubSP (note the inversion — presumably
 * because the ia32 stack grows towards lower addresses; the header comment
 * above confirms the AddSP -> SubSP mapping).
 */
3923 static ir_node *gen_be_AddSP(ir_node *node)
3925 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3926 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3928 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
3932 * Transform a be_SubSP into an ia32_AddSP
/**
 * Transform a be_SubSP into an ia32_AddSP (mirror of gen_be_AddSP:
 * shrinking the logical stack means adding to ESP).
 */
3934 static ir_node *gen_be_SubSP(ir_node *node)
3936 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3937 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3939 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
3943 * This function just sets the register for the Unknown node
3944 * as this is not done during register allocation because Unknown
3945 * is an "ignore" node.
/**
 * Transform an Unknown node: xmm Unknown for SSE2 floats, a vfldz (load
 * zero) as a workaround for x87 floats, and a gp Unknown for integer-like
 * modes.  Panics on any other mode.
 */
3947 static ir_node *gen_Unknown(ir_node *node) {
3948 ir_mode *mode = get_irn_mode(node);
3950 if (mode_is_float(mode)) {
3951 if (ia32_cg_config.use_sse2) {
3952 return ia32_new_Unknown_xmm(env_cg);
3954 /* Unknown nodes are buggy in x87 simulator, use zero for now... */
3955 ir_graph *irg = current_ir_graph;
3956 dbg_info *dbgi = get_irn_dbg_info(node);
3957 ir_node *block = get_irg_start_block(irg);
3958 ir_node *ret = new_rd_ia32_vfldz(dbgi, irg, block);
3960 /* Const Nodes before the initial IncSP are a bad idea, because
3961 * they could be spilled and we have no SP ready at that point yet.
3962 * So add a dependency to the initial frame pointer calculation to
3963 * avoid that situation.
3965 add_irn_dep(ret, get_irg_frame(irg));
/* NOTE(review): the `return ret;` is in an elided line */
3968 } else if (mode_needs_gp_reg(mode)) {
3969 return ia32_new_Unknown_gp(env_cg);
3971 panic("unsupported Unknown-Mode");
3977 * Change some phi modes
/**
 * Transform a Phi: keep the Phi but rewrite its mode to the backend mode
 * (gp register mode for <=32bit integers, xmm/vfp for floats — the actual
 * mode assignments are in elided lines).  The Phi is built with the OLD
 * predecessors to tolerate loops; the fixup happens later via the
 * transform queue.
 */
3979 static ir_node *gen_Phi(ir_node *node) {
3980 ir_node *block = be_transform_node(get_nodes_block(node));
3981 ir_graph *irg = current_ir_graph;
3982 dbg_info *dbgi = get_irn_dbg_info(node);
3983 ir_mode *mode = get_irn_mode(node);
3986 if(mode_needs_gp_reg(mode)) {
3987 /* we shouldn't have any 64bit stuff around anymore */
3988 assert(get_mode_size_bits(mode) <= 32);
3989 /* all integer operations are on 32bit registers now */
3991 } else if(mode_is_float(mode)) {
3992 if (ia32_cg_config.use_sse2) {
3999 /* phi nodes allow loops, so we use the old arguments for now
4000 * and fix this later */
4001 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4002 get_irn_in(node) + 1);
4003 copy_node_attr(node, phi);
4004 be_duplicate_deps(node, phi);
4006 be_set_transformed_node(node, phi);
/* make sure the (untransformed) predecessors get processed */
4007 be_enqueue_preds(node);
/**
 * Transform an IJmp (indirect jump) into an ia32_IJmp, allowing the target
 * to be folded as address-mode operand or immediate via match_arguments.
 */
4015 static ir_node *gen_IJmp(ir_node *node)
4017 ir_node *block = get_nodes_block(node);
4018 ir_node *new_block = be_transform_node(block);
4019 ir_graph *irg = current_ir_graph;
4020 dbg_info *dbgi = get_irn_dbg_info(node);
4021 ir_node *op = get_IJmp_target(node);
4023 ia32_address_mode_t am;
4024 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4026 assert(get_irn_mode(op) == mode_P);
4028 match_arguments(&am, block, NULL, op, NULL,
4029 match_am | match_8bit_am | match_16bit_am |
4030 match_immediate | match_8bit | match_16bit);
4032 new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
4033 addr->mem, am.new_op2);
4034 set_am_attributes(new_node, &am);
4035 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* if a load was folded in, reroute its memory Proj */
4037 new_node = fix_mem_proj(new_node, &am);
/* Constructor signatures for lowered Load/Store transformers: they mirror
 * the new_rd_ia32_* node constructors (base/index addressing plus memory,
 * and a value operand for stores). */
4042 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4045 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4046 ir_node *val, ir_node *mem);
4049 * Transforms a lowered Load into a "real" one.
/**
 * Transform a lowered Load into a "real" one: build the node via the given
 * constructor and copy over all ia32 address-mode attributes (offset,
 * scale, symconst, sign, ls-mode, frame entity/use).
 */
4051 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
4053 ir_node *block = be_transform_node(get_nodes_block(node));
4054 ir_node *ptr = get_irn_n(node, 0);
4055 ir_node *new_ptr = be_transform_node(ptr);
4056 ir_node *mem = get_irn_n(node, 1);
4057 ir_node *new_mem = be_transform_node(mem);
4058 ir_graph *irg = current_ir_graph;
4059 dbg_info *dbgi = get_irn_dbg_info(node);
4060 ir_mode *mode = get_ia32_ls_mode(node);
4061 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4064 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
4066 set_ia32_op_type(new_op, ia32_AddrModeS);
/* copy the complete address-mode description from the lowered node */
4067 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
4068 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
4069 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
4070 if (is_ia32_am_sc_sign(node))
4071 set_ia32_am_sc_sign(new_op);
4072 set_ia32_ls_mode(new_op, mode);
4073 if (is_ia32_use_frame(node)) {
4074 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4075 set_ia32_use_frame(new_op);
4078 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4084 * Transforms a lowered Store into a "real" one.
/**
 * Transform a lowered Store into a "real" one: build the node via the
 * given constructor and carry over offset, ls-mode and frame entity.
 * Unlike gen_lowered_Load this always marks the node as frame-using.
 */
4086 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
4088 ir_node *block = be_transform_node(get_nodes_block(node));
4089 ir_node *ptr = get_irn_n(node, 0);
4090 ir_node *new_ptr = be_transform_node(ptr);
4091 ir_node *val = get_irn_n(node, 1);
4092 ir_node *new_val = be_transform_node(val);
4093 ir_node *mem = get_irn_n(node, 2);
4094 ir_node *new_mem = be_transform_node(mem);
4095 ir_graph *irg = current_ir_graph;
4096 dbg_info *dbgi = get_irn_dbg_info(node);
4097 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4098 ir_mode *mode = get_ia32_ls_mode(node);
4102 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
4104 am_offs = get_ia32_am_offs_int(node);
4105 add_ia32_am_offs_int(new_op, am_offs);
4107 set_ia32_op_type(new_op, ia32_AddrModeD);
4108 set_ia32_ls_mode(new_op, mode);
4109 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4110 set_ia32_use_frame(new_op);
4112 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/* l_ShlDep -> ia32 Shl (the Dep variant carries an extra scheduling
 * dependency input which the shift matcher handles). */
4117 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4119 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4120 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4122 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
4123 match_immediate | match_mode_neutral);
/* l_ShrDep -> ia32 Shr; match flags continue on an elided line. */
4126 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4128 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4129 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4130 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/* l_SarDep -> ia32 Sar; match flags continue on an elided line. */
4134 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4136 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4137 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4138 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/**
 * l_Add -> ia32 Add.  The result is forced to mode_T because the 64bit
 * lowering consumes both the value and the flags output (gen_binop may
 * hand back a Proj, so unwrap to the Add first).
 */
4142 static ir_node *gen_ia32_l_Add(ir_node *node) {
4143 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4144 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4145 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
4146 match_commutative | match_am | match_immediate |
4147 match_mode_neutral);
4149 if(is_Proj(lowered)) {
4150 lowered = get_Proj_pred(lowered);
4152 assert(is_ia32_Add(lowered));
4153 set_irn_mode(lowered, mode_T);
/* l_Adc -> ia32 Adc (add with carry); the flags input is wired up by
 * gen_binop_flags. */
4159 static ir_node *gen_ia32_l_Adc(ir_node *node)
4161 return gen_binop_flags(node, new_rd_ia32_Adc,
4162 match_commutative | match_am | match_immediate |
4163 match_mode_neutral);
4167 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
4169 * @param node The node to transform
4170 * @return the created ia32 vfild node
/* l_vfild -> real vfild (x87 integer load), via the generic lowered-Load
 * transformer. */
4172 static ir_node *gen_ia32_l_vfild(ir_node *node) {
4173 return gen_lowered_Load(node, new_rd_ia32_vfild);
4177 * Transforms an ia32_l_Load into a "real" ia32_Load node
4179 * @param node The node to transform
4180 * @return the created ia32 Load node
/* l_Load -> real ia32 Load, via the generic lowered-Load transformer. */
4182 static ir_node *gen_ia32_l_Load(ir_node *node) {
4183 return gen_lowered_Load(node, new_rd_ia32_Load);
4187 * Transforms an ia32_l_Store into a "real" ia32_Store node
4189 * @param node The node to transform
4190 * @return the created ia32 Store node
/* l_Store -> real ia32 Store, via the generic lowered-Store transformer. */
4192 static ir_node *gen_ia32_l_Store(ir_node *node) {
4193 return gen_lowered_Store(node, new_rd_ia32_Store);
4197 * Transforms a l_vfist into a "real" vfist node.
4199 * @param node The node to transform
4200 * @return the created ia32 vfist node
/**
 * l_vfist -> real vfist (x87 integer store).  Cannot reuse
 * gen_lowered_Store because vfist needs the extra truncation-fpcw operand,
 * so the attribute copying is repeated here.
 */
4202 static ir_node *gen_ia32_l_vfist(ir_node *node) {
4203 ir_node *block = be_transform_node(get_nodes_block(node));
4204 ir_node *ptr = get_irn_n(node, 0);
4205 ir_node *new_ptr = be_transform_node(ptr);
4206 ir_node *val = get_irn_n(node, 1);
4207 ir_node *new_val = be_transform_node(val);
4208 ir_node *mem = get_irn_n(node, 2);
4209 ir_node *new_mem = be_transform_node(mem);
4210 ir_graph *irg = current_ir_graph;
4211 dbg_info *dbgi = get_irn_dbg_info(node);
4212 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4213 ir_mode *mode = get_ia32_ls_mode(node);
/* fpu control word set to truncation rounding, as C semantics require */
4214 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4218 new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
4219 new_val, trunc_mode);
4221 am_offs = get_ia32_am_offs_int(node);
4222 add_ia32_am_offs_int(new_op, am_offs);
4224 set_ia32_op_type(new_op, ia32_AddrModeD);
4225 set_ia32_ls_mode(new_op, mode);
4226 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4227 set_ia32_use_frame(new_op);
4229 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4235 * Transforms a l_MulS into a "real" MulS node.
4237 * @return the created ia32 Mul node
/* l_Mul -> ia32 Mul (widening unsigned multiply, per the Mul node used). */
4239 static ir_node *gen_ia32_l_Mul(ir_node *node) {
4240 ir_node *left = get_binop_left(node);
4241 ir_node *right = get_binop_right(node);
4243 return gen_binop(node, left, right, new_rd_ia32_Mul,
4244 match_commutative | match_am | match_mode_neutral);
4248 * Transforms a l_IMulS into a "real" IMul1OPS node.
4250 * @return the created ia32 IMul1OP node
/* l_IMul -> ia32 IMul1OP (one-operand signed multiply). */
4252 static ir_node *gen_ia32_l_IMul(ir_node *node) {
4253 ir_node *left = get_binop_left(node);
4254 ir_node *right = get_binop_right(node);
4256 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
4257 match_commutative | match_am | match_mode_neutral);
/**
 * l_Sub -> ia32 Sub; like gen_ia32_l_Add the node is switched to mode_T
 * so the 64bit lowering can also consume the flags output.  Note: no
 * match_commutative here — subtraction is not commutative.
 */
4260 static ir_node *gen_ia32_l_Sub(ir_node *node) {
4261 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4262 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4263 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
4264 match_am | match_immediate | match_mode_neutral);
4266 if(is_Proj(lowered)) {
4267 lowered = get_Proj_pred(lowered);
4269 assert(is_ia32_Sub(lowered));
4270 set_irn_mode(lowered, mode_T);
/* l_Sbb -> ia32 Sbb (subtract with borrow), flags wired by
 * gen_binop_flags. */
4276 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
4277 return gen_binop_flags(node, new_rd_ia32_Sbb,
4278 match_am | match_immediate | match_mode_neutral);
4282 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4283 * op1 - target to be shifted
4284 * op2 - contains bits to be shifted into target
4286 * Only op3 can be an immediate.
/**
 * Common transformer for l_ShlD/l_ShrD (64bit double-width shifts).
 * Conv nodes on the shift amount are skipped because only the low 5 bits
 * of the count matter on ia32; the count may become an immediate.
 * NOTE(review): the `new_count` operand of the ShlD/ShrD constructors sits
 * on elided continuation lines.
 */
4288 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4289 ir_node *low, ir_node *count)
4291 ir_node *block = get_nodes_block(node);
4292 ir_node *new_block = be_transform_node(block);
4293 ir_graph *irg = current_ir_graph;
4294 dbg_info *dbgi = get_irn_dbg_info(node);
4295 ir_node *new_high = be_transform_node(high);
4296 ir_node *new_low = be_transform_node(low);
4300 /* the shift amount can be any mode that is bigger than 5 bits, since all
4301 * other bits are ignored anyway */
4302 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
4303 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4304 count = get_Conv_op(count);
4306 new_count = create_immediate_or_transform(count, 0);
4308 if (is_ia32_l_ShlD(node)) {
4309 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
4312 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
4315 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* l_ShlD: unpack the three data inputs and defer to the common 64bit
 * shift transformer. */
4320 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4322 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4323 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4324 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4325 return gen_lowered_64bit_shifts(node, high, low, count);
/* l_ShrD: same unpacking as gen_ia32_l_ShlD, right-shift variant. */
4328 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4330 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4331 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4332 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4333 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * l_LLtoFloat: convert a signed 64bit integer (given as low/high 32bit
 * halves) to float by spilling both halves to a 64bit frame slot and
 * loading them back with vfild (x87 FILD on a 64bit operand).
 * Unsigned 64bit sources are not supported and panic.
 * NOTE(review): the value operands of the two Stores and the Proj setup
 * feeding the Sync are on elided lines.
 */
4336 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
4337 ir_node *src_block = get_nodes_block(node);
4338 ir_node *block = be_transform_node(src_block);
4339 ir_graph *irg = current_ir_graph;
4340 dbg_info *dbgi = get_irn_dbg_info(node);
4341 ir_node *frame = get_irg_frame(irg);
4342 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4343 ir_node *nomem = new_NoMem();
4344 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4345 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4346 ir_node *new_val_low = be_transform_node(val_low);
4347 ir_node *new_val_high = be_transform_node(val_high);
4352 ir_node *store_high;
4354 if(!mode_is_signed(get_irn_mode(val_high))) {
4355 panic("unsigned long long -> float not supported yet (%+F)", node);
/* write both 32bit halves into one 64bit stack slot */
4359 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4361 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4363 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
4364 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
4366 set_ia32_use_frame(store_low);
4367 set_ia32_use_frame(store_high);
4368 set_ia32_op_type(store_low, ia32_AddrModeD);
4369 set_ia32_op_type(store_high, ia32_AddrModeD);
4370 set_ia32_ls_mode(store_low, mode_Iu);
4371 set_ia32_ls_mode(store_high, mode_Is);
/* high half lives 4 bytes above the low half (little endian) */
4372 add_ia32_am_offs_int(store_high, 4);
4376 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4379 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
4381 set_ia32_use_frame(fild);
4382 set_ia32_op_type(fild, ia32_AddrModeS);
/* reload the slot as one signed 64bit integer */
4383 set_ia32_ls_mode(fild, mode_Ls);
4385 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
4387 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * l_FloattoLL: convert a float to a 64bit integer by storing it with vfist
 * (truncating fpcw) into a 64bit frame slot; the two 32bit halves are
 * picked up later by gen_Proj_l_FloattoLL.
 * NOTE(review): the trailing return (presumably the fist node/its memory)
 * is on elided lines.
 */
4390 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
4391 ir_node *src_block = get_nodes_block(node);
4392 ir_node *block = be_transform_node(src_block);
4393 ir_graph *irg = current_ir_graph;
4394 dbg_info *dbgi = get_irn_dbg_info(node);
4395 ir_node *frame = get_irg_frame(irg);
4396 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4397 ir_node *nomem = new_NoMem();
4398 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4399 ir_node *new_val = be_transform_node(val);
/* C float->int conversion truncates, so force the truncating fpcw */
4400 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4405 fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
4407 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
4408 set_ia32_use_frame(fist);
4409 set_ia32_op_type(fist, ia32_AddrModeD);
4410 set_ia32_ls_mode(fist, mode_Ls);
4416 * the BAD transformer.
/* Catch-all transformer for opcodes that must never reach the backend:
 * always panics with the offending node. */
4418 static ir_node *bad_transform(ir_node *node) {
4419 panic("No transform function for %+F available.\n", node);
/**
 * Projs of l_FloattoLL: read back one 32bit half of the 64bit value that
 * gen_ia32_l_FloattoLL stored to the frame.  The high half is at offset 4;
 * the stack slot is forced to 64bit even though each load reads 32bit.
 */
4423 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
4424 ir_graph *irg = current_ir_graph;
4425 ir_node *block = be_transform_node(get_nodes_block(node));
4426 ir_node *pred = get_Proj_pred(node);
4427 ir_node *new_pred = be_transform_node(pred);
4428 ir_node *frame = get_irg_frame(irg);
4429 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4430 dbg_info *dbgi = get_irn_dbg_info(node);
4431 long pn = get_Proj_proj(node);
4436 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
4437 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
4438 set_ia32_use_frame(load);
4439 set_ia32_op_type(load, ia32_AddrModeS);
4440 set_ia32_ls_mode(load, mode_Iu);
4441 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4442 * 32 bit from it with this particular load */
4443 attr = get_ia32_attr(load);
4444 attr->data.need_64bit_stackent = 1;
4446 if (pn == pn_ia32_l_FloattoLL_res_high) {
4447 add_ia32_am_offs_int(load, 4);
4449 assert(pn == pn_ia32_l_FloattoLL_res_low);
4452 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4458 * Transform the Projs of an AddSP.
/**
 * Projs of a be_AddSP: renumber onto the ia32_SubSP the predecessor became
 * (see gen_be_AddSP).  The stack Proj is pinned to ESP.
 */
4460 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4461 ir_node *block = be_transform_node(get_nodes_block(node));
4462 ir_node *pred = get_Proj_pred(node);
4463 ir_node *new_pred = be_transform_node(pred);
4464 ir_graph *irg = current_ir_graph;
4465 dbg_info *dbgi = get_irn_dbg_info(node);
4466 long proj = get_Proj_proj(node);
4468 if (proj == pn_be_AddSP_sp) {
4469 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4470 pn_ia32_SubSP_stack);
4471 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4473 } else if(proj == pn_be_AddSP_res) {
4474 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4475 pn_ia32_SubSP_addr);
4476 } else if (proj == pn_be_AddSP_M) {
4477 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
/* unknown proj number: fall through to an Unknown placeholder */
4481 return new_rd_Unknown(irg, get_irn_mode(node));
4485 * Transform the Projs of a SubSP.
/**
 * Projs of a be_SubSP: renumber onto the ia32_AddSP the predecessor became
 * (see gen_be_SubSP).  The stack Proj is pinned to ESP.
 */
4487 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4488 ir_node *block = be_transform_node(get_nodes_block(node));
4489 ir_node *pred = get_Proj_pred(node);
4490 ir_node *new_pred = be_transform_node(pred);
4491 ir_graph *irg = current_ir_graph;
4492 dbg_info *dbgi = get_irn_dbg_info(node);
4493 long proj = get_Proj_proj(node);
4495 if (proj == pn_be_SubSP_sp) {
4496 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4497 pn_ia32_AddSP_stack);
4498 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4500 } else if (proj == pn_be_SubSP_M) {
4501 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
/* unknown proj number: fall through to an Unknown placeholder */
4505 return new_rd_Unknown(irg, get_irn_mode(node));
4509 * Transform and renumber the Projs from a Load.
/**
 * Transform and renumber the Projs of a Load.  Special cases:
 *  - a multi-user Load's mem-Proj is kept untransformed for now, since the
 *    Load itself may still be folded into another node's address mode;
 *  - the transformed predecessor may be an ia32 Load/xLoad/vfld, a Conv
 *    (when the Load got fused into a conversion), or something else
 *    entirely when source address-mode folding consumed the Load.
 * NOTE(review): several case labels/braces of the switch statements are on
 * elided lines; comments below describe only what is visible.
 */
4511 static ir_node *gen_Proj_Load(ir_node *node) {
4513 ir_node *block = be_transform_node(get_nodes_block(node));
4514 ir_node *pred = get_Proj_pred(node);
4515 ir_graph *irg = current_ir_graph;
4516 dbg_info *dbgi = get_irn_dbg_info(node);
4517 long proj = get_Proj_proj(node);
4520 /* loads might be part of source address mode matches, so we don't
4521 transform the ProjMs yet (with the exception of loads whose result is
4524 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4527 assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
4529 /* this is needed, because sometimes we have loops that are only
4530 reachable through the ProjM */
4531 be_enqueue_preds(node);
4532 /* do it in 2 steps, to silence firm verifier */
4533 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4534 set_Proj_proj(res, pn_ia32_Load_M);
4538 /* renumber the proj */
4539 new_pred = be_transform_node(pred);
4540 if (is_ia32_Load(new_pred)) {
4543 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4545 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4546 case pn_Load_X_regular:
4547 return new_rd_Jmp(dbgi, irg, block);
4548 case pn_Load_X_except:
4549 /* This Load might raise an exception. Mark it. */
4550 set_ia32_exc_label(new_pred, 1);
4551 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4555 } else if (is_ia32_Conv_I2I(new_pred) ||
4556 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the Load was fused into a Conv: expose both results on the Conv */
4557 set_irn_mode(new_pred, mode_T);
4558 if (proj == pn_Load_res) {
4559 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4560 } else if (proj == pn_Load_M) {
4561 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4563 } else if (is_ia32_xLoad(new_pred)) {
4566 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4568 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4569 case pn_Load_X_regular:
4570 return new_rd_Jmp(dbgi, irg, block);
4571 case pn_Load_X_except:
4572 /* This Load might raise an exception. Mark it. */
4573 set_ia32_exc_label(new_pred, 1);
4574 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4578 } else if (is_ia32_vfld(new_pred)) {
4581 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4583 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4584 case pn_Load_X_regular:
4585 return new_rd_Jmp(dbgi, irg, block);
4586 case pn_Load_X_except:
4587 /* This Load might raise an exception. Mark it. */
4588 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): this uses pn_ia32_xLoad_X_exc although the predecessor
 * here is a vfld — looks like a copy/paste slip; confirm against the
 * upstream file before changing (the elided pn_ia32_vfld_* enum layout
 * may make the values coincide) */
4589 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4594 /* can happen for ProJMs when source address mode happened for the
4597 /* however it should not be the result proj, as that would mean the
4598 load had multiple users and should not have been used for
4600 if (proj != pn_Load_M) {
4601 panic("internal error: transformed node not a Load");
4603 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4607 return new_rd_Unknown(irg, get_irn_mode(node));
4611 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transform and renumber the Projs of Div/Mod/DivMod: all three map onto
 * the single ia32 Div/IDiv node, which provides both the quotient
 * (div_res) and the remainder (mod_res) plus memory and X outputs.
 * NOTE(review): case labels and closing braces of the outer opcode switch
 * are partially elided.
 */
4613 static ir_node *gen_Proj_DivMod(ir_node *node) {
4614 ir_node *block = be_transform_node(get_nodes_block(node));
4615 ir_node *pred = get_Proj_pred(node);
4616 ir_node *new_pred = be_transform_node(pred);
4617 ir_graph *irg = current_ir_graph;
4618 dbg_info *dbgi = get_irn_dbg_info(node);
4619 ir_mode *mode = get_irn_mode(node);
4620 long proj = get_Proj_proj(node);
4622 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4624 switch (get_irn_opcode(pred)) {
4628 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4630 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4631 case pn_Div_X_regular:
4632 return new_rd_Jmp(dbgi, irg, block);
4633 case pn_Div_X_except:
4634 set_ia32_exc_label(new_pred, 1);
4635 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4643 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
/* Mod result comes from the mod_res output of the same Div node */
4645 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4646 case pn_Mod_X_except:
4647 set_ia32_exc_label(new_pred, 1);
4648 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4656 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4657 case pn_DivMod_res_div:
4658 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4659 case pn_DivMod_res_mod:
4660 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4661 case pn_DivMod_X_regular:
4662 return new_rd_Jmp(dbgi, irg, block);
4663 case pn_DivMod_X_except:
4664 set_ia32_exc_label(new_pred, 1);
4665 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4675 return new_rd_Unknown(irg, mode);
4679 * Transform and renumber the Projs from a CopyB.
/**
 * Transform and renumber the Projs of a CopyB: the predecessor became
 * either an ia32_CopyB_i (known size) or an ia32_CopyB; only the memory
 * Proj is handled here.
 */
4681 static ir_node *gen_Proj_CopyB(ir_node *node) {
4682 ir_node *block = be_transform_node(get_nodes_block(node));
4683 ir_node *pred = get_Proj_pred(node);
4684 ir_node *new_pred = be_transform_node(pred);
4685 ir_graph *irg = current_ir_graph;
4686 dbg_info *dbgi = get_irn_dbg_info(node);
4687 ir_mode *mode = get_irn_mode(node);
4688 long proj = get_Proj_proj(node);
4691 case pn_CopyB_M_regular:
4692 if (is_ia32_CopyB_i(new_pred)) {
4693 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4694 } else if (is_ia32_CopyB(new_pred)) {
4695 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4703 return new_rd_Unknown(irg, mode);
4707 * Transform and renumber the Projs from a Quot.
/**
 * Transform and renumber the Projs of a Quot (float division): the
 * predecessor became an SSE xDiv or an x87 vfdiv; route memory and result
 * Projs accordingly (xmm mode for SSE, vfp mode for x87).
 * NOTE(review): the switch head and the pn_Quot_* case labels are on
 * elided lines.
 */
4709 static ir_node *gen_Proj_Quot(ir_node *node) {
4710 ir_node *block = be_transform_node(get_nodes_block(node));
4711 ir_node *pred = get_Proj_pred(node);
4712 ir_node *new_pred = be_transform_node(pred);
4713 ir_graph *irg = current_ir_graph;
4714 dbg_info *dbgi = get_irn_dbg_info(node);
4715 ir_mode *mode = get_irn_mode(node);
4716 long proj = get_Proj_proj(node);
4720 if (is_ia32_xDiv(new_pred)) {
4721 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4722 } else if (is_ia32_vfdiv(new_pred)) {
4723 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4727 if (is_ia32_xDiv(new_pred)) {
4728 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4729 } else if (is_ia32_vfdiv(new_pred)) {
4730 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4738 return new_rd_Unknown(irg, mode);
4742 * Transform the Thread Local Storage Proj.
/* Thread-local-storage base Proj -> ia32 LdTls (loads the TLS base into a
 * gp register); no debug info is attached. */
4744 static ir_node *gen_Proj_tls(ir_node *node) {
4745 ir_node *block = be_transform_node(get_nodes_block(node));
4746 ir_graph *irg = current_ir_graph;
4747 dbg_info *dbgi = NULL;
4748 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
/* be_Call: duplicate, but mark all outputs (-1) as flag-clobbering so the
 * flags-scheduling pass knows a call destroys the eflags. */
4753 static ir_node *gen_be_Call(ir_node *node) {
4754 ir_node *res = be_duplicate_node(node);
4755 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/* be_IncSP: duplicate and mark as flag-clobbering (add/sub on ESP writes
 * eflags), same treatment as gen_be_Call. */
4760 static ir_node *gen_be_IncSP(ir_node *node) {
4761 ir_node *res = be_duplicate_node(node);
4762 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4768 * Transform the Projs from a be_Call.
/**
 * Transform the Projs of a be_Call.  The tricky part is the SSE return
 * hack: a float result arrives on the x87 stack (st0), so a vfst/xLoad
 * pair copies it to an xmm register, and both the memory Proj and the
 * result Proj must be rerouted through that pair.
 * NOTE(review): several operands/mode arguments of the vfst/xLoad
 * constructors and some control-flow braces are on elided lines.
 */
4770 static ir_node *gen_Proj_be_Call(ir_node *node) {
4771 ir_node *block = be_transform_node(get_nodes_block(node));
4772 ir_node *call = get_Proj_pred(node);
4773 ir_node *new_call = be_transform_node(call);
4774 ir_graph *irg = current_ir_graph;
4775 dbg_info *dbgi = get_irn_dbg_info(node);
4776 ir_type *method_type = be_Call_get_type(call);
4777 int n_res = get_method_n_ress(method_type);
4778 long proj = get_Proj_proj(node);
4779 ir_mode *mode = get_irn_mode(node);
4781 const arch_register_class_t *cls;
4783 /* The following is kinda tricky: If we're using SSE, then we have to
4784 * move the result value of the call in floating point registers to an
4785 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4786 * after the call, we have to make sure to correctly make the
4787 * MemProj and the result Proj use these 2 nodes
4789 if (proj == pn_be_Call_M_regular) {
4790 // get new node for result, are we doing the sse load/store hack?
4791 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4792 ir_node *call_res_new;
4793 ir_node *call_res_pred = NULL;
4795 if (call_res != NULL) {
4796 call_res_new = be_transform_node(call_res);
4797 call_res_pred = get_Proj_pred(call_res_new);
/* no hack happened: memory comes straight from the call */
4800 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4801 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4802 pn_be_Call_M_regular);
/* hack happened: memory must come after the xLoad */
4804 assert(is_ia32_xLoad(call_res_pred));
4805 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4809 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4810 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4812 ir_node *frame = get_irg_frame(irg);
4813 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4815 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4818 /* in case there is no memory output: create one to serialize the copy
4820 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4821 pn_be_Call_M_regular);
4822 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4823 pn_be_Call_first_res);
4825 /* store st(0) onto stack */
4826 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4828 set_ia32_op_type(fstp, ia32_AddrModeD);
4829 set_ia32_use_frame(fstp);
4831 /* load into SSE register */
4832 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4834 set_ia32_op_type(sse_load, ia32_AddrModeS);
4835 set_ia32_use_frame(sse_load);
4837 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4843 /* transform call modes */
4844 if (mode_is_data(mode)) {
/* rewrite the proj mode to the register-class mode of the result */
4845 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
4849 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4853 * Transform the Projs from a Cmp.
/**
 * Projs of Cmp nodes must never reach the backend transformation directly:
 * mode_b values are expected to have been lowered beforehand, so hitting
 * this function indicates a bug in an earlier phase -> panic.
 */
4855 static ir_node *gen_Proj_Cmp(ir_node *node)
4857 /* this probably means not all mode_b nodes were lowered... */
4858 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4863 * Transform and potentially renumber Proj nodes.
/**
 * Generic Proj transformation: dispatches on the opcode of the Proj's
 * predecessor to the specialized gen_Proj_* handlers above.
 *
 * NOTE(review): chunk is elided (non-contiguous embedded line numbers);
 * some branches/braces are not visible. Visible code kept byte-identical.
 */
4865 static ir_node *gen_Proj(ir_node *node) {
4866 ir_graph *irg = current_ir_graph;
4867 dbg_info *dbgi = get_irn_dbg_info(node);
4868 ir_node *pred = get_Proj_pred(node);
4869 long proj = get_Proj_proj(node);
4871 if (is_Store(pred)) {
4872 if (proj == pn_Store_M) {
4873 return be_transform_node(pred);
/* Stores produce only a memory result; any other Proj is dead */
4876 return new_r_Bad(irg);
4878 } else if (is_Load(pred)) {
4879 return gen_Proj_Load(node);
4880 } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
4881 return gen_Proj_DivMod(node);
4882 } else if (is_CopyB(pred)) {
4883 return gen_Proj_CopyB(node);
4884 } else if (is_Quot(pred)) {
4885 return gen_Proj_Quot(node);
4886 } else if (be_is_SubSP(pred)) {
4887 return gen_Proj_be_SubSP(node);
4888 } else if (be_is_AddSP(pred)) {
4889 return gen_Proj_be_AddSP(node);
4890 } else if (be_is_Call(pred)) {
4891 return gen_Proj_be_Call(node);
4892 } else if (is_Cmp(pred)) {
4893 return gen_Proj_Cmp(node);
4894 } else if (get_irn_op(pred) == op_Start) {
4895 if (proj == pn_Start_X_initial_exec) {
4896 ir_node *block = get_nodes_block(pred);
4899 /* we exchange the ProjX with a jump */
4900 block = be_transform_node(block);
4901 jump = new_rd_Jmp(dbgi, irg, block);
/* the TLS Proj is handled specially via the old anchor */
4904 if (node == be_get_old_anchor(anchor_tls)) {
4905 return gen_Proj_tls(node);
4907 } else if (is_ia32_l_FloattoLL(pred)) {
4908 return gen_Proj_l_FloattoLL(node);
4910 } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
4914 ir_node *new_pred = be_transform_node(pred);
4915 ir_node *block = be_transform_node(get_nodes_block(node));
4916 ir_mode *mode = get_irn_mode(node);
4917 if (mode_needs_gp_reg(mode)) {
/* gp-register values are uniformly represented as mode_Iu */
4918 ir_node *new_proj = new_r_Proj(irg, block, new_pred, mode_Iu,
4919 get_Proj_proj(node));
4920 #ifdef DEBUG_libfirm
/* keep the original node number for debugging/dumping */
4921 new_proj->node_nr = node->node_nr;
/* fallback: simply duplicate the Proj */
4927 return be_duplicate_node(node);
4931 * Enters all transform functions into the generic pointer
/**
 * Enters all ia32 transform functions into the generic function pointer
 * of each opcode (op->ops.generic), after clearing any previous entries.
 *
 * NOTE(review): most GEN()/BAD() registration lines are elided from this
 * chunk; only a representative few are visible.
 */
4933 static void register_transformers(void)
4937 /* first clear the generic function pointer for all ops */
4938 clear_irp_opcodes_generic_func();
/* GEN: register a transform handler; BAD: mark an op we must never see */
4940 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4941 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4979 /* transform ops from intrinsic lowering */
4995 GEN(ia32_l_LLtoFloat);
4996 GEN(ia32_l_FloattoLL);
5002 /* we should never see these nodes */
5017 /* handle generic backend nodes */
5026 op_Mulh = get_op_Mulh();
5035 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes cached in the code generator,
 * so later transform steps can reference their already-transformed
 * versions. Called as a callback (void* arg is the ia32_code_gen_t).
 */
5037 static void ia32_pretransform_node(void *arch_cg) {
5038 ia32_code_gen_t *cg = arch_cg;
/* one unknown/noreg placeholder per register class: gp, vfp (x87), xmm */
5040 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5041 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5042 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5043 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5044 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5045 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5050 * Walker, checks if all ia32 nodes producing more than one result have
5051 * its Projs, other wise creates new projs and keep them using a be_Keep node.
/**
 * Graph walker: for every ia32 node with multiple results, ensure each
 * output either has a Proj or gets one attached to a be_Keep node, so the
 * register allocator sees all produced values.
 *
 * NOTE(review): chunk is elided (several declarations such as n_outs, i,
 * in[], last_keep and some braces are not visible); visible code kept
 * byte-identical.
 */
5053 static void add_missing_keep_walker(ir_node *node, void *data)
5056 unsigned found_projs = 0;
5057 const ir_edge_t *edge;
5058 ir_mode *mode = get_irn_mode(node);
5063 if(!is_ia32_irn(node))
5066 n_outs = get_ia32_n_res(node);
/* SwitchJmp is excluded — presumably its outputs need no keeps; TODO confirm */
5069 if(is_ia32_SwitchJmp(node))
/* found_projs is used as a bitset, so the result count must fit */
5072 assert(n_outs < (int) sizeof(unsigned) * 8);
5073 foreach_out_edge(node, edge) {
5074 ir_node *proj = get_edge_src_irn(edge);
5075 int pn = get_Proj_proj(proj);
5077 assert(get_irn_mode(proj) == mode_M || pn < n_outs);
5078 found_projs |= 1 << pn;
5082 /* are keeps missing? */
5084 for(i = 0; i < n_outs; ++i) {
5087 const arch_register_req_t *req;
5088 const arch_register_class_t *class;
5090 if(found_projs & (1 << i)) {
5094 req = get_ia32_out_req(node, i);
/* flag outputs need no keep */
5099 if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
5103 block = get_nodes_block(node);
5104 in[0] = new_r_Proj(current_ir_graph, block, node,
5105 arch_register_class_mode(class), i);
/* reuse one be_Keep per node, adding further inputs to it */
5106 if(last_keep != NULL) {
5107 be_Keep_add_node(last_keep, class, in[0]);
5109 last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
5110 if(sched_is_scheduled(node)) {
5111 sched_add_after(node, last_keep);
5118 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/**
 * Adds missing keeps (and Projs for unused outputs) to all nodes of the
 * code generator's graph by walking it with add_missing_keep_walker.
 */
5121 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5123 ir_graph *irg = be_get_birg_irg(cg->birg);
5124 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5127 /* do the transformation */
5128 void ia32_transform_graph(ia32_code_gen_t *cg) {
5130 ir_graph *irg = cg->irg;
5132 register_transformers();
5134 initial_fpcw = NULL;
5136 BE_TIMER_PUSH(t_heights);
5137 heights = heights_new(irg);
5138 BE_TIMER_POP(t_heights);
5139 ia32_calculate_non_address_mode_nodes(cg->birg);
5141 /* the transform phase is not safe for CSE (yet) because several nodes get
5142 * attributes set after their creation */
5143 cse_last = get_opt_cse();
5146 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
5148 set_opt_cse(cse_last);
5150 ia32_free_non_address_mode_nodes();
5151 heights_free(heights);
5155 void ia32_init_transform(void)
5157 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");