2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
98 static ir_node *initial_fpcw = NULL;
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
139 * Return true if a mode can be stored in the GP register set
/* Returns non-zero iff values of @p mode live in GP registers: integers,
 * references and mode_b of at most 32 bits. mode_fpcw and wider modes are
 * rejected.
 * NOTE(review): the `return 0;` bodies of the two guard `if`s appear to be
 * elided from this listing fragment. */
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
142 	if(mode == mode_fpcw)
144 	if(get_mode_size_bits(mode) > 32)
146 	return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
150 * creates a unique ident by adding a number to a tag
152 * @param tag the tag string, must contain a %d if a number
/* Creates a unique ident by formatting @p tag (which must contain a %d/%u
 * conversion) with a monotonically increasing counter.
 * NOTE(review): the local `char str[...]` buffer declaration is elided from
 * this listing fragment. Not thread-safe: uses a function-static counter. */
155 static ident *unique_id(const char *tag)
157 	static unsigned id = 0;
160 	snprintf(str, sizeof(str), tag, ++id);
161 	return new_id_from_str(str);
165 * Get a primitive type for a mode.
/* Returns a primitive type for @p mode, caching results in the @p types pmap.
 * Newly created types are named "prim_type_<mode>" and aligned to 16 bytes
 * (presumably for SSE loads — TODO confirm).
 * NOTE(review): the cache-hit early return and the final `return res;` are
 * elided from this listing fragment. */
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 	pmap_entry *e = pmap_find(types, mode);
174 	snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 	res = new_type_primitive(new_id_from_str(buf), mode);
176 	set_type_alignment_bytes(res, 16);
177 	pmap_insert(types, mode, res);
185 * Creates an immediate.
187 * @param symconst if set, create a SymConst immediate
188 * @param symconst_sign sign for the symconst
189 * @param val integer value for the immediate
/* Creates an ia32 Immediate node in the start block and pins it to the
 * GP_NOREG pseudo register (immediates need no real register).
 * NOTE(review): the trailing `return immediate;` is elided from this
 * listing fragment. */
191 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
193 	ir_graph *irg         = current_ir_graph;
194 	ir_node *start_block  = get_irg_start_block(irg);
195 	ir_node *immediate    = new_rd_ia32_Immediate(NULL, irg, start_block,
196 			symconst, symconst_sign, val);
197 	arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
203 * Get an atomic entity that is initialized with a tarval forming
206 * @param cnst the node representing the constant
/* Returns a static, constant global entity initialized with the tarval of the
 * float Const node @p cnst; entities are cached per tarval in isa->tv_ent.
 * For the x87 path the mode is narrowed (E -> D -> F) when the conversion is
 * lossless, to emit smaller constants.
 * NOTE(review): several lines (cache-hit return, `tv`/`res` declarations,
 * closing braces, final return) are elided from this listing fragment. */
208 static ir_entity *create_float_const_entity(ir_node *cnst)
210 	ia32_isa_t *isa = env_cg->isa;
211 	tarval *key     = get_Const_tarval(cnst);
212 	pmap_entry *e   = pmap_find(isa->tv_ent, key);
218 		ir_mode *mode = get_tarval_mode(tv);
221 		if (! ia32_cg_config.use_sse2) {
222 			/* try to reduce the mode to produce smaller sized entities */
223 			if (mode != mode_F) {
224 				if (tarval_ieee754_can_conv_lossless(tv, mode_F)) {
226 					tv = tarval_convert_to(tv, mode);
227 				} else if (mode != mode_D) {
228 					if (tarval_ieee754_can_conv_lossless(tv, mode_D)) {
230 						tv = tarval_convert_to(tv, mode);
236 		if (mode == get_irn_mode(cnst)) {
237 			/* mode was not changed */
238 			tp = get_Const_type(cnst);
239 			if (tp == firm_unknown_type)
240 				tp = get_prim_type(isa->types, mode);
242 			tp = get_prim_type(isa->types, mode);
244 		res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
246 		set_entity_ld_ident(res, get_entity_ident(res));
247 		set_entity_visibility(res, visibility_local);
248 		set_entity_variability(res, variability_constant);
249 		set_entity_allocation(res, allocation_static);
251 		/* we create a new entity here: its initializer must reside in the
254 		current_ir_graph = get_const_code_irg();
255 		set_atomic_ent_value(res, new_Const_type(tv, tp));
256 		current_ir_graph = rem;
258 		pmap_insert(isa->tv_ent, key, res);
266 static int is_Const_0(ir_node *node) {
267 return is_Const(node) && is_Const_null(node);
270 static int is_Const_1(ir_node *node) {
271 return is_Const(node) && is_Const_one(node);
274 static int is_Const_Minus_1(ir_node *node) {
275 return is_Const(node) && is_Const_all_one(node);
279 * returns true if constant can be created with a simple float command
/* Returns non-zero if the float Const can be materialized with a simple x87
 * instruction (fldz/fld1), i.e. when its tarval is 0.0 or 1.0.
 * NOTE(review): the `return 1;` / final `return 0;` lines are elided from
 * this listing fragment. */
281 static int is_simple_x87_Const(ir_node *node)
283 	tarval *tv = get_Const_tarval(node);
285 	if (tarval_is_null(tv) || tarval_is_one(tv))
288 	/* TODO: match all the other float constants */
293 * returns true if constant can be created with a simple float command
/* Returns non-zero if the float Const can be created with a simple SSE
 * sequence: 0.0/1.0, or a double whose lower 32 bits are zero (so it fits a
 * single 32-bit immediate moved into the upper half).
 * NOTE(review): the returns and the check on `val` after the bit-assembly
 * are elided from this listing fragment. */
295 static int is_simple_sse_Const(ir_node *node)
297 	tarval  *tv   = get_Const_tarval(node);
298 	ir_mode *mode = get_tarval_mode(tv);
303 	if (tarval_is_null(tv) || tarval_is_one(tv))
306 	if (mode == mode_D) {
307 		unsigned val = get_tarval_sub_bits(tv, 0) |
308 			(get_tarval_sub_bits(tv, 1) << 8) |
309 			(get_tarval_sub_bits(tv, 2) << 16) |
310 			(get_tarval_sub_bits(tv, 3) << 24);
312 			/* lower 32bit are zero, really a 32bit constant */
316 	/* TODO: match all the other float constants */
321 * Transforms a Const.
/* Transforms a firm Const node into ia32 code.
 * Float constants: with SSE2, 0.0 becomes xZero, 1.0 is built via
 * xAllOnes + shift trickery, 32-bit patterns go through movd, and everything
 * else loads from a constant-pool entity (create_float_const_entity).
 * Without SSE2 the x87 path uses fldz/fld1 or a vfld from an entity.
 * Integer constants become ia32_Const immediates.
 * NOTE(review): many lines (else branches, Proj creation for some paths,
 * closing braces, final return) are elided from this listing fragment. */
323 static ir_node *gen_Const(ir_node *node) {
324 	ir_graph        *irg       = current_ir_graph;
325 	ir_node         *old_block = get_nodes_block(node);
326 	ir_node         *block     = be_transform_node(old_block);
327 	dbg_info        *dbgi      = get_irn_dbg_info(node);
328 	ir_mode         *mode      = get_irn_mode(node);
330 	assert(is_Const(node));
332 	if (mode_is_float(mode)) {
334 		ir_node   *noreg    = ia32_new_NoReg_gp(env_cg);
335 		ir_node   *nomem    = new_NoMem();
339 		if (ia32_cg_config.use_sse2) {
340 			tarval *tv = get_Const_tarval(node);
341 			if (tarval_is_null(tv)) {
342 				load = new_rd_ia32_xZero(dbgi, irg, block);
343 				set_ia32_ls_mode(load, mode);
345 			} else if (tarval_is_one(tv)) {
				/* 1.0 = all-ones shifted left then right to build the
				 * exponent pattern; shift counts differ for F vs D */
346 				int     cnst  = mode == mode_F ? 26 : 55;
347 				ir_node *imm1 = create_Immediate(NULL, 0, cnst);
348 				ir_node *imm2 = create_Immediate(NULL, 0, 2);
349 				ir_node *pslld, *psrld;
351 				load = new_rd_ia32_xAllOnes(dbgi, irg, block);
352 				set_ia32_ls_mode(load, mode);
353 				pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
354 				set_ia32_ls_mode(pslld, mode);
355 				psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
356 				set_ia32_ls_mode(psrld, mode);
358 			} else if (mode == mode_F) {
359 				/* we can place any 32bit constant by using a movd gp, sse */
360 				unsigned val = get_tarval_sub_bits(tv, 0) |
361 					(get_tarval_sub_bits(tv, 1) << 8) |
362 					(get_tarval_sub_bits(tv, 2) << 16) |
363 					(get_tarval_sub_bits(tv, 3) << 24);
364 				ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
365 				load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
366 				set_ia32_ls_mode(load, mode);
369 				if (mode == mode_D) {
370 					unsigned val = get_tarval_sub_bits(tv, 0) |
371 						(get_tarval_sub_bits(tv, 1) << 8) |
372 						(get_tarval_sub_bits(tv, 2) << 16) |
373 						(get_tarval_sub_bits(tv, 3) << 24);
375 						ir_node *imm32 = create_Immediate(NULL, 0, 32);
376 						ir_node *cnst, *psllq;
378 						/* fine, lower 32bit are zero, produce 32bit value */
379 						val = get_tarval_sub_bits(tv, 4) |
380 							(get_tarval_sub_bits(tv, 5) << 8) |
381 							(get_tarval_sub_bits(tv, 6) << 16) |
382 							(get_tarval_sub_bits(tv, 7) << 24);
383 						cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
384 						load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
385 						set_ia32_ls_mode(load, mode);
386 						psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
387 						set_ia32_ls_mode(psllq, mode);
				/* generic SSE path: load from constant-pool entity */
392 				floatent = create_float_const_entity(node);
394 				load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
396 				set_ia32_op_type(load, ia32_AddrModeS);
397 				set_ia32_am_sc(load, floatent);
398 				set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
399 				res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
			/* x87 path: fldz/fld1 for 0.0 and 1.0, else load from entity */
402 			if (is_Const_null(node)) {
403 				load = new_rd_ia32_vfldz(dbgi, irg, block);
405 				set_ia32_ls_mode(load, mode);
406 			} else if (is_Const_one(node)) {
407 				load = new_rd_ia32_vfld1(dbgi, irg, block);
409 				set_ia32_ls_mode(load, mode);
411 				floatent = create_float_const_entity(node);
413 				load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
414 				set_ia32_op_type(load, ia32_AddrModeS);
415 				set_ia32_am_sc(load, floatent);
416 				set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
417 				res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
418 				/* take the mode from the entity */
419 				set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
423 		/* Const Nodes before the initial IncSP are a bad idea, because
424 		 * they could be spilled and we have no SP ready at that point yet.
425 		 * So add a dependency to the initial frame pointer calculation to
426 		 * avoid that situation.
428 		if (get_irg_start_block(irg) == block) {
429 			add_irn_dep(load, get_irg_frame(irg));
432 		SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
434 	} else { /* non-float mode */
436 		tarval *tv = get_Const_tarval(node);
			/* normalize to 32-bit unsigned before extracting the value */
439 			tv = tarval_convert_to(tv, mode_Iu);
441 		if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
443 			panic("couldn't convert constant tarval (%+F)", node);
445 		val = get_tarval_long(tv);
447 		cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
448 		SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
451 		if (get_irg_start_block(irg) == block) {
452 			add_irn_dep(cnst, get_irg_frame(irg));
460 * Transforms a SymConst.
/* Transforms a firm SymConst node. Float-mode SymConsts become a load (SSE
 * xLoad or x87 vfld) of the referenced entity; only symconst_addr_ent is
 * supported otherwise, yielding an ia32_Const carrying the entity.
 * NOTE(review): else-branch lines, closing braces and the final return are
 * elided from this listing fragment. */
462 static ir_node *gen_SymConst(ir_node *node) {
463 	ir_graph *irg       = current_ir_graph;
464 	ir_node  *old_block = get_nodes_block(node);
465 	ir_node  *block     = be_transform_node(old_block);
466 	dbg_info *dbgi      = get_irn_dbg_info(node);
467 	ir_mode  *mode      = get_irn_mode(node);
470 	if (mode_is_float(mode)) {
471 		ir_node *noreg = ia32_new_NoReg_gp(env_cg);
472 		ir_node *nomem = new_NoMem();
474 		if (ia32_cg_config.use_sse2)
475 			cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
477 			cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
478 		set_ia32_am_sc(cnst, get_SymConst_entity(node));
479 		set_ia32_use_frame(cnst);
483 		if(get_SymConst_kind(node) != symconst_addr_ent) {
484 			panic("backend only support symconst_addr_ent (at %+F)", node);
486 		entity = get_SymConst_entity(node);
487 		cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
490 	/* Const Nodes before the initial IncSP are a bad idea, because
491 	 * they could be spilled and we have no SP ready at that point yet
493 	if (get_irg_start_block(irg) == block) {
494 		add_irn_dep(cnst, get_irg_frame(irg));
497 	SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
502 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Generates (and caches) a static constant entity for a known FP constant
 * (sign/abs bit masks and INT_MAX, used for FP Neg + Abs). The `mode` field
 * selects mode_Iu (0), mode_Lu (1) or the string-parsed default; `align` is
 * the required alignment in bytes.
 * NOTE(review): struct field lines (tp_name, mode, align), local declarations
 * and several closing braces are elided from this listing fragment. */
503 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
504 	static const struct {
506 		const char *ent_name;
507 		const char *cnst_str;
510 	} names [ia32_known_const_max] = {
511 		{ TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 },	/* ia32_SSIGN */
512 		{ TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 },	/* ia32_DSIGN */
513 		{ TP_SFP_ABS,  ENT_SFP_ABS,  SFP_ABS,  0, 16 },	/* ia32_SABS */
514 		{ TP_DFP_ABS,  ENT_DFP_ABS,  DFP_ABS,  1, 16 },	/* ia32_DABS */
515 		{ TP_INT_MAX,  ENT_INT_MAX,  DFP_INTMAX, 2, 4 }	/* ia32_INTMAX */
517 	static ir_entity *ent_cache[ia32_known_const_max];
519 	const char    *tp_name, *ent_name, *cnst_str;
527 	ent_name = names[kct].ent_name;
528 	if (! ent_cache[kct]) {
529 		tp_name  = names[kct].tp_name;
530 		cnst_str = names[kct].cnst_str;
532 		switch (names[kct].mode) {
533 		case 0:  mode = mode_Iu; break;
534 		case 1:  mode = mode_Lu; break;
535 		default: mode = mode_F;  break;
537 		tv  = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
538 		tp  = new_type_primitive(new_id_from_str(tp_name), mode);
539 		/* set the specified alignment */
540 		set_type_alignment_bytes(tp, names[kct].align);
542 		ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
544 		set_entity_ld_ident(ent, get_entity_ident(ent));
545 		set_entity_visibility(ent, visibility_local);
546 		set_entity_variability(ent, variability_constant);
547 		set_entity_allocation(ent, allocation_static);
549 		/* we create a new entity here: its initializer must reside in the
551 		rem = current_ir_graph;
552 		current_ir_graph = get_const_code_irg();
553 		cnst = new_Const(mode, tv);
554 		current_ir_graph = rem;
556 		set_atomic_ent_value(ent, cnst);
558 		/* cache the entry */
559 		ent_cache[kct] = ent;
562 	return ent_cache[kct];
567 * Prints the old node name on cg obst and returns a pointer to it.
/* Prints the old node name onto the code generator's name obstack and returns
 * a pointer to the NUL-terminated string. The string lives on the obstack and
 * stays valid until that obstack region is freed.
 * NOTE(review): the closing brace is elided from this listing fragment. */
569 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
570 	ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
572 	lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
573 	obstack_1grow(isa->name_obst, 0);
574 	return obstack_finish(isa->name_obst);
579 * return true if the node is a Proj(Load) and could be used in source address
580 * mode for another node. Will return only true if the @p other node is not
581 * dependent on the memory of the Load (for binary operations use the other
582 * input here, for unary operations use NULL).
/* Returns non-zero if @p node (a Proj(Load) or a simple float Const) may be
 * folded into another node as source address mode. Only succeeds when the
 * load is in @p block, has a single user, was not already materialized, and
 * neither @p other nor @p other2 depends on the load's memory.
 * NOTE(review): the `return 0;`/`return 1;` lines after each guard are
 * elided from this listing fragment. */
584 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
585 		ir_node *other, ir_node *other2)
587 	ir_mode *mode = get_irn_mode(node);
591 	/* float constants are always available */
592 	if (is_Const(node) && mode_is_float(mode)) {
593 		if (ia32_cg_config.use_sse2) {
594 			if (is_simple_sse_Const(node))
597 			if (is_simple_x87_Const(node))
600 		if (get_irn_n_edges(node) > 1)
607 	load = get_Proj_pred(node);
608 	pn   = get_Proj_proj(node);
609 	if(!is_Load(load) || pn != pn_Load_res)
611 	if(get_nodes_block(load) != block)
613 	/* we only use address mode if we're the only user of the load */
614 	if(get_irn_n_edges(node) > 1)
616 	/* in some edge cases with address mode we might reach the load normally
617 	 * and through some AM sequence, if it is already materialized then we
618 	 * can't create an AM node from it */
619 	if(be_is_transformed(node))
622 	/* don't do AM if other node inputs depend on the load (via mem-proj) */
623 	if(other != NULL && get_nodes_block(other) == block
624 			&& heights_reachable_in_block(heights, other, load))
626 	if(other2 != NULL && get_nodes_block(other2) == block
627 			&& heights_reachable_in_block(heights, other2, load))
/* Result record of operand matching: the matched address, operand kind
 * (Normal / AddrModeS / ...) and bit-flags describing the match.
 * NOTE(review): several fields (addr, ls_mode, new_op1/new_op2, mem_proj,
 * pinned, closing brace) are elided from this listing fragment. */
633 typedef struct ia32_address_mode_t ia32_address_mode_t;
634 struct ia32_address_mode_t {
638 	ia32_op_type_t  op_type;
642 	unsigned        commutative  : 1;
643 	unsigned        ins_permuted : 1;
/* Fills @p addr from the address computation @p ptr and memory input @p mem,
 * transforming base/index into the new graph and substituting the NoReg
 * pseudo register where no base/index is present.
 * NOTE(review): the closing brace is elided from this listing fragment. */
646 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
648 	ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
650 	/* construct load address */
651 	memset(addr, 0, sizeof(addr[0]));
652 	ia32_create_address_mode(addr, ptr, /*force=*/0);
654 	addr->base  = addr->base  ? be_transform_node(addr->base)  : noreg_gp;
655 	addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
656 	addr->mem   = be_transform_node(mem);
/* Fills the address-mode struct in @p am for source address mode on @p node:
 * either a float Const (folded via a constant-pool entity) or a Proj(Load)
 * whose address, load mode, pin state and memory Proj are recorded.
 * NOTE(review): local declarations, a memset of addr, the return after the
 * Const branch and closing braces are elided from this listing fragment. */
659 static void build_address(ia32_address_mode_t *am, ir_node *node)
661 	ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
662 	ia32_address_t *addr = &am->addr;
668 	if (is_Const(node)) {
669 		ir_entity *entity  = create_float_const_entity(node);
670 		addr->base         = noreg_gp;
671 		addr->index        = noreg_gp;
672 		addr->mem          = new_NoMem();
673 		addr->symconst_ent = entity;
675 		am->ls_mode        = get_type_mode(get_entity_type(entity));
676 		am->pinned         = op_pin_state_floats;
680 	load         = get_Proj_pred(node);
681 	ptr          = get_Load_ptr(load);
682 	mem          = get_Load_mem(load);
683 	new_mem      = be_transform_node(mem);
684 	am->pinned   = get_irn_pinned(load);
685 	am->ls_mode  = get_Load_mode(load);
686 	am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
688 	/* construct load address */
689 	ia32_create_address_mode(addr, ptr, /*force=*/0);
691 	addr->base  = addr->base  ? be_transform_node(addr->base)  : noreg_gp;
692 	addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/* Copies the address-mode attributes from @p addr onto the ia32 node:
 * scale, symconst entity (and sign), integer offset and frame entity.
 * NOTE(review): the `if(addr->use_frame)` guard before line 704 and the
 * closing brace are elided from this listing fragment. */
696 static void set_address(ir_node *node, const ia32_address_t *addr)
698 	set_ia32_am_scale(node, addr->scale);
699 	set_ia32_am_sc(node, addr->symconst_ent);
700 	set_ia32_am_offs_int(node, addr->offset);
701 	if(addr->symconst_sign)
702 		set_ia32_am_sc_sign(node);
704 		set_ia32_use_frame(node);
705 	set_ia32_frame_ent(node, addr->frame_entity);
/* Applies all recorded address-mode attributes from @p am to the new ia32
 * node: address, op type, load/store mode, pin state and commutativity.
 * NOTE(review): the `if(am->commutative)` guard before line 718, closing
 * braces and possibly other lines are elided from this listing fragment. */
708 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
710 	set_address(node, &am->addr);
712 	set_ia32_op_type(node, am->op_type);
713 	set_ia32_ls_mode(node, am->ls_mode);
714 	if(am->pinned == op_pin_state_pinned && get_irn_pinned(node) != op_pin_state_pinned) {
715 		set_irn_pinned(node, am->pinned);
718 		set_ia32_commutative(node);
722 * Check, if a given node is a Down-Conv, ie. a integer Conv
723 * from a mode with a mode with more bits to a mode with lesser bits.
724 * Moreover, we return only true if the node has not more than 1 user.
726 * @param node the node
727 * @return non-zero if node is a Down-Conv
/* Returns non-zero if @p node is a single-user integer Conv from a wider GP
 * mode to a narrower GP mode (a "Down-Conv").
 * NOTE(review): the initial `is_Conv(node)` check, local mode declarations
 * and `return 0;` lines are elided from this listing fragment. */
729 static int is_downconv(const ir_node *node)
737 	/* we only want to skip the conv when we're the only user
738 	 * (not optimal but for now...)
740 	if(get_irn_n_edges(node) > 1)
743 	src_mode  = get_irn_mode(get_Conv_op(node));
744 	dest_mode = get_irn_mode(node);
745 	return mode_needs_gp_reg(src_mode)
746 		&& mode_needs_gp_reg(dest_mode)
747 		&& get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
750 /* Skip all Down-Conv's on a given node and return the resulting node. */
751 ir_node *ia32_skip_downconv(ir_node *node) {
752 while (is_downconv(node))
753 node = get_Conv_op(node);
/* Widens @p node to a 32-bit mode by inserting an I2I Conv; the target mode
 * is signed or unsigned to match @p node's mode.
 * NOTE(review): the tgt_mode assignments (presumably mode_Is/mode_Iu — TODO
 * confirm), local declarations and closing braces are elided from this
 * listing fragment. */
759 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
761 	ir_mode  *mode = get_irn_mode(node);
766 	if(mode_is_signed(mode)) {
771 	block = get_nodes_block(node);
772 	dbgi  = get_irn_dbg_info(node);
774 	return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
779 * matches operands of a node into ia32 addressing/operand modes. This covers
780 * usage of source address mode, immediates, operations with non 32-bit modes,
782 * The resulting data is filled into the @p am struct. block is the block
783 * of the node whose arguments are matched. op1, op2 are the first and second
784 * input that are matched (op1 may be NULL). other_op is another unrelated
785 * input that is not matched! but which is needed sometimes to check if AM
786 * for op1/op2 is legal.
787 * @p flags describes the supported modes of the operation in detail.
/* Matches operands op1/op2 of a node into ia32 addressing/operand modes:
 * tries an immediate for op2, then source address mode for op2 (and, for
 * commutative operations, for op1 with inputs permuted), falling back to
 * plain register operands. @p other_op is an unrelated input used only to
 * check AM legality; @p flags selects the allowed modes.
 * NOTE(review): the flags parameter line, several locals, else branches,
 * closing braces and asserts are elided from this listing fragment. */
789 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
790                             ir_node *op1, ir_node *op2, ir_node *other_op,
793 	ia32_address_t *addr     = &am->addr;
794 	ir_node        *noreg_gp = ia32_new_NoReg_gp(env_cg);
797 	ir_mode        *mode     = get_irn_mode(op2);
799 	unsigned        commutative;
800 	int             use_am_and_immediates;
802 	int             mode_bits = get_mode_size_bits(mode);
804 	memset(am, 0, sizeof(am[0]));
806 	commutative           = (flags & match_commutative) != 0;
807 	use_am_and_immediates = (flags & match_am_and_immediates) != 0;
808 	use_am                = (flags & match_am) != 0;
809 	use_immediate         = (flags & match_immediate) != 0;
810 	assert(!use_am_and_immediates || use_immediate);
813 	assert(!commutative || op1 != NULL);
814 	assert(use_am || !(flags & match_8bit_am));
815 	assert(use_am || !(flags & match_16bit_am));
817 	if (mode_bits == 8) {
818 		if (!(flags & match_8bit_am))
820 		/* we don't automatically add upconvs yet */
821 		assert((flags & match_mode_neutral) || (flags & match_8bit));
822 	} else if (mode_bits == 16) {
823 		if (!(flags & match_16bit_am))
825 		/* we don't automatically add upconvs yet */
826 		assert((flags & match_mode_neutral) || (flags & match_16bit));
829 	/* we can simply skip downconvs for mode neutral nodes: the upper bits
830 	 * can be random for these operations */
831 	if (flags & match_mode_neutral) {
832 		op2 = ia32_skip_downconv(op2);
834 			op1 = ia32_skip_downconv(op1);
838 	/* match immediates. firm nodes are normalized: constants are always on the
841 	if (!(flags & match_try_am) && use_immediate) {
842 		new_op2 = try_create_Immediate(op2, 0);
845 	if (new_op2 == NULL &&
846 	    use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
847 		build_address(am, op2);
848 		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
849 		if(mode_is_float(mode)) {
850 			new_op2 = ia32_new_NoReg_vfp(env_cg);
854 		am->op_type = ia32_AddrModeS;
855 	} else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
857 	           ia32_use_source_address_mode(block, op1, op2, other_op)) {
859 		build_address(am, op1);
861 		if (mode_is_float(mode)) {
862 			noreg = ia32_new_NoReg_vfp(env_cg);
867 		if(new_op2 != NULL) {
			/* op1 is folded as memory operand, op2 becomes the reg input */
870 			new_op1 = be_transform_node(op2);
872 			am->ins_permuted = 1;
874 		am->op_type = ia32_AddrModeS;
876 		if(flags & match_try_am) {
			/* caller only probes for AM; report Normal and bail out */
879 			am->op_type = ia32_Normal;
883 		new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
885 			new_op2 = be_transform_node(op2);
886 		am->op_type = ia32_Normal;
887 		am->ls_mode = get_irn_mode(op2);
888 		if(flags & match_mode_neutral)
889 			am->ls_mode = mode_Iu;
891 	if(addr->base == NULL)
892 		addr->base = noreg_gp;
893 	if(addr->index == NULL)
894 		addr->index = noreg_gp;
895 	if(addr->mem == NULL)
896 		addr->mem = new_NoMem();
898 	am->new_op1     = new_op1;
899 	am->new_op2     = new_op2;
900 	am->commutative = commutative;
/* If the matched operands folded a Load (am->mem_proj set), turn @p node
 * into a mode_T node, register it as the transformed Load so the old memory
 * Proj can re-attach, and return a result Proj; otherwise the node is
 * returned unchanged.
 * NOTE(review): local declarations, the `return node;` for the NULL case and
 * closing braces are elided from this listing fragment. */
903 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
905 	ir_graph *irg = current_ir_graph;
909 	if(am->mem_proj == NULL)
912 	/* we have to create a mode_T so the old MemProj can attach to us */
913 	mode = get_irn_mode(node);
914 	load = get_Proj_pred(am->mem_proj);
916 	mark_irn_visited(load);
917 	be_set_transformed_node(load, node);
920 	set_irn_mode(node, mode_T);
921 	return new_rd_Proj(NULL, irg, get_nodes_block(node), node, mode, pn_ia32_res);
928 * Construct a standard binary operation, set AM and immediate if required.
930 * @param op1 The first operand
931 * @param op2 The second operand
932 * @param func The node constructor function
933 * @return The constructed ia32 node.
/* Constructs a standard binary ia32 operation from @p op1/@p op2 via the
 * constructor @p func, after matching operands into address/immediate modes.
 * Source AM support is disabled again when an immediate was matched.
 * NOTE(review): the new_node declaration, closing brace and final
 * `return new_node;` are elided from this listing fragment. */
935 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
936                           construct_binop_func *func, match_flags_t flags)
938 	ir_node  *block     = get_nodes_block(node);
939 	ir_node  *new_block = be_transform_node(block);
940 	ir_graph *irg       = current_ir_graph;
941 	dbg_info *dbgi      = get_irn_dbg_info(node);
943 	ia32_address_mode_t  am;
944 	ia32_address_t      *addr = &am.addr;
946 	match_arguments(&am, block, op1, op2, NULL, flags);
948 	new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
949 	                am.new_op1, am.new_op2);
950 	set_am_attributes(new_node, &am);
951 	/* we can't use source address mode anymore when using immediates */
952 	if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
953 		set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
954 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
956 	new_node = fix_mem_proj(new_node, &am);
963 n_ia32_l_binop_right,
964 n_ia32_l_binop_eflags
966 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
967 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
968 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
969 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
970 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
971 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
974 * Construct a binary operation which also consumes the eflags.
976 * @param node The node to transform
977 * @param func The node constructor function
978 * @param flags The match flags
979 * @return The constructor ia32 node
/* Constructs a binary ia32 operation that also consumes eflags (Adc/Sbb):
 * operands come from the lowered node's n_ia32_l_binop_* inputs; matching and
 * attribute handling mirror gen_binop.
 * NOTE(review): the flags parameter line, new_node declaration, closing brace
 * and final return are elided from this listing fragment. */
981 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
984 	ir_node  *src_block  = get_nodes_block(node);
985 	ir_node  *block      = be_transform_node(src_block);
986 	ir_node  *op1        = get_irn_n(node, n_ia32_l_binop_left);
987 	ir_node  *op2        = get_irn_n(node, n_ia32_l_binop_right);
988 	ir_node  *eflags     = get_irn_n(node, n_ia32_l_binop_eflags);
989 	ir_node  *new_eflags = be_transform_node(eflags);
990 	ir_graph *irg        = current_ir_graph;
991 	dbg_info *dbgi       = get_irn_dbg_info(node);
993 	ia32_address_mode_t  am;
994 	ia32_address_t      *addr = &am.addr;
996 	match_arguments(&am, src_block, op1, op2, NULL, flags);
998 	new_node = func(dbgi, irg, block, addr->base, addr->index,
999 	                addr->mem, am.new_op1, am.new_op2, new_eflags);
1000 	set_am_attributes(new_node, &am);
1001 	/* we can't use source address mode anymore when using immediates */
1002 	if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1003 		set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1004 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1006 	new_node = fix_mem_proj(new_node, &am);
/* Returns the (lazily transformed) initial x87 FPU control word node,
 * obtained from the ABI's ignore-register for REG_FPCW and cached in the
 * file-static `initial_fpcw`.
 * NOTE(review): the local `fpcw` declaration and closing brace are elided
 * from this listing fragment. */
1011 static ir_node *get_fpcw(void)
1014 	if(initial_fpcw != NULL)
1015 		return initial_fpcw;
1017 	fpcw         = be_abi_get_ignore_irn(env_cg->birg->abi,
1018 	                                     &ia32_fp_cw_regs[REG_FPCW]);
1019 	initial_fpcw = be_transform_node(fpcw);
1021 	return initial_fpcw;
1025 * Construct a standard binary operation, set AM and immediate if required.
1027 * @param op1 The first operand
1028 * @param op2 The second operand
1029 * @param func The node constructor function
1030 * @return The constructed ia32 node.
/* Constructs a binary x87 float operation; like gen_binop, but the
 * constructor additionally receives the FPU control word. Address mode is
 * disabled for long double (> 64 bit) operands.
 * NOTE(review): the new_node declaration, the flag-clearing after the >64bit
 * check, closing brace and final return are elided from this fragment. */
1032 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1033                                     construct_binop_float_func *func,
1034                                     match_flags_t flags)
1036 	ir_graph *irg       = current_ir_graph;
1037 	dbg_info *dbgi      = get_irn_dbg_info(node);
1038 	ir_node  *block     = get_nodes_block(node);
1039 	ir_node  *new_block = be_transform_node(block);
1040 	ir_mode  *mode      = get_irn_mode(node);
1042 	ia32_address_mode_t  am;
1043 	ia32_address_t      *addr = &am.addr;
1045 	/* cannot use address mode with long double on x87 */
1046 	if (get_mode_size_bits(mode) > 64)
1049 	match_arguments(&am, block, op1, op2, NULL, flags);
1051 	new_node = func(dbgi, irg, new_block, addr->base, addr->index, addr->mem,
1052 	                am.new_op1, am.new_op2, get_fpcw());
1053 	set_am_attributes(new_node, &am);
1055 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1057 	new_node = fix_mem_proj(new_node, &am);
1063 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1065 * @param op1 The first operand
1066 * @param op2 The second operand
1067 * @param func The node constructor function
1068 * @return The constructed ia32 node.
/* Constructs a shift/rotate ia32 operation. The shift amount may be an
 * immediate; superfluous Convs on the amount are stripped since only the low
 * 5 bits matter on ia32. A third input on the lowered node is treated as a
 * dependency edge.
 * NOTE(review): local declarations, closing braces and the final return are
 * elided from this listing fragment. */
1070 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1071                                 construct_shift_func *func,
1072                                 match_flags_t flags)
1074 	dbg_info *dbgi      = get_irn_dbg_info(node);
1075 	ir_graph *irg       = current_ir_graph;
1076 	ir_node  *block     = get_nodes_block(node);
1077 	ir_node  *new_block = be_transform_node(block);
1082 	assert(! mode_is_float(get_irn_mode(node)));
1083 	assert(flags & match_immediate);
1084 	assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1086 	if(flags & match_mode_neutral) {
1087 		op1 = ia32_skip_downconv(op1);
1088 	} else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1089 		panic("right shifting of non-32bit values not supported, yet");
1091 	new_op1 = be_transform_node(op1);
1093 	/* the shift amount can be any mode that is bigger than 5 bits, since all
1094 	 * other bits are ignored anyway */
1095 	while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1096 		op2 = get_Conv_op(op2);
1097 		assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1099 	new_op2 = create_immediate_or_transform(op2, 0);
1101 	new_node = func(dbgi, irg, new_block, new_op1, new_op2);
1102 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1104 	/* lowered shift instruction may have a dependency operand, handle it here */
1105 	if (get_irn_arity(node) == 3) {
1106 		/* we have a dependency */
1107 		ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1108 		add_irn_dep(new_node, new_dep);
1116 * Construct a standard unary operation, set AM and immediate if required.
1118 * @param op The operand
1119 * @param func The node constructor function
1120 * @return The constructed ia32 node.
/* Constructs a standard unary ia32 operation via constructor @p func.
 * Only match_mode_neutral is supported as a flag; it allows skipping
 * Down-Convs on the operand.
 * NOTE(review): local declarations, closing brace and the final return are
 * elided from this listing fragment. */
1122 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1123                          match_flags_t flags)
1125 	ir_graph *irg       = current_ir_graph;
1126 	dbg_info *dbgi      = get_irn_dbg_info(node);
1127 	ir_node  *block     = get_nodes_block(node);
1128 	ir_node  *new_block = be_transform_node(block);
1132 	assert(flags == 0 || flags == match_mode_neutral);
1133 	if(flags & match_mode_neutral) {
1134 		op = ia32_skip_downconv(op);
1137 	new_op   = be_transform_node(op);
1138 	new_node = func(dbgi, irg, new_block, new_op);
1140 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Creates an ia32 Lea node carrying the attributes of @p addr; missing
 * base/index operands are replaced by the NoReg pseudo register, present ones
 * transformed into the new graph.
 * NOTE(review): the NULL checks guarding lines 1154/1160, the `res`
 * declaration, closing braces and the final return are elided from this
 * listing fragment. */
1145 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1146                                         ia32_address_t *addr)
1148 	ir_graph *irg   = current_ir_graph;
1149 	ir_node  *base  = addr->base;
1150 	ir_node  *index = addr->index;
1154 		base = ia32_new_NoReg_gp(env_cg);
1156 		base = be_transform_node(base);
1160 		index = ia32_new_NoReg_gp(env_cg);
1162 		index = be_transform_node(index);
1165 	res = new_rd_ia32_Lea(dbgi, irg, block, base, index);
1166 	set_address(res, addr);
1171 static int am_has_immediates(const ia32_address_t *addr)
1173 return addr->offset != 0 || addr->symconst_ent != NULL
1174 || addr->frame_entity || addr->use_frame;
1178 * Creates an ia32 Add.
1180 * @return the created ia32 Add node
1182 static ir_node *gen_Add(ir_node *node) {
1183 ir_graph *irg = current_ir_graph;
1184 dbg_info *dbgi = get_irn_dbg_info(node);
1185 ir_node *block = get_nodes_block(node);
1186 ir_node *new_block = be_transform_node(block);
1187 ir_node *op1 = get_Add_left(node);
1188 ir_node *op2 = get_Add_right(node);
1189 ir_mode *mode = get_irn_mode(node);
1191 ir_node *add_immediate_op;
1192 ia32_address_t addr;
1193 ia32_address_mode_t am;
1195 if (mode_is_float(mode)) {
1196 if (ia32_cg_config.use_sse2)
1197 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1198 match_commutative | match_am);
1200 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1201 match_commutative | match_am);
/* the integer Add itself is folded into an address below, so it must not
 * additionally be consumed as somebody else's address mode */
1204 ia32_mark_non_am(node);
1206 op2 = ia32_skip_downconv(op2);
1207 op1 = ia32_skip_downconv(op1);
/* strategy, tried in order: */
1211 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1212 * 1. Add with immediate -> Lea
1213 * 2. Add with possible source address mode -> Add
1214 * 3. Otherwise -> Lea
1216 memset(&addr, 0, sizeof(addr));
1217 ia32_create_address_mode(&addr, node, /*force=*/1);
1218 add_immediate_op = NULL;
/* case 0: everything folded into immediates -> a single Const */
1220 if(addr.base == NULL && addr.index == NULL) {
1221 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1222 addr.symconst_sign, addr.offset);
/* keep Consts scheduled after the frame setup in the start block */
1223 add_irn_dep(new_node, get_irg_frame(irg));
1224 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1227 /* add with immediate? */
1228 if(addr.index == NULL) {
1229 add_immediate_op = addr.base;
1230 } else if(addr.base == NULL && addr.scale == 0) {
1231 add_immediate_op = addr.index;
1234 if(add_immediate_op != NULL) {
/* x + 0: no immediates at all means the Add is a no-op */
1235 if(!am_has_immediates(&addr)) {
1236 #ifdef DEBUG_libfirm
1237 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1240 return be_transform_node(add_immediate_op);
/* case 1: single register + immediates -> Lea */
1243 new_node = create_lea_from_address(dbgi, new_block, &addr);
1244 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1248 /* test if we can use source address mode */
1249 match_arguments(&am, block, op1, op2, NULL, match_commutative
1250 | match_mode_neutral | match_am | match_immediate | match_try_am);
1252 /* construct an Add with source address mode */
1253 if (am.op_type == ia32_AddrModeS) {
1254 ia32_address_t *am_addr = &am.addr;
1255 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1256 am_addr->index, am_addr->mem, am.new_op1,
1258 set_am_attributes(new_node, &am);
1259 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1261 new_node = fix_mem_proj(new_node, &am);
1266 /* otherwise construct a lea */
1267 new_node = create_lea_from_address(dbgi, new_block, &addr);
1268 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1273 * Creates an ia32 Mul.
1275 * @return the created ia32 Mul node
1277 static ir_node *gen_Mul(ir_node *node) {
1278 ir_node *op1 = get_Mul_left(node);
1279 ir_node *op2 = get_Mul_right(node);
1280 ir_mode *mode = get_irn_mode(node);
1282 if (mode_is_float(mode)) {
1283 if (ia32_cg_config.use_sse2)
1284 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1285 match_commutative | match_am);
1287 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1288 match_commutative | match_am);
/* integer case: IMul supports source-AM, immediates, and both at once;
 * for the lower 32 result bits signedness doesn't matter (mode_neutral) */
1290 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1291 match_commutative | match_am | match_mode_neutral |
1292 match_immediate | match_am_and_immediates);
1296 * Creates an ia32 Mulh.
1297 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1298 * this result while Mul returns the lower 32 bit.
1300 * @return the created ia32 Mulh node
1302 static ir_node *gen_Mulh(ir_node *node)
1304 ir_node *block = get_nodes_block(node);
1305 ir_node *new_block = be_transform_node(block);
1306 ir_graph *irg = current_ir_graph;
1307 dbg_info *dbgi = get_irn_dbg_info(node);
1308 ir_mode *mode = get_irn_mode(node);
1309 ir_node *op1 = get_Mulh_left(node);
1310 ir_node *op2 = get_Mulh_right(node);
1311 ir_node *proj_res_high;
1313 ia32_address_mode_t am;
1314 ia32_address_t *addr = &am.addr;
1316 assert(!mode_is_float(mode) && "Mulh with float not supported");
1317 assert(get_mode_size_bits(mode) == 32);
1319 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* signed -> one-operand IMUL, unsigned -> MUL; both write EDX:EAX */
1321 if (mode_is_signed(mode)) {
1322 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1323 addr->index, addr->mem, am.new_op1,
1326 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1327 addr->index, addr->mem, am.new_op1,
1331 set_am_attributes(new_node, &am);
1332 /* we can't use source address mode anymore when using immediates */
1333 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1334 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1335 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1337 assert(get_irn_mode(new_node) == mode_T);
1339 fix_mem_proj(new_node, &am);
/* both variants use the same Proj number for the high result half */
1341 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
/* FIX: the Proj must live in the transformed block (new_block), not in the
 * untransformed source block, like all other nodes created here */
1342 proj_res_high = new_rd_Proj(dbgi, irg, new_block, new_node,
1343 mode_Iu, pn_ia32_IMul1OP_res_high);
1345 return proj_res_high;
1351 * Creates an ia32 And.
1353 * @return The created ia32 And node
1355 static ir_node *gen_And(ir_node *node) {
1356 ir_node *op1 = get_And_left(node);
1357 ir_node *op2 = get_And_right(node);
1358 assert(! mode_is_float(get_irn_mode(node)));
1360 /* is it a zero extension? */
1361 if (is_Const(op2)) {
1362 tarval *tv = get_Const_tarval(op2);
1363 long v = get_tarval_long(tv);
/* And with 0xFF / 0xFFFF is a zero-extension from 8/16 bit -> use a
 * Conv (movzx) instead of an And */
1365 if (v == 0xFF || v == 0xFFFF) {
1366 dbg_info *dbgi = get_irn_dbg_info(node);
1367 ir_node *block = get_nodes_block(node);
1374 assert(v == 0xFFFF);
1377 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
1383 return gen_binop(node, op1, op2, new_rd_ia32_And,
1384 match_commutative | match_mode_neutral | match_am
1393 * @return The created ia32 Or node
1395 static ir_node *gen_Or(ir_node *node) {
1396 ir_node *op1 = get_Or_left(node);
1397 ir_node *op2 = get_Or_right(node);
1399 assert (! mode_is_float(get_irn_mode(node)));
1400 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1401 | match_mode_neutral | match_am | match_immediate);
1407 * Creates an ia32 Eor.
1409 * @return The created ia32 Eor node
1411 static ir_node *gen_Eor(ir_node *node) {
1412 ir_node *op1 = get_Eor_left(node);
1413 ir_node *op2 = get_Eor_right(node);
1415 assert(! mode_is_float(get_irn_mode(node)));
1416 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1417 | match_mode_neutral | match_am | match_immediate);
1422 * Creates an ia32 Sub.
1424 * @return The created ia32 Sub node
1426 static ir_node *gen_Sub(ir_node *node) {
1427 ir_node *op1 = get_Sub_left(node);
1428 ir_node *op2 = get_Sub_right(node);
1429 ir_mode *mode = get_irn_mode(node);
/* note: Sub is not commutative, hence no match_commutative below */
1431 if (mode_is_float(mode)) {
1432 if (ia32_cg_config.use_sse2)
1433 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1435 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
/* Sub with a Const should have been normalized to Add(x, -C) earlier */
1440 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1444 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1445 | match_am | match_immediate);
1449 * Generates an ia32 DivMod with additional infrastructure for the
1450 * register allocator if needed.
1452 static ir_node *create_Div(ir_node *node)
1454 ir_graph *irg = current_ir_graph;
1455 dbg_info *dbgi = get_irn_dbg_info(node);
1456 ir_node *block = get_nodes_block(node);
1457 ir_node *new_block = be_transform_node(block);
1464 ir_node *sign_extension;
1465 ia32_address_mode_t am;
1466 ia32_address_t *addr = &am.addr;
1468 /* the upper bits have random contents for smaller modes */
/* shared transformer for Div, Mod and DivMod: extract operands, memory
 * and result mode depending on the concrete opcode */
1469 switch (get_irn_opcode(node)) {
1471 op1 = get_Div_left(node);
1472 op2 = get_Div_right(node);
1473 mem = get_Div_mem(node);
1474 mode = get_Div_resmode(node);
1477 op1 = get_Mod_left(node);
1478 op2 = get_Mod_right(node);
1479 mem = get_Mod_mem(node);
1480 mode = get_Mod_resmode(node);
1483 op1 = get_DivMod_left(node);
1484 op2 = get_DivMod_right(node);
1485 mem = get_DivMod_mem(node);
1486 mode = get_DivMod_resmode(node);
1489 panic("invalid divmod node %+F", node);
1492 match_arguments(&am, block, op1, op2, NULL, match_am);
1494 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1495 is the memory of the consumed address. We can have only the second op as address
1496 in Div nodes, so check only op2. */
1497 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1498 new_mem = be_transform_node(mem);
1499 if(!is_NoMem(addr->mem)) {
/* both the Div's own memory and the AM memory are live -> Sync them */
1503 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1506 new_mem = addr->mem;
/* signed division needs EDX:EAX sign-extended (cltd), unsigned needs
 * EDX zeroed; ProduceVal is a pseudo-node giving the regalloc a value */
1509 if (mode_is_signed(mode)) {
1510 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1511 add_irn_dep(produceval, get_irg_frame(irg));
1512 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1515 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1516 addr->index, new_mem, am.new_op2,
1517 am.new_op1, sign_extension);
1519 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1520 add_irn_dep(sign_extension, get_irg_frame(irg));
1522 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1523 addr->index, new_mem, am.new_op2,
1524 am.new_op1, sign_extension);
/* keep the original pinned state (division may trap) */
1527 set_irn_pinned(new_node, get_irn_pinned(node));
1529 set_am_attributes(new_node, &am);
1530 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1532 new_node = fix_mem_proj(new_node, &am);
/* Mod, Div and DivMod all map to the same ia32 division instruction;
 * the callers later pick the remainder/quotient result Projs. */
1538 static ir_node *gen_Mod(ir_node *node) {
1539 return create_Div(node);
1542 static ir_node *gen_Div(ir_node *node) {
1543 return create_Div(node);
1546 static ir_node *gen_DivMod(ir_node *node) {
1547 return create_Div(node);
1553 * Creates an ia32 floating Div.
1555 * @return The created ia32 xDiv node
1557 static ir_node *gen_Quot(ir_node *node)
1559 ir_node *op1 = get_Quot_left(node);
1560 ir_node *op2 = get_Quot_right(node);
/* SSE2 division, else x87 fdiv */
1562 if (ia32_cg_config.use_sse2) {
1563 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1565 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1571 * Creates an ia32 Shl.
1573 * @return The created ia32 Shl node
1575 static ir_node *gen_Shl(ir_node *node) {
1576 ir_node *left = get_Shl_left(node);
1577 ir_node *right = get_Shl_right(node);
/* left shift is mode-neutral: upper bits shifted in don't matter */
1579 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1580 match_mode_neutral | match_immediate);
1584 * Creates an ia32 Shr.
1586 * @return The created ia32 Shr node
1588 static ir_node *gen_Shr(ir_node *node) {
1589 ir_node *left = get_Shr_left(node);
1590 ir_node *right = get_Shr_right(node);
/* logical right shift: NOT mode-neutral, upper bits must be exact */
1592 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1598 * Creates an ia32 Sar.
1600 * @return The created ia32 Shrs node
1602 static ir_node *gen_Shrs(ir_node *node) {
1603 ir_node *left = get_Shrs_left(node);
1604 ir_node *right = get_Shrs_right(node);
1605 ir_mode *mode = get_irn_mode(node);
/* Shrs(x, 31) on a 32-bit signed value broadcasts the sign bit:
 * implement it with cltd instead of a shift */
1607 if(is_Const(right) && mode == mode_Is) {
1608 tarval *tv = get_Const_tarval(right);
1609 long val = get_tarval_long(tv);
1611 /* this is a sign extension */
1612 ir_graph *irg = current_ir_graph;
1613 dbg_info *dbgi = get_irn_dbg_info(node);
1614 ir_node *block = be_transform_node(get_nodes_block(node));
1616 ir_node *new_op = be_transform_node(op);
1617 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1618 add_irn_dep(pval, get_irg_frame(irg));
1620 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1624 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C == 16 or 24 is a sign-extension from
 * 16/8 bit -> replace the shift pair with a Conv (movsx) */
1625 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1626 ir_node *shl_left = get_Shl_left(left);
1627 ir_node *shl_right = get_Shl_right(left);
1628 if(is_Const(shl_right)) {
1629 tarval *tv1 = get_Const_tarval(right);
1630 tarval *tv2 = get_Const_tarval(shl_right);
1631 if(tv1 == tv2 && tarval_is_long(tv1)) {
1632 long val = get_tarval_long(tv1);
1633 if(val == 16 || val == 24) {
1634 dbg_info *dbgi = get_irn_dbg_info(node);
1635 ir_node *block = get_nodes_block(node);
1645 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
1654 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1660 * Creates an ia32 RotL.
1662 * @param op1 The first operator
1663 * @param op2 The second operator
1664 * @return The created ia32 RotL node
1666 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1667 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1673 * Creates an ia32 RotR.
1674 * NOTE: There is no RotR with immediate because this would always be a RotL
1675 * "imm-mode_size_bits" which can be pre-calculated.
1677 * @param op1 The first operator
1678 * @param op2 The second operator
1679 * @return The created ia32 RotR node
1681 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1682 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1688 * Creates an ia32 RotR or RotL (depending on the found pattern).
1690 * @return The created ia32 RotL or RotR node
1692 static ir_node *gen_Rot(ir_node *node) {
1693 ir_node *rotate = NULL;
1694 ir_node *op1 = get_Rot_left(node);
1695 ir_node *op2 = get_Rot_right(node);
1697 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1698 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1699 that means we can create a RotR instead of an Add and a RotL */
1701 if (get_irn_op(op2) == op_Add) {
1703 ir_node *left = get_Add_left(add);
1704 ir_node *right = get_Add_right(add);
1705 if (is_Const(right)) {
1706 tarval *tv = get_Const_tarval(right);
1707 ir_mode *mode = get_irn_mode(node);
1708 long bits = get_mode_size_bits(mode);
/* pattern: RotL(x, Minus(e) + bits) == RotR(x, e) */
1710 if (get_irn_op(left) == op_Minus &&
1711 tarval_is_long(tv) &&
1712 get_tarval_long(tv) == bits &&
1715 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1716 rotate = gen_RotR(node, op1, get_Minus_op(left));
1721 if (rotate == NULL) {
1722 rotate = gen_RotL(node, op1, op2);
1731 * Transforms a Minus node.
1733 * @return The created ia32 Minus node
1735 static ir_node *gen_Minus(ir_node *node)
1737 ir_node *op = get_Minus_op(node);
1738 ir_node *block = be_transform_node(get_nodes_block(node));
1739 ir_graph *irg = current_ir_graph;
1740 dbg_info *dbgi = get_irn_dbg_info(node);
1741 ir_mode *mode = get_irn_mode(node);
1746 if (mode_is_float(mode)) {
1747 ir_node *new_op = be_transform_node(op);
1748 if (ia32_cg_config.use_sse2) {
1749 /* TODO: non-optimal... if we have many xXors, then we should
1750 * rather create a load for the const and use that instead of
1751 * several AM nodes... */
/* SSE negate: xor the sign bit with a constant sign mask loaded
 * from memory via source address mode */
1752 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1753 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1754 ir_node *nomem = new_rd_NoMem(irg);
1756 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1757 nomem, new_op, noreg_xmm);
1759 size = get_mode_size_bits(mode);
1760 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1762 set_ia32_am_sc(new_node, ent);
1763 set_ia32_op_type(new_node, ia32_AddrModeS);
1764 set_ia32_ls_mode(new_node, mode);
/* x87 negate: fchs */
1766 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
/* integer negate */
1769 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1772 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1778 * Transforms a Not node.
1780 * @return The created ia32 Not node
1782 static ir_node *gen_Not(ir_node *node) {
1783 ir_node *op = get_Not_op(node);
1785 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1786 assert (! mode_is_float(get_irn_mode(node)));
1788 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1794 * Transforms an Abs node.
1796 * @return The created ia32 Abs node
1798 static ir_node *gen_Abs(ir_node *node)
1800 ir_node *block = get_nodes_block(node);
1801 ir_node *new_block = be_transform_node(block);
1802 ir_node *op = get_Abs_op(node);
1803 ir_graph *irg = current_ir_graph;
1804 dbg_info *dbgi = get_irn_dbg_info(node);
1805 ir_mode *mode = get_irn_mode(node);
1806 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1807 ir_node *nomem = new_NoMem();
1813 if (mode_is_float(mode)) {
1814 new_op = be_transform_node(op);
/* SSE abs: clear the sign bit by anding with an abs mask constant
 * read from memory via source address mode */
1816 if (ia32_cg_config.use_sse2) {
1817 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1818 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1819 nomem, new_op, noreg_fp);
1821 size = get_mode_size_bits(mode);
1822 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1824 set_ia32_am_sc(new_node, ent);
1826 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1828 set_ia32_op_type(new_node, ia32_AddrModeS);
1829 set_ia32_ls_mode(new_node, mode);
/* x87 abs: fabs */
1831 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1832 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* integer abs via the branchless idiom:
 *   s = x >> 31 (cltd); abs = (x ^ s) - s */
1835 ir_node *xor, *pval, *sign_extension;
1837 if (get_mode_size_bits(mode) == 32) {
1838 new_op = be_transform_node(op);
/* smaller modes: sign-extend to 32 bit first */
1840 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1843 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1844 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1847 add_irn_dep(pval, get_irg_frame(irg));
1848 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1850 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1851 nomem, new_op, sign_extension);
1852 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1854 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1855 nomem, xor, sign_extension);
1856 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Produce a flags value (eflags) for a mode_b input and report the compare
 * relation in *pnc_out.  A Proj of a Cmp is transformed directly; any other
 * mode_b value is materialized and tested against 0 (Test sets ZF). */
1862 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1864 ir_graph *irg = current_ir_graph;
1872 /* we have a Cmp as input */
1874 ir_node *pred = get_Proj_pred(node);
1876 flags = be_transform_node(pred);
1877 *pnc_out = get_Proj_proj(node);
1882 /* a mode_b value, we have to compare it against 0 */
1883 dbgi = get_irn_dbg_info(node);
1884 new_block = be_transform_node(get_nodes_block(node));
1885 new_op = be_transform_node(node);
1886 noreg = ia32_new_NoReg_gp(env_cg);
1887 nomem = new_NoMem();
1888 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1889 new_op, new_op, 0, 0);
/* Test(x, x) against zero: condition is "not equal" */
1890 *pnc_out = pn_Cmp_Lg;
1895 * Transforms a Load.
1897 * @return the created ia32 Load node
1899 static ir_node *gen_Load(ir_node *node) {
1900 ir_node *old_block = get_nodes_block(node);
1901 ir_node *block = be_transform_node(old_block);
1902 ir_node *ptr = get_Load_ptr(node);
1903 ir_node *mem = get_Load_mem(node);
1904 ir_node *new_mem = be_transform_node(mem);
1907 ir_graph *irg = current_ir_graph;
1908 dbg_info *dbgi = get_irn_dbg_info(node);
1909 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1910 ir_mode *mode = get_Load_mode(node);
1913 ia32_address_t addr;
1915 /* construct load address */
1916 memset(&addr, 0, sizeof(addr));
1917 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1924 base = be_transform_node(base);
1930 index = be_transform_node(index);
/* pick the load flavour: SSE xLoad / x87 vfld for floats, a widening
 * Conv_I2I for sub-32-bit integers, plain Load otherwise */
1933 if (mode_is_float(mode)) {
1934 if (ia32_cg_config.use_sse2) {
1935 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1937 res_mode = mode_xmm;
1939 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1941 res_mode = mode_vfp;
1944 assert(mode != mode_b);
1946 /* create a conv node with address mode for smaller modes */
1947 if(get_mode_size_bits(mode) < 32) {
1948 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1949 new_mem, noreg, mode);
1951 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1956 set_irn_pinned(new_node, get_irn_pinned(node));
1957 set_ia32_op_type(new_node, ia32_AddrModeS);
1958 set_ia32_ls_mode(new_node, mode);
1959 set_address(new_node, &addr);
/* unpinned loads may be duplicated instead of spilled */
1961 if(get_irn_pinned(node) == op_pin_state_floats) {
1962 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1965 /* make sure we are scheduled behind the initial IncSP/Barrier
1966 * to avoid spills being placed before it
1968 if (block == get_irg_start_block(irg)) {
1969 add_irn_dep(new_node, get_irg_frame(irg));
1972 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Decide whether a Load (given as its result Proj 'node') may be folded
 * into a destination-address-mode operation of a Store in 'block' writing
 * through 'ptr' with memory 'mem'.  'other' is the second operand of the
 * operation (may be NULL) and must not depend on the load. */
1977 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1978 ir_node *ptr, ir_node *other)
1985 /* we only use address mode if we're the only user of the load */
1986 if(get_irn_n_edges(node) > 1)
1989 load = get_Proj_pred(node);
/* load must be in the same block as the store */
1992 if(get_nodes_block(load) != block)
1995 /* Store should be attached to the load */
1996 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
1998 /* store should have the same pointer as the load */
1999 if(get_Load_ptr(load) != ptr)
2002 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2003 if(other != NULL && get_nodes_block(other) == block
2004 && heights_reachable_in_block(heights, other, load))
/* Build a destination-address-mode binary operation (op [mem], reg/imm)
 * for a Store(binop(Load(ptr), other)) pattern.  Returns NULL when the
 * pattern doesn't match; 'func8bit' is used for 8-bit operand modes. */
2010 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2011 ir_node *mem, ir_node *ptr, ir_mode *mode,
2012 construct_binop_dest_func *func,
2013 construct_binop_dest_func *func8bit,
2014 match_flags_t flags)
2016 ir_node *src_block = get_nodes_block(node);
2018 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2019 ir_graph *irg = current_ir_graph;
2024 ia32_address_mode_t am;
2025 ia32_address_t *addr = &am.addr;
2026 memset(&am, 0, sizeof(am));
2028 assert(flags & match_dest_am);
2029 assert(flags & match_immediate); /* there is no destam node without... */
2030 commutative = (flags & match_commutative) != 0;
/* prefer folding the load on op1; fall back to op2 for commutative ops */
2032 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2033 build_address(&am, op1);
2034 new_op = create_immediate_or_transform(op2, 0);
2035 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2036 build_address(&am, op2);
2037 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address slots with placeholders */
2042 if(addr->base == NULL)
2043 addr->base = noreg_gp;
2044 if(addr->index == NULL)
2045 addr->index = noreg_gp;
2046 if(addr->mem == NULL)
2047 addr->mem = new_NoMem();
2049 dbgi = get_irn_dbg_info(node);
2050 block = be_transform_node(src_block);
2051 if(get_mode_size_bits(mode) == 8) {
2052 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2055 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
2058 set_address(new_node, addr);
2059 set_ia32_op_type(new_node, ia32_AddrModeD);
2060 set_ia32_ls_mode(new_node, mode);
2061 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Build a destination-address-mode unary operation (op [mem]) for a
 * Store(unop(Load(ptr))) pattern; returns NULL when it doesn't match. */
2066 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2067 ir_node *ptr, ir_mode *mode,
2068 construct_unop_dest_func *func)
2070 ir_graph *irg = current_ir_graph;
2071 ir_node *src_block = get_nodes_block(node);
2075 ia32_address_mode_t am;
2076 ia32_address_t *addr = &am.addr;
2077 memset(&am, 0, sizeof(am));
2079 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2082 build_address(&am, op);
2084 dbgi = get_irn_dbg_info(node);
2085 block = be_transform_node(src_block);
2086 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2087 set_address(new_node, addr);
2088 set_ia32_op_type(new_node, ia32_AddrModeD);
2089 set_ia32_ls_mode(new_node, mode);
2090 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to turn Store(Psi(cond, 1, 0)) (or the negated 0/1 form) into a
 * SetMem (setcc to memory).  Only valid for 8-bit stores; returns NULL
 * when the pattern doesn't apply. */
2095 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2096 ir_mode *mode = get_irn_mode(node);
2097 ir_node *psi_true = get_Psi_val(node, 0);
2098 ir_node *psi_default = get_Psi_default(node);
2109 ia32_address_t addr;
/* setcc writes a single byte */
2111 if(get_mode_size_bits(mode) != 8)
2114 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2116 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2122 build_address_ptr(&addr, ptr, mem);
2124 irg = current_ir_graph;
2125 dbgi = get_irn_dbg_info(node);
2126 block = get_nodes_block(node);
2127 new_block = be_transform_node(block);
2128 cond = get_Psi_cond(node, 0);
2129 flags = get_flags_node(cond, &pnc);
2130 new_mem = be_transform_node(mem);
2131 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2132 addr.index, addr.mem, flags, pnc, negated);
2133 set_address(new_node, &addr);
2134 set_ia32_op_type(new_node, ia32_AddrModeD);
2135 set_ia32_ls_mode(new_node, mode);
2136 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to transform a Store into a destination-address-mode operation
 * (e.g. Store(Add(Load(p), x), p) -> "add [p], x").  Dispatches on the
 * opcode of the stored value; returns NULL if no pattern matches. */
2141 static ir_node *try_create_dest_am(ir_node *node) {
2142 ir_node *val = get_Store_value(node);
2143 ir_node *mem = get_Store_mem(node);
2144 ir_node *ptr = get_Store_ptr(node);
2145 ir_mode *mode = get_irn_mode(val);
2146 unsigned bits = get_mode_size_bits(mode);
2151 /* handle only GP modes for now... */
2152 if(!mode_needs_gp_reg(mode))
2156 /* store must be the only user of the val node */
2157 if(get_irn_n_edges(val) > 1)
2159 /* skip pointless convs */
2161 ir_node *conv_op = get_Conv_op(val);
2162 ir_mode *pred_mode = get_irn_mode(conv_op);
/* a Conv that doesn't widen below the stored size is irrelevant here */
2163 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2171 /* value must be in the same block */
2172 if(get_nodes_block(node) != get_nodes_block(val))
2175 switch(get_irn_opcode(val)) {
2177 op1 = get_Add_left(val);
2178 op2 = get_Add_right(val);
/* Add +1/-1 become inc/dec on memory */
2179 if(is_Const_1(op2)) {
2180 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2181 new_rd_ia32_IncMem);
2183 } else if(is_Const_Minus_1(op2)) {
2184 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2185 new_rd_ia32_DecMem);
2188 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2189 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2190 match_dest_am | match_commutative |
2194 op1 = get_Sub_left(val);
2195 op2 = get_Sub_right(val);
/* Sub with Const should have been normalized to Add earlier */
2197 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2200 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2201 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2202 match_dest_am | match_immediate |
2206 op1 = get_And_left(val);
2207 op2 = get_And_right(val);
2208 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2209 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2210 match_dest_am | match_commutative |
2214 op1 = get_Or_left(val);
2215 op2 = get_Or_right(val);
2216 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2217 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2218 match_dest_am | match_commutative |
2222 op1 = get_Eor_left(val);
2223 op2 = get_Eor_right(val);
2224 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2225 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2226 match_dest_am | match_commutative |
/* shifts/rotates on memory have no separate 8-bit constructor */
2230 op1 = get_Shl_left(val);
2231 op2 = get_Shl_right(val);
2232 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2233 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2234 match_dest_am | match_immediate);
2237 op1 = get_Shr_left(val);
2238 op2 = get_Shr_right(val);
2239 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2240 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2241 match_dest_am | match_immediate);
2244 op1 = get_Shrs_left(val);
2245 op2 = get_Shrs_right(val);
2246 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2247 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2248 match_dest_am | match_immediate);
2251 op1 = get_Rot_left(val);
2252 op2 = get_Rot_right(val);
2253 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2254 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2255 match_dest_am | match_immediate);
2257 /* TODO: match ROR patterns... */
2259 new_node = try_create_SetMem(val, ptr, mem);
2262 op1 = get_Minus_op(val);
2263 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2266 /* should be lowered already */
2267 assert(mode != mode_b);
2268 op1 = get_Not_op(val);
2269 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* propagate pinned state from the original Store onto the new node */
2275 if(new_node != NULL) {
2276 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2277 get_irn_pinned(node) == op_pin_state_pinned) {
2278 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns non-zero iff 'node' is a Conv from a float mode to a 32-bit
 * GP-register mode (candidate for an fist store). */
2285 static int is_float_to_int32_conv(const ir_node *node)
2287 ir_mode *mode = get_irn_mode(node);
2291 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2296 conv_op = get_Conv_op(node);
2297 conv_mode = get_irn_mode(conv_op);
2299 if(!mode_is_float(conv_mode))
2306 * Transform a Store(floatConst).
2308 * @return the created ia32 Store node
2310 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns) {
2311 ir_mode *mode = get_irn_mode(cns);
2312 int size = get_mode_size_bits(mode);
2313 tarval *tv = get_Const_tarval(cns);
2314 ir_node *block = get_nodes_block(node);
2315 ir_node *new_block = be_transform_node(block);
2316 ir_node *ptr = get_Store_ptr(node);
2317 ir_node *mem = get_Store_mem(node);
2318 ir_graph *irg = current_ir_graph;
2319 dbg_info *dbgi = get_irn_dbg_info(node);
2320 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2323 ia32_address_t addr;
/* assemble the first 32 bits of the constant, little-endian */
2325 unsigned val = get_tarval_sub_bits(tv, 0) |
2326 (get_tarval_sub_bits(tv, 1) << 8) |
2327 (get_tarval_sub_bits(tv, 2) << 16) |
2328 (get_tarval_sub_bits(tv, 3) << 24);
2329 ir_node *imm = create_Immediate(NULL, 0, val);
2331 /* construct store address */
2332 memset(&addr, 0, sizeof(addr));
2333 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2335 if (addr.base == NULL) {
2338 addr.base = be_transform_node(addr.base);
2341 if (addr.index == NULL) {
2344 addr.index = be_transform_node(addr.index);
2346 addr.mem = be_transform_node(mem);
2348 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2349 addr.index, addr.mem, imm);
2351 set_irn_pinned(new_node, get_irn_pinned(node));
2352 set_ia32_op_type(new_node, ia32_AddrModeD);
2353 set_ia32_ls_mode(new_node, mode);
2355 set_address(new_node, &addr);
2357 /* emit further 32-bit stores for constants wider than 32 bits */
2359 unsigned val = get_tarval_sub_bits(tv, ofs) |
2360 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2361 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2362 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2363 ir_node *imm = create_Immediate(NULL, 0, val);
/* chain the stores through memory to keep their order */
2366 addr.mem = new_node;
2368 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2369 addr.index, addr.mem, imm);
2371 set_irn_pinned(new_node, get_irn_pinned(node));
2372 set_ia32_op_type(new_node, ia32_AddrModeD);
2373 set_ia32_ls_mode(new_node, mode);
2375 set_address(new_node, &addr);
2380 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2385 * Transforms a normal Store.
2387 * @return the created ia32 Store node
2389 static ir_node *gen_normal_Store(ir_node *node)
2391 ir_node *val = get_Store_value(node);
2392 ir_mode *mode = get_irn_mode(val);
2393 ir_node *block = get_nodes_block(node);
2394 ir_node *new_block = be_transform_node(block);
2395 ir_node *ptr = get_Store_ptr(node);
2396 ir_node *mem = get_Store_mem(node);
2397 ir_graph *irg = current_ir_graph;
2398 dbg_info *dbgi = get_irn_dbg_info(node);
2399 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2402 ia32_address_t addr;
2404 /* check for destination address mode */
2405 new_node = try_create_dest_am(node);
2406 if (new_node != NULL)
2409 /* construct store address */
2410 memset(&addr, 0, sizeof(addr));
2411 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2413 if (addr.base == NULL) {
2416 addr.base = be_transform_node(addr.base);
2419 if (addr.index == NULL) {
2422 addr.index = be_transform_node(addr.index);
2424 addr.mem = be_transform_node(mem);
2426 if (mode_is_float(mode)) {
2427 /* convs (and strict-convs) before stores are unnecessary if the mode
2429 while (is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2430 val = get_Conv_op(val);
2432 new_val = be_transform_node(val);
2433 if (ia32_cg_config.use_sse2) {
2434 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2435 addr.index, addr.mem, new_val);
2437 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2438 addr.index, addr.mem, new_val, mode);
/* Store(Conv(float->int32)) -> fist, storing directly from the x87
 * stack with a truncating FPU control word */
2440 } else if (is_float_to_int32_conv(val)) {
2441 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2442 val = get_Conv_op(val);
2444 /* convs (and strict-convs) before stores are unnecessary if the mode
2446 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2447 val = get_Conv_op(val);
2449 new_val = be_transform_node(val);
2451 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2452 addr.index, addr.mem, new_val, trunc_mode);
/* integer store; the value may become an Immediate operand */
2454 new_val = create_immediate_or_transform(val, 0);
2455 assert(mode != mode_b);
2457 if (get_mode_size_bits(mode) == 8) {
2458 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2459 addr.index, addr.mem, new_val);
2461 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2462 addr.index, addr.mem, new_val);
2466 set_irn_pinned(new_node, get_irn_pinned(node));
2467 set_ia32_op_type(new_node, ia32_AddrModeD);
2468 set_ia32_ls_mode(new_node, mode);
2470 set_address(new_node, &addr);
2471 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2477 * Transforms a Store.
2479 * @return the created ia32 Store node
2481 static ir_node *gen_Store(ir_node *node)
2483 ir_node *val = get_Store_value(node);
2484 ir_mode *mode = get_irn_mode(val);
/* float constants that can't be encoded as simple SSE/x87 constants are
 * stored as raw integer immediates instead of materializing the float */
2486 if (mode_is_float(mode) && is_Const(val)) {
2489 /* we are storing a floating point constant */
2490 if (ia32_cg_config.use_sse2) {
2491 transform = !is_simple_sse_Const(val);
2493 transform = !is_simple_x87_Const(val);
2496 return gen_float_const_Store(node, val);
2498 return gen_normal_Store(node);
2502  * Transforms a Switch.
2504  * @return the created ia32 SwitchJmp node
2506 static ir_node *create_Switch(ir_node *node)
2508 	ir_graph *irg = current_ir_graph;
2509 	dbg_info *dbgi = get_irn_dbg_info(node);
2510 	ir_node *block = be_transform_node(get_nodes_block(node));
2511 	ir_node *sel = get_Cond_selector(node);
2512 	ir_node *new_sel = be_transform_node(sel);
2513 	int switch_min = INT_MAX;
2514 	int switch_max = INT_MIN;
2515 	long default_pn = get_Cond_defaultProj(node);
2517 	const ir_edge_t *edge;
/* the jump-table implementation only supports a 32bit selector */
2519 	assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2521 	/* determine the smallest switch case value */
/* scan all case Projs; the default Proj is skipped so it does not
 * influence the min/max range of the jump table */
2522 	foreach_out_edge(node, edge) {
2523 		ir_node *proj = get_edge_src_irn(edge);
2524 		long pn = get_Proj_proj(proj);
2525 		if(pn == default_pn)
/* refuse to build gigantic jump tables */
2534 	if((unsigned) (switch_max - switch_min) > 256000) {
2535 		panic("Size of switch %+F bigger than 256000", node);
2538 	if (switch_min != 0) {
2539 		ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2541 		/* if smallest switch case is not 0 we need an additional sub */
/* bias the selector with a Lea so the jump table can start at index 0 */
2542 		new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2543 		add_ia32_am_offs_int(new_sel, -switch_min);
2544 		set_ia32_op_type(new_sel, ia32_AddrModeS);
2546 		SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2549 	new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2550 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2556  * Transform a Cond node.
/* A Cond with a non-boolean selector is a switch; a boolean selector becomes
 * a conditional jump (Jcc) fed by the flags-producing compare. */
2558 static ir_node *gen_Cond(ir_node *node) {
2559 	ir_node *block = get_nodes_block(node);
2560 	ir_node *new_block = be_transform_node(block);
2561 	ir_graph *irg = current_ir_graph;
2562 	dbg_info *dbgi = get_irn_dbg_info(node);
2563 	ir_node *sel = get_Cond_selector(node);
2564 	ir_mode *sel_mode = get_irn_mode(sel);
2565 	ir_node *flags = NULL;
2569 	if (sel_mode != mode_b) {
2570 		return create_Switch(node);
2573 	/* we get flags from a cmp */
2574 	flags = get_flags_node(sel, &pnc);
2576 	new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2577 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2583  * Transforms a CopyB node.
2585  * @return The transformed node.
2587 static ir_node *gen_CopyB(ir_node *node) {
2588 	ir_node *block = be_transform_node(get_nodes_block(node));
2589 	ir_node *src = get_CopyB_src(node);
2590 	ir_node *new_src = be_transform_node(src);
2591 	ir_node *dst = get_CopyB_dst(node);
2592 	ir_node *new_dst = be_transform_node(dst);
2593 	ir_node *mem = get_CopyB_mem(node);
2594 	ir_node *new_mem = be_transform_node(mem);
2595 	ir_node *res = NULL;
2596 	ir_graph *irg = current_ir_graph;
2597 	dbg_info *dbgi = get_irn_dbg_info(node);
2598 	int size = get_type_size_bytes(get_CopyB_type(node));
2601 	/* If we have to copy more than 32 bytes, we use REP MOVSx and */
2602 	/* then we need the size explicitly in ECX. */
/* threshold is 32 words (128 bytes): large copies use the rep-movs CopyB
 * with the count materialised as a Const, small copies use CopyB_i with an
 * immediate size */
2603 	if (size >= 32 * 4) {
2604 		rem = size & 0x3; /* size % 4 */
2607 		res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
/* artificial dep on the frame keeps the Const from floating too early */
2608 		add_irn_dep(res, get_irg_frame(irg));
2610 		res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
2613 		ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
2616 		res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2619 	SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/* Transform a be_Copy: duplicate the node and normalise every gp-register
 * value to mode_Iu so all integer copies use one canonical mode. */
2624 static ir_node *gen_be_Copy(ir_node *node)
2626 	ir_node *new_node = be_duplicate_node(node);
2627 	ir_mode *mode = get_irn_mode(new_node);
2629 	if (mode_needs_gp_reg(mode)) {
2630 		set_irn_mode(new_node, mode_Iu);
/* Create an x87 floating point compare for a Cmp node.
 * Three variants, chosen by CPU capabilities:
 *   - fucomi:          writes eflags directly (P6+)
 *   - ftst + fnstsw:   compare against 0.0 when the right operand is Const 0
 *   - fucom + fnstsw:  generic compare via the FPU status word
 * The fnstsw variants need a trailing Sahf to move the status word into
 * the CPU flags. */
2636 static ir_node *create_Fucom(ir_node *node)
2638 	ir_graph *irg = current_ir_graph;
2639 	dbg_info *dbgi = get_irn_dbg_info(node);
2640 	ir_node *block = get_nodes_block(node);
2641 	ir_node *new_block = be_transform_node(block);
2642 	ir_node *left = get_Cmp_left(node);
2643 	ir_node *new_left = be_transform_node(left);
2644 	ir_node *right = get_Cmp_right(node);
2648 	if(ia32_cg_config.use_fucomi) {
2649 		new_right = be_transform_node(right);
2650 		new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2652 		set_ia32_commutative(new_node);
2653 		SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* ftst only tests against zero, so it is usable only for Cmp(x, 0) */
2655 		if(ia32_cg_config.use_ftst && is_Const_0(right)) {
2656 			new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2659 			new_right = be_transform_node(right);
2660 			new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2664 			set_ia32_commutative(new_node);
2666 		SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* transfer FPU status word (AH) into eflags */
2668 		new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2669 		SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Create an SSE ucomis[sd] compare for a Cmp node.
 * Uses address-mode matching so one operand may be folded from memory;
 * the compare is commutative for matching purposes. */
2675 static ir_node *create_Ucomi(ir_node *node)
2677 	ir_graph *irg = current_ir_graph;
2678 	dbg_info *dbgi = get_irn_dbg_info(node);
2679 	ir_node *src_block = get_nodes_block(node);
2680 	ir_node *new_block = be_transform_node(src_block);
2681 	ir_node *left = get_Cmp_left(node);
2682 	ir_node *right = get_Cmp_right(node);
2684 	ia32_address_mode_t am;
2685 	ia32_address_t *addr = &am.addr;
2687 	match_arguments(&am, src_block, left, right, NULL,
2688 	                match_commutative | match_am);
2690 	new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2691 	                             addr->mem, am.new_op1, am.new_op2,
2693 	set_am_attributes(new_node, &am);
2695 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* rewire a possible memory Proj if an operand was folded into the AM */
2697 	new_node = fix_mem_proj(new_node, &am);
2703  * helper function: checks wether all Cmp projs are Lg or Eq which is needed
2704  * to fold an and into a test node
/* Test only sets flags meaningful for (in)equality against zero, so the
 * And may be folded only if every consumer asks for Eq or Lg. */
2706 static int can_fold_test_and(ir_node *node)
2708 	const ir_edge_t *edge;
2710 	/** we can only have eq and lg projs */
2711 	foreach_out_edge(node, edge) {
2712 		ir_node *proj = get_edge_src_irn(edge);
2713 		pn_Cmp pnc = get_Proj_proj(proj);
2714 		if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/* Transform a Cmp node into the matching ia32 compare.
 * Float compares are dispatched to SSE (Ucomi) or x87 (Fucom).
 * Integer compares use, in order of preference:
 *   - Test(and_l, and_r)  when comparing And(a,b) against 0 and all
 *     consumers only ask Eq/Lg (folds the And away)
 *   - Cmp(mem, 0-imm)     when the left operand matched as source AM
 *   - Test(x, x)          for a plain register compare against 0
 *   - Cmp(left, right)    in the general case
 * 8bit operand sizes get the dedicated *8Bit variants. */
2721 static ir_node *gen_Cmp(ir_node *node)
2723 	ir_graph *irg = current_ir_graph;
2724 	dbg_info *dbgi = get_irn_dbg_info(node);
2725 	ir_node *block = get_nodes_block(node);
2726 	ir_node *new_block = be_transform_node(block);
2727 	ir_node *left = get_Cmp_left(node);
2728 	ir_node *right = get_Cmp_right(node);
2729 	ir_mode *cmp_mode = get_irn_mode(left);
2731 	ia32_address_mode_t am;
2732 	ia32_address_t *addr = &am.addr;
2735 	if(mode_is_float(cmp_mode)) {
2736 		if (ia32_cg_config.use_sse2) {
2737 			return create_Ucomi(node);
2739 			return create_Fucom(node);
2743 	assert(mode_needs_gp_reg(cmp_mode));
2745 	/* we prefer the Test instruction where possible except cases where
2746 	 * we can use SourceAM */
2747 	cmp_unsigned = !mode_is_signed(cmp_mode);
2748 	if (is_Const_0(right)) {
/* And may only be folded when this Cmp is its single user */
2750 		    get_irn_n_edges(left) == 1 &&
2751 		    can_fold_test_and(node)) {
2752 			/* Test(and_left, and_right) */
2753 			ir_node *and_left = get_And_left(left);
2754 			ir_node *and_right = get_And_right(left);
2755 			ir_mode *mode = get_irn_mode(and_left);
2757 			match_arguments(&am, block, and_left, and_right, NULL,
2759 			                match_am | match_8bit_am | match_16bit_am |
2760 			                match_am_and_immediates | match_immediate |
2761 			                match_8bit | match_16bit);
2762 			if (get_mode_size_bits(mode) == 8) {
2763 				new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2764 				                                addr->index, addr->mem, am.new_op1,
2765 				                                am.new_op2, am.ins_permuted,
2768 				new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2769 				                            addr->index, addr->mem, am.new_op1,
2770 				                            am.new_op2, am.ins_permuted, cmp_unsigned);
/* try to fold the left operand from memory (source AM) */
2773 			match_arguments(&am, block, NULL, left, NULL,
2774 			                match_am | match_8bit_am | match_16bit_am |
2775 			                match_8bit | match_16bit);
2776 			if (am.op_type == ia32_AddrModeS) {
/* compare the memory operand against an immediate 0 */
2778 				ir_node *imm_zero = try_create_Immediate(right, 0);
2779 				if (get_mode_size_bits(cmp_mode) == 8) {
2780 					new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2781 					                               addr->index, addr->mem, am.new_op2,
2782 					                               imm_zero, am.ins_permuted,
2785 					new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2786 					                           addr->index, addr->mem, am.new_op2,
2787 					                           imm_zero, am.ins_permuted, cmp_unsigned);
2790 				/* Test(left, left) */
2791 				if (get_mode_size_bits(cmp_mode) == 8) {
2792 					new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2793 					                                addr->index, addr->mem, am.new_op2,
2794 					                                am.new_op2, am.ins_permuted,
2797 					new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2798 					                            addr->index, addr->mem, am.new_op2,
2799 					                            am.new_op2, am.ins_permuted,
2805 		/* Cmp(left, right) */
2806 		match_arguments(&am, block, left, right, NULL,
2807 		                match_commutative | match_am | match_8bit_am |
2808 		                match_16bit_am | match_am_and_immediates |
2809 		                match_immediate | match_8bit | match_16bit);
2810 		if (get_mode_size_bits(cmp_mode) == 8) {
2811 			new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2812 			                               addr->index, addr->mem, am.new_op1,
2813 			                               am.new_op2, am.ins_permuted,
2816 			new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2817 			                           addr->index, addr->mem, am.new_op1,
2818 			                           am.new_op2, am.ins_permuted, cmp_unsigned);
2821 	set_am_attributes(new_node, &am);
2822 	assert(cmp_mode != NULL);
2823 	set_ia32_ls_mode(new_node, cmp_mode);
2825 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2827 	new_node = fix_mem_proj(new_node, &am);
/* Create a CMov (conditional move) for a Psi node.
 * @param node       the original Psi
 * @param flags      the untransformed flags-producing node (for AM matching)
 * @param new_flags  the transformed flags value fed into the CMov
 * Requires cmov support; false/true values must live in gp registers. */
2832 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2835 	ir_graph *irg = current_ir_graph;
2836 	dbg_info *dbgi = get_irn_dbg_info(node);
2837 	ir_node *block = get_nodes_block(node);
2838 	ir_node *new_block = be_transform_node(block);
2839 	ir_node *val_true = get_Psi_val(node, 0);
2840 	ir_node *val_false = get_Psi_default(node);
2842 	match_flags_t match_flags;
2843 	ia32_address_mode_t am;
2844 	ia32_address_t *addr;
2846 	assert(ia32_cg_config.use_cmov);
2847 	assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2851 	match_flags = match_commutative | match_am | match_16bit_am |
/* the flags node is passed so matching won't fold loads past the compare */
2854 	match_arguments(&am, block, val_false, val_true, flags, match_flags);
2856 	new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2857 	                            addr->mem, am.new_op1, am.new_op2, new_flags,
2858 	                            am.ins_permuted, pnc);
2859 	set_am_attributes(new_node, &am);
2861 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2863 	new_node = fix_mem_proj(new_node, &am);
/* Create a Set(cc) producing 0/1 from the given flags, widened to the
 * result mode of @p orig_node.  setcc only writes an 8bit register, so a
 * zero-extending Conv is appended for wider modes. */
2870 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2871 		ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2874 	ir_graph *irg = current_ir_graph;
2875 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2876 	ir_node *nomem = new_NoMem();
2877 	ir_mode *mode = get_irn_mode(orig_node);
2880 	new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2881 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2883 	/* we might need to conv the result up */
2884 	if(get_mode_size_bits(mode) > 8) {
2885 		new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2886 		                                    nomem, new_node, mode_Bu);
2887 		SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2894  * Transforms a Psi node into CMov.
2896  * @return The transformed node.
/* Special-cases Psi(c, 1, 0) and Psi(c, 0, 1) as a single Set (the second
 * form with inverted condition); everything else becomes a CMov. */
2898 static ir_node *gen_Psi(ir_node *node)
2900 	dbg_info *dbgi = get_irn_dbg_info(node);
2901 	ir_node *block = get_nodes_block(node);
2902 	ir_node *new_block = be_transform_node(block);
2903 	ir_node *psi_true = get_Psi_val(node, 0);
2904 	ir_node *psi_default = get_Psi_default(node);
2905 	ir_node *cond = get_Psi_cond(node, 0);
2906 	ir_node *flags = NULL;
/* only single-condition Psis with a gp-register result are handled here */
2910 	assert(get_Psi_n_conds(node) == 1);
2911 	assert(get_irn_mode(cond) == mode_b);
2912 	assert(mode_needs_gp_reg(get_irn_mode(node)));
2914 	flags = get_flags_node(cond, &pnc);
2916 	if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2917 		new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2918 	} else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
/* inverted selection: last argument requests a permuted/negated set */
2919 		new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2921 		new_node = create_CMov(node, cond, flags, pnc);
2928  * Create a conversion from x87 state register to general purpose.
/* Lowers float->int by storing with fist (truncating rounding mode) to a
 * frame slot and loading the result back as an integer. */
2930 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2931 	ir_node *block = be_transform_node(get_nodes_block(node));
2932 	ir_node *op = get_Conv_op(node);
2933 	ir_node *new_op = be_transform_node(op);
2934 	ia32_code_gen_t *cg = env_cg;
2935 	ir_graph *irg = current_ir_graph;
2936 	dbg_info *dbgi = get_irn_dbg_info(node);
2937 	ir_node *noreg = ia32_new_NoReg_gp(cg);
/* C semantics require truncation, so force the FPU into truncate mode */
2938 	ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2939 	ir_mode *mode = get_irn_mode(node);
2940 	ir_node *fist, *load;
2943 	fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2944 	                         new_NoMem(), new_op, trunc_mode);
2946 	set_irn_pinned(fist, op_pin_state_floats);
2947 	set_ia32_use_frame(fist);
2948 	set_ia32_op_type(fist, ia32_AddrModeD);
2950 	assert(get_mode_size_bits(mode) <= 32);
2951 	/* exception we can only store signed 32 bit integers, so for unsigned
2952 	   we store a 64bit (signed) integer and load the lower bits */
2953 	if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2954 		set_ia32_ls_mode(fist, mode_Ls);
2956 		set_ia32_ls_mode(fist, mode_Is);
2958 	SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* reload the (lower 32 bits of the) stored integer from the frame slot */
2961 	load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2963 	set_irn_pinned(load, op_pin_state_floats);
2964 	set_ia32_use_frame(load);
2965 	set_ia32_op_type(load, ia32_AddrModeS);
2966 	set_ia32_ls_mode(load, mode_Is);
/* tell the spill-slot allocator how large the shared stack entity must be */
2967 	if(get_ia32_ls_mode(fist) == mode_Ls) {
2968 		ia32_attr_t *attr = get_ia32_attr(load);
2969 		attr->data.need_64bit_stackent = 1;
2971 		ia32_attr_t *attr = get_ia32_attr(load);
2972 		attr->data.need_32bit_stackent = 1;
2974 	SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2976 	return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2980  * Creates a x87 strict Conv by placing a Sore and a Load
/* The x87 computes in 80bit extended precision; a strict Conv must round
 * to the target precision.  This is forced by a store/load round trip
 * through a frame slot. */
2982 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2984 	ir_node *block = get_nodes_block(node);
2985 	ir_graph *irg = current_ir_graph;
2986 	dbg_info *dbgi = get_irn_dbg_info(node);
2987 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2988 	ir_node *nomem = new_NoMem();
2989 	ir_node *frame = get_irg_frame(irg);
2990 	ir_node *store, *load;
2993 	store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
2995 	set_ia32_use_frame(store);
2996 	set_ia32_op_type(store, ia32_AddrModeD);
2997 	SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
2999 	load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3001 	set_ia32_use_frame(load);
3002 	set_ia32_op_type(load, ia32_AddrModeS);
3003 	SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3005 	new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3010  * Create a conversion from general purpose to x87 register
/* Lowers int->float via fild.  A signed 32bit source may be loaded by fild
 * directly from memory (source AM); otherwise the integer is first widened
 * to 32bit, spilled to the frame and loaded by fild.  Unsigned 32bit values
 * are spilled as a 64bit integer with a zeroed high word, since fild only
 * understands signed operands. */
3012 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3013 	ir_node *src_block = get_nodes_block(node);
3014 	ir_node *block = be_transform_node(src_block);
3015 	ir_graph *irg = current_ir_graph;
3016 	dbg_info *dbgi = get_irn_dbg_info(node);
3017 	ir_node *op = get_Conv_op(node);
3018 	ir_node *new_op = NULL;
3022 	ir_mode *store_mode;
3028 	/* fild can use source AM if the operand is a signed 32bit integer */
3029 	if (src_mode == mode_Is) {
3030 		ia32_address_mode_t am;
3032 		match_arguments(&am, src_block, NULL, op, NULL,
3033 		                match_am | match_try_am);
3034 		if (am.op_type == ia32_AddrModeS) {
3035 			ia32_address_t *addr = &am.addr;
3037 			fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3038 			                         addr->index, addr->mem);
3039 			new_node = new_r_Proj(irg, block, fild, mode_vfp,
3042 			set_am_attributes(fild, &am);
3043 			SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3045 			fix_mem_proj(fild, &am);
/* AM matching failed: fall back to the explicit spill + fild path */
3050 	if(new_op == NULL) {
3051 		new_op = be_transform_node(op);
3054 	noreg = ia32_new_NoReg_gp(env_cg);
3055 	nomem = new_NoMem();
3056 	mode = get_irn_mode(op);
3058 	/* first convert to 32 bit signed if necessary */
3059 	src_bits = get_mode_size_bits(src_mode);
3060 	if (src_bits == 8) {
3061 		new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3063 		SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3065 	} else if (src_bits < 32) {
3066 		new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3068 		SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3072 		assert(get_mode_size_bits(mode) == 32);
/* spill the integer to the frame so fild can pick it up from memory */
3075 	store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3078 	set_ia32_use_frame(store);
3079 	set_ia32_op_type(store, ia32_AddrModeD);
3080 	set_ia32_ls_mode(store, mode_Iu);
3082 	/* exception for 32bit unsigned, do a 64bit spill+load */
3083 	if(!mode_is_signed(mode)) {
/* write an extra zero word at offset 4 so the 64bit value is non-negative */
3086 		ir_node *zero_const = create_Immediate(NULL, 0, 0);
3088 		ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3089 		                                        get_irg_frame(irg), noreg, nomem,
3092 		set_ia32_use_frame(zero_store);
3093 		set_ia32_op_type(zero_store, ia32_AddrModeD);
3094 		add_ia32_am_offs_int(zero_store, 4);
3095 		set_ia32_ls_mode(zero_store, mode_Iu);
/* both stores must complete before the fild reads: join them with a Sync */
3100 		store = new_rd_Sync(dbgi, irg, block, 2, in);
3101 		store_mode = mode_Ls;
3103 		store_mode = mode_Is;
3107 	fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3109 	set_ia32_use_frame(fild);
3110 	set_ia32_op_type(fild, ia32_AddrModeS);
3111 	set_ia32_ls_mode(fild, store_mode);
3113 	new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3119  * Create a conversion from one integer mode into another one
/* Emits a Conv_I2I(8Bit) whose load/store mode is the *smaller* of the two
 * modes: the sign/zero extension semantics are carried by that mode. */
3121 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3122                                 dbg_info *dbgi, ir_node *block, ir_node *op,
3125 	ir_graph *irg = current_ir_graph;
3126 	int src_bits = get_mode_size_bits(src_mode);
3127 	int tgt_bits = get_mode_size_bits(tgt_mode);
3128 	ir_node *new_block = be_transform_node(block);
3130 	ir_mode *smaller_mode;
3132 	ia32_address_mode_t am;
3133 	ia32_address_t *addr = &am.addr;
3136 	if (src_bits < tgt_bits) {
3137 		smaller_mode = src_mode;
3138 		smaller_bits = src_bits;
3140 		smaller_mode = tgt_mode;
3141 		smaller_bits = tgt_bits;
3144 #ifdef DEBUG_libfirm
/* a Conv directly after a constant should have been folded earlier */
3146 		ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3151 	match_arguments(&am, block, NULL, op, NULL,
3152 	                match_8bit | match_16bit |
3153 	                match_am | match_8bit_am | match_16bit_am);
3154 	if (smaller_bits == 8) {
3155 		new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3156 		                                    addr->index, addr->mem, am.new_op2,
3159 		new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3160 		                                addr->index, addr->mem, am.new_op2,
3163 	set_am_attributes(new_node, &am);
3164 	/* match_arguments assume that out-mode = in-mode, this isn't true here
/* so the ls_mode set by set_am_attributes must be overwritten */
3166 	set_ia32_ls_mode(new_node, smaller_mode);
3167 	SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3168 	new_node = fix_mem_proj(new_node, &am);
3173  * Transforms a Conv node.
3175  * @return The created ia32 Conv node
/* Central Conv dispatcher.  Handles: bool sources (no-op), same-mode
 * conversions (no-op except strict x87), float<->float, float->int,
 * int->float and int->int, choosing SSE or x87 lowerings as configured. */
3177 static ir_node *gen_Conv(ir_node *node) {
3178 	ir_node *block = get_nodes_block(node);
3179 	ir_node *new_block = be_transform_node(block);
3180 	ir_node *op = get_Conv_op(node);
3181 	ir_node *new_op = NULL;
3182 	ir_graph *irg = current_ir_graph;
3183 	dbg_info *dbgi = get_irn_dbg_info(node);
3184 	ir_mode *src_mode = get_irn_mode(op);
3185 	ir_mode *tgt_mode = get_irn_mode(node);
3186 	int src_bits = get_mode_size_bits(src_mode);
3187 	int tgt_bits = get_mode_size_bits(tgt_mode);
3188 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3189 	ir_node *nomem = new_rd_NoMem(irg);
3190 	ir_node *res = NULL;
3192 	if (src_mode == mode_b) {
3193 		assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3194 		/* nothing to do, we already model bools as 0/1 ints */
3195 		return be_transform_node(op);
3198 	if (src_mode == tgt_mode) {
3199 		if (get_Conv_strict(node)) {
3200 			if (ia32_cg_config.use_sse2) {
3201 				/* when we are in SSE mode, we can kill all strict no-op conversion */
3202 				return be_transform_node(op);
3205 			/* this should be optimized already, but who knows... */
3206 			DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3207 			DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3208 			return be_transform_node(op);
3212 	if (mode_is_float(src_mode)) {
3213 		new_op = be_transform_node(op);
3214 		/* we convert from float ... */
3215 		if (mode_is_float(tgt_mode)) {
/* narrowing E->D without strict semantics needs no code on x87 */
3216 			if(src_mode == mode_E && tgt_mode == mode_D
3217 					&& !get_Conv_strict(node)) {
3218 				DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3223 			if (ia32_cg_config.use_sse2) {
3224 				DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3225 				res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3227 				set_ia32_ls_mode(res, tgt_mode);
/* x87: only strict conversions need a rounding store/load pair */
3229 				if(get_Conv_strict(node)) {
3230 					res = gen_x87_strict_conv(tgt_mode, new_op);
3231 					SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3234 					DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3239 			DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3240 			if (ia32_cg_config.use_sse2) {
3241 				res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3243 				set_ia32_ls_mode(res, src_mode);
3245 				return gen_x87_fp_to_gp(node);
3249 		/* we convert from int ... */
3250 		if (mode_is_float(tgt_mode)) {
3252 			DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3253 			if (ia32_cg_config.use_sse2) {
3254 				new_op = be_transform_node(op);
3255 				res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3257 				set_ia32_ls_mode(res, tgt_mode);
3259 				res = gen_x87_gp_to_fp(node, src_mode);
3260 				if(get_Conv_strict(node)) {
3261 					res = gen_x87_strict_conv(tgt_mode, res);
3262 					SET_IA32_ORIG_NODE(get_Proj_pred(res),
3263 					                   ia32_get_old_node_name(env_cg, node));
3267 		} else if(tgt_mode == mode_b) {
3268 			/* mode_b lowering already took care that we only have 0/1 values */
3269 			DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3270 			    src_mode, tgt_mode));
3271 			return be_transform_node(op);
/* int -> int: same size is a no-op, otherwise create a real conversion */
3274 			if (src_bits == tgt_bits) {
3275 				DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3276 				    src_mode, tgt_mode));
3277 				return be_transform_node(op);
3280 			res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Check whether @p val satisfies the given GCC-style x86 immediate
 * constraint letter (0 means "any immediate").  The ranges below follow
 * the ia32 machine constraints; the exact letter belonging to each range
 * is determined by the case labels of this switch — presumably the
 * standard I/J/K/L/M/N/O letters, verify against the GCC docs. */
3288 static int check_immediate_constraint(long val, char immediate_constraint_type)
3290 	switch (immediate_constraint_type) {
3294 			return val >= 0 && val <= 32;
3296 			return val >= 0 && val <= 63;
3298 			return val >= -128 && val <= 127;
3300 			return val == 0xff || val == 0xffff;
3302 			return val >= 0 && val <= 3;
3304 			return val >= 0 && val <= 255;
3306 			return val >= 0 && val <= 127;
3310 	panic("Invalid immediate constraint found");
/* Try to express @p node as an ia32 Immediate node.
 * Recognised shapes: Const, SymConst, Add/Sub of Const and SymConst, each
 * optionally wrapped in a Minus (tracked via the sign flags).
 * @return the Immediate, or NULL if the node is no valid immediate for the
 *         given constraint letter. */
3314 static ir_node *try_create_Immediate(ir_node *node,
3315                                      char immediate_constraint_type)
3318 	tarval *offset = NULL;
3319 	int offset_sign = 0;
3321 	ir_entity *symconst_ent = NULL;
3322 	int symconst_sign = 0;
3324 	ir_node *cnst = NULL;
3325 	ir_node *symconst = NULL;
/* only integer/pointer values can be immediates */
3328 	mode = get_irn_mode(node);
3329 	if(!mode_is_int(mode) && !mode_is_reference(mode)) {
3333 	if(is_Minus(node)) {
3335 		node = get_Minus_op(node);
3338 	if(is_Const(node)) {
3341 		offset_sign = minus;
3342 	} else if(is_SymConst(node)) {
3345 		symconst_sign = minus;
3346 	} else if(is_Add(node)) {
3347 		ir_node *left = get_Add_left(node);
3348 		ir_node *right = get_Add_right(node);
3349 		if(is_Const(left) && is_SymConst(right)) {
3352 			symconst_sign = minus;
3353 			offset_sign = minus;
3354 		} else if(is_SymConst(left) && is_Const(right)) {
3357 			symconst_sign = minus;
3358 			offset_sign = minus;
3360 	} else if(is_Sub(node)) {
3361 		ir_node *left = get_Sub_left(node);
3362 		ir_node *right = get_Sub_right(node);
/* for Sub the sign of the subtrahend side is flipped relative to Minus */
3363 		if(is_Const(left) && is_SymConst(right)) {
3366 			symconst_sign = !minus;
3367 			offset_sign = minus;
3368 		} else if(is_SymConst(left) && is_Const(right)) {
3371 			symconst_sign = minus;
3372 			offset_sign = !minus;
/* validate the numeric part against the constraint range */
3379 		offset = get_Const_tarval(cnst);
3380 		if(tarval_is_long(offset)) {
3381 			val = get_tarval_long(offset);
3383 			ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
3388 		if(!check_immediate_constraint(val, immediate_constraint_type))
3391 	if(symconst != NULL) {
3392 		if(immediate_constraint_type != 0) {
3393 			/* we need full 32bits for symconsts */
3397 		/* unfortunately the assembler/linker doesn't support -symconst */
/* only entity-address SymConsts can be encoded as immediates */
3401 		if(get_SymConst_kind(symconst) != symconst_addr_ent)
3403 		symconst_ent = get_SymConst_entity(symconst);
3405 	if(cnst == NULL && symconst == NULL)
3408 	if(offset_sign && offset != NULL) {
3409 		offset = tarval_neg(offset);
3412 	new_node = create_Immediate(symconst_ent, symconst_sign, val);
/* Convenience wrapper: produce an Immediate for @p node if possible,
 * otherwise transform the node normally. */
3417 static ir_node *create_immediate_or_transform(ir_node *node,
3418                                               char immediate_constraint_type)
3420 	ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3421 	if (new_node == NULL) {
3422 		new_node = be_transform_node(node);
/* Shared register requirement meaning "no register needed at all",
 * used e.g. for memory asm constraints. */
3427 static const arch_register_req_t no_register_req = {
3428 	arch_register_req_type_none,
3429 	NULL,                         /* regclass */
3430 	NULL,                         /* limit bitset */
3432 	0                             /* different pos */
3436  * An assembler constraint.
3438 typedef struct constraint_t constraint_t;
3439 struct constraint_t {
3442 	const arch_register_req_t **out_reqs;     /* output requirement array (shared across constraints) */
3444 	const arch_register_req_t *req;           /* parsed requirement for this operand */
3445 	unsigned                    immediate_possible; /* != 0 if an immediate operand is allowed */
3446 	char                        immediate_type;     /* constraint letter for the immediate, 0 = any */
/* Parse one GCC inline-asm operand constraint string into an
 * arch_register_req_t stored in @p constraint.
 * Handles: memory constraints, single-register letters (a/b/c/d/D/S),
 * register-set letters (q/A/Q/r...), x87 and SSE classes, immediate
 * letters, and "same as output N" digit constraints.
 * @param pos         operand position (used for should-be-same encoding)
 * @param constraint  in/out parse state and result
 * @param c           the constraint string to parse */
3449 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3451 	int immediate_possible = 0;
3452 	char immediate_type = 0;
3453 	unsigned limited = 0;
3454 	const arch_register_class_t *cls = NULL;
3455 	ir_graph *irg = current_ir_graph;
3456 	struct obstack *obst = get_irg_obstack(irg);
3457 	arch_register_req_t *req;
3458 	unsigned *limited_ptr = NULL;
3462 	/* TODO: replace all the asserts with nice error messages */
3465 	/* a memory constraint: no need to do anything in backend about it
3466 	 * (the dependencies are already respected by the memory edge of
3468 		constraint->req = &no_register_req;
/* single-register constraints: each letter pins one gp register via the
 * "limited" bitmask; mixing with other classes is rejected by the asserts */
3480 		assert(cls == NULL ||
3481 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3482 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3483 		limited |= 1 << REG_EAX;
3486 		assert(cls == NULL ||
3487 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3488 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3489 		limited |= 1 << REG_EBX;
3492 		assert(cls == NULL ||
3493 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3494 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3495 		limited |= 1 << REG_ECX;
3498 		assert(cls == NULL ||
3499 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3500 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3501 		limited |= 1 << REG_EDX;
3504 		assert(cls == NULL ||
3505 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3506 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3507 		limited |= 1 << REG_EDI;
3510 		assert(cls == NULL ||
3511 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3512 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3513 		limited |= 1 << REG_ESI;
3516 	case 'q': /* q means lower part of the regs only, this makes no
3517 	           * difference to Q for us (we only assigne whole registers) */
3518 		assert(cls == NULL ||
3519 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3520 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3521 		limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
/* register-pair / multi-register sets */
3525 		assert(cls == NULL ||
3526 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3527 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3528 		limited |= 1 << REG_EAX | 1 << REG_EDX;
3531 		assert(cls == NULL ||
3532 		       (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3533 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3534 		limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3535 		           1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
/* any general purpose register, no limitation bitmask */
3542 		assert(cls == NULL);
3543 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3549 		/* TODO: mark values so the x87 simulator knows about t and u */
3550 		assert(cls == NULL);
3551 		cls = &ia32_reg_classes[CLASS_ia32_vfp];
3556 		assert(cls == NULL);
3557 		/* TODO: check that sse2 is supported */
3558 		cls = &ia32_reg_classes[CLASS_ia32_xmm];
/* immediate constraint letters: remember the letter for range checking */
3568 		assert(!immediate_possible);
3569 		immediate_possible = 1;
3570 		immediate_type = *c;
3574 		assert(!immediate_possible);
3575 		immediate_possible = 1;
/* 'g'-style constraints: general register or immediate */
3579 		assert(!immediate_possible && cls == NULL);
3580 		immediate_possible = 1;
3581 		cls = &ia32_reg_classes[CLASS_ia32_gp];
/* digit constraint: input must match output operand <same_as> */
3594 		assert(constraint->is_in && "can only specify same constraint "
3597 		sscanf(c, "%d%n", &same_as, &p);
3605 		/* memory constraint no need to do anything in backend about it
3606 		 * (the dependencies are already respected by the memory edge of
3608 		constraint->req = &no_register_req;
3611 	case 'E': /* no float consts yet */
3612 	case 'F': /* no float consts yet */
3613 	case 's': /* makes no sense on x86 */
3614 	case 'X': /* we can't support that in firm */
3617 	case '<': /* no autodecrement on x86 */
3618 	case '>': /* no autoincrement on x86 */
3619 	case 'C': /* sse constant not supported yet */
3620 	case 'G': /* 80387 constant not supported yet */
3621 	case 'y': /* we don't support mmx registers yet */
3622 	case 'Z': /* not available in 32 bit mode */
3623 	case 'e': /* not available in 32 bit mode */
3624 		panic("unsupported asm constraint '%c' found in (%+F)",
3625 		      *c, current_ir_graph);
3628 		panic("unknown asm constraint '%c' found in (%+F)", *c,
/* resolve a "same as output" constraint */
3636 		const arch_register_req_t *other_constr;
3638 		assert(cls == NULL && "same as and register constraint not supported");
3639 		assert(!immediate_possible && "same as and immediate constraint not "
3641 		assert(same_as < constraint->n_outs && "wrong constraint number in "
3642 		       "same_as constraint");
3644 		other_constr = constraint->out_reqs[same_as];
3646 		req = obstack_alloc(obst, sizeof(req[0]));
3647 		req->cls = other_constr->cls;
3648 		req->type = arch_register_req_type_should_be_same;
3649 		req->limited = NULL;
3650 		req->other_same = 1U << pos;
3651 		req->other_different = 0;
3653 		/* switch constraints. This is because in firm we have same_as
3654 		 * constraints on the output constraints while in the gcc asm syntax
3655 		 * they are specified on the input constraints */
3656 		constraint->req = other_constr;
3657 		constraint->out_reqs[same_as] = req;
3658 		constraint->immediate_possible = 0;
/* default: an immediate without class defaults to the gp class */
3662 	if(immediate_possible && cls == NULL) {
3663 		cls = &ia32_reg_classes[CLASS_ia32_gp];
3665 	assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3666 	assert(cls != NULL);
3668 	if(immediate_possible) {
3669 		assert(constraint->is_in
3670 		       && "immediate make no sense for output constraints");
3672 	/* todo: check types (no float input on 'r' constrained in and such... */
/* allocate the requirement; reserve extra space for the limited bitmask */
3675 		req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3676 		limited_ptr = (unsigned*) (req+1);
3678 		req = obstack_alloc(obst, sizeof(req[0]));
3680 	memset(req, 0, sizeof(req[0]));
3683 		req->type = arch_register_req_type_limited;
3684 		*limited_ptr = limited;
3685 		req->limited = limited_ptr;
3687 		req->type = arch_register_req_type_normal;
3691 	constraint->req = req;
3692 	constraint->immediate_possible = immediate_possible;
3693 	constraint->immediate_type = immediate_type;
/* Parse one clobber entry of an ASM node into a limited register
 * requirement pinning exactly the clobbered register.
 * Registers are found by linear name search; for the gp class the
 * leading 'e' is also stripped so e.g. "ax" matches "eax". */
3696 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3697                           const char *clobber)
3699 	ir_graph *irg = get_irn_irg(node);
3700 	struct obstack *obst = get_irg_obstack(irg);
3701 	const arch_register_t *reg = NULL;
3704 	arch_register_req_t *req;
3705 	const arch_register_class_t *cls;
3710 	/* TODO: construct a hashmap instead of doing linear search for clobber
3712 	for(c = 0; c < N_CLASSES; ++c) {
3713 		cls = & ia32_reg_classes[c];
3714 		for(r = 0; r < cls->n_regs; ++r) {
3715 			const arch_register_t *temp_reg = arch_register_for_index(cls, r);
3716 			if(strcmp(temp_reg->name, clobber) == 0
3717 					|| (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3726 		panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* the limited bitmask is a single unsigned, so index must fit in 32 bits */
3730 	assert(reg->index < 32);
3732 	limited = obstack_alloc(obst, sizeof(limited[0]));
3733 	*limited = 1 << reg->index;
3735 	req = obstack_alloc(obst, sizeof(req[0]));
3736 	memset(req, 0, sizeof(req[0]));
3737 	req->type = arch_register_req_type_limited;
3739 	req->limited = limited;
3741 	constraint->req = req;
3742 	constraint->immediate_possible = 0;
3743 	constraint->immediate_type = 0;
/**
 * Returns non-zero if the given asm constraint describes a memory operand.
 * Scans the constraint string character by character; presumably it looks
 * for the 'm' constraint letter — the loop body is elided in this view,
 * TODO confirm.
 */
3746 static int is_memory_op(const ir_asm_constraint *constraint)
3748 ident *id = constraint->constraint;
3749 const char *str = get_id_str(id);
3752 for(c = str; *c != '\0'; ++c) {
3761 * generates code for a ASM node
/* Transforms a firm ASM node into an ia32_Asm node: parses all output,
 * clobber and input constraints into register requirements, builds the
 * register_map that links operand positions to in/out slots, and replaces
 * immediate-capable inputs by Immediate nodes where possible. */
3763 static ir_node *gen_ASM(ir_node *node)
3766 ir_graph *irg = current_ir_graph;
3767 ir_node *block = get_nodes_block(node);
3768 ir_node *new_block = be_transform_node(block);
3769 dbg_info *dbgi = get_irn_dbg_info(node);
3773 int n_out_constraints;
3775 const arch_register_req_t **out_reg_reqs;
3776 const arch_register_req_t **in_reg_reqs;
3777 ia32_asm_reg_t *register_map;
3778 unsigned reg_map_size = 0;
3779 struct obstack *obst;
3780 const ir_asm_constraint *in_constraints;
3781 const ir_asm_constraint *out_constraints;
3783 constraint_t parsed_constraint;
3785 arity = get_irn_arity(node);
3786 in = alloca(arity * sizeof(in[0]));
3787 memset(in, 0, arity * sizeof(in[0]));
3789 n_out_constraints = get_ASM_n_output_constraints(node);
3790 n_clobbers = get_ASM_n_clobbers(node);
3791 out_arity = n_out_constraints + n_clobbers;
3792 /* hack to keep space for mem proj */
3796 in_constraints = get_ASM_input_constraints(node);
3797 out_constraints = get_ASM_output_constraints(node);
3798 clobbers = get_ASM_clobbers(node);
3800 /* construct output constraints */
3801 obst = get_irg_obstack(irg);
3802 out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3803 parsed_constraint.out_reqs = out_reg_reqs;
3804 parsed_constraint.n_outs = n_out_constraints;
3805 parsed_constraint.is_in = 0;
3807 for(i = 0; i < out_arity; ++i) {
3810 if(i < n_out_constraints) {
3811 const ir_asm_constraint *constraint = &out_constraints[i];
3812 c = get_id_str(constraint->constraint);
3813 parse_asm_constraint(i, &parsed_constraint, c);
/* reg_map_size tracks the highest operand position seen; presumably it is
 * bumped to pos+1 by elided code before being used as an array size below
 * — TODO confirm */
3815 if(constraint->pos > reg_map_size)
3816 reg_map_size = constraint->pos;
3818 out_reg_reqs[i] = parsed_constraint.req;
3819 } else if(i < out_arity - 1) {
3820 ident *glob_id = clobbers [i - n_out_constraints];
3821 assert(glob_id != NULL);
3822 c = get_id_str(glob_id);
3823 parse_clobber(node, i, &parsed_constraint, c);
/* NOTE(review): clobber reqs are stored shifted by one; slot
 * n_out_constraints is reserved for the mem proj (see "hack" above) —
 * verify against the elided lines */
3825 out_reg_reqs[i+1] = parsed_constraint.req;
3829 out_reg_reqs[n_out_constraints] = &no_register_req;
3831 /* construct input constraints */
3832 in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3833 parsed_constraint.is_in = 1;
3834 for(i = 0; i < arity; ++i) {
3835 const ir_asm_constraint *constraint = &in_constraints[i];
3836 ident *constr_id = constraint->constraint;
3837 const char *c = get_id_str(constr_id);
3839 parse_asm_constraint(i, &parsed_constraint, c);
3840 in_reg_reqs[i] = parsed_constraint.req;
3842 if(constraint->pos > reg_map_size)
3843 reg_map_size = constraint->pos;
/* try to fold constant inputs into Immediate operands when the
 * constraint allows it */
3845 if(parsed_constraint.immediate_possible) {
3846 ir_node *pred = get_irn_n(node, i);
3847 char imm_type = parsed_constraint.immediate_type;
3848 ir_node *immediate = try_create_Immediate(pred, imm_type);
3850 if(immediate != NULL) {
3857 register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3858 memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
/* record output operands in the register map */
3860 for(i = 0; i < n_out_constraints; ++i) {
3861 const ir_asm_constraint *constraint = &out_constraints[i];
3862 unsigned pos = constraint->pos;
3864 assert(pos < reg_map_size);
3865 register_map[pos].use_input = 0;
3866 register_map[pos].valid = 1;
3867 register_map[pos].memory = is_memory_op(constraint);
3868 register_map[pos].inout_pos = i;
3869 register_map[pos].mode = constraint->mode;
3872 /* transform inputs */
3873 for(i = 0; i < arity; ++i) {
3874 const ir_asm_constraint *constraint = &in_constraints[i];
3875 unsigned pos = constraint->pos;
3876 ir_node *pred = get_irn_n(node, i);
3877 ir_node *transformed;
3879 assert(pos < reg_map_size);
3880 register_map[pos].use_input = 1;
3881 register_map[pos].valid = 1;
3882 register_map[pos].memory = is_memory_op(constraint);
3883 register_map[pos].inout_pos = i;
3884 register_map[pos].mode = constraint->mode;
3889 transformed = be_transform_node(pred);
3890 in[i] = transformed;
3893 new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3894 get_ASM_text(node), register_map);
3896 set_ia32_out_req_all(new_node, out_reg_reqs);
3897 set_ia32_in_req_all(new_node, in_reg_reqs);
3899 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3905 * Transforms a FrameAddr into an ia32 Add.
/* (Implemented as an ia32_Lea based on the frame pointer; the Lea carries
 * the frame entity and is marked as frame-using so the final offset can be
 * filled in after stack-frame layout.) */
3907 static ir_node *gen_be_FrameAddr(ir_node *node) {
3908 ir_node *block = be_transform_node(get_nodes_block(node));
3909 ir_node *op = be_get_FrameAddr_frame(node);
3910 ir_node *new_op = be_transform_node(op);
3911 ir_graph *irg = current_ir_graph;
3912 dbg_info *dbgi = get_irn_dbg_info(node);
3913 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3916 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3917 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3918 set_ia32_use_frame(new_node);
3920 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3926 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/* With SSE2, a float return value lives in an xmm register, but the ia32
 * calling convention returns floats on the x87 stack.  This routine spills
 * the xmm value to the frame (xStoreSimple), reloads it into an x87
 * register (vfld), and rebuilds the Barrier so the Return uses the x87
 * value and the new memory state.  All other Returns are just duplicated. */
3928 static ir_node *gen_be_Return(ir_node *node) {
3929 ir_graph *irg = current_ir_graph;
3930 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3931 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3932 ir_entity *ent = get_irg_entity(irg);
3933 ir_type *tp = get_entity_type(ent);
3938 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3939 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3942 int pn_ret_val, pn_ret_mem, arity, i;
3944 assert(ret_val != NULL);
/* nothing to do unless we return a value and use SSE2 */
3945 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3946 return be_duplicate_node(node);
3949 res_type = get_method_res_type(tp, 0);
3951 if (! is_Primitive_type(res_type)) {
3952 return be_duplicate_node(node);
3955 mode = get_type_mode(res_type);
/* only float returns need the xmm -> x87 copy */
3956 if (! mode_is_float(mode)) {
3957 return be_duplicate_node(node);
3960 assert(get_method_n_ress(tp) == 1);
3962 pn_ret_val = get_Proj_proj(ret_val);
3963 pn_ret_mem = get_Proj_proj(ret_mem);
3965 /* get the Barrier */
3966 barrier = get_Proj_pred(ret_val);
3968 /* get result input of the Barrier */
3969 ret_val = get_irn_n(barrier, pn_ret_val);
3970 new_ret_val = be_transform_node(ret_val);
3972 /* get memory input of the Barrier */
3973 ret_mem = get_irn_n(barrier, pn_ret_mem);
3974 new_ret_mem = be_transform_node(ret_mem);
3976 frame = get_irg_frame(irg);
3978 dbgi = get_irn_dbg_info(barrier);
3979 block = be_transform_node(get_nodes_block(barrier));
3981 noreg = ia32_new_NoReg_gp(env_cg);
3983 /* store xmm0 onto stack */
3984 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3985 new_ret_mem, new_ret_val);
3986 set_ia32_ls_mode(sse_store, mode);
3987 set_ia32_op_type(sse_store, ia32_AddrModeD);
3988 set_ia32_use_frame(sse_store);
3990 /* load into x87 register */
3991 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
3992 set_ia32_op_type(fld, ia32_AddrModeS);
3993 set_ia32_use_frame(fld);
3995 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3996 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3998 /* create a new barrier */
3999 arity = get_irn_arity(barrier);
4000 in = alloca(arity * sizeof(in[0]));
4001 for (i = 0; i < arity; ++i) {
/* route the reloaded x87 value / new memory into the respective
 * barrier slots; all other inputs are transformed normally */
4004 if (i == pn_ret_val) {
4006 } else if (i == pn_ret_mem) {
4009 ir_node *in = get_irn_n(barrier, i);
4010 new_in = be_transform_node(in);
4015 new_barrier = new_ir_node(dbgi, irg, block,
4016 get_irn_op(barrier), get_irn_mode(barrier),
4018 copy_node_attr(barrier, new_barrier);
4019 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so the Return below picks it up */
4020 be_set_transformed_node(barrier, new_barrier);
4021 mark_irn_visited(barrier);
4023 /* transform normally */
4024 return be_duplicate_node(node);
4028 * Transform a be_AddSP into an ia32_SubSP.
/* (AddSP allocates stack space; since the ia32 stack grows downwards this
 * becomes a subtraction from esp.) */
4030 static ir_node *gen_be_AddSP(ir_node *node)
4032 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4033 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4035 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
4039 * Transform a be_SubSP into an ia32_AddSP
/* (SubSP frees stack space; on the downwards-growing ia32 stack this is an
 * addition to esp — the mirror of gen_be_AddSP above.) */
4041 static ir_node *gen_be_SubSP(ir_node *node)
4043 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4044 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4046 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
4050 * This function just sets the register for the Unknown node
4051 * as this is not done during register allocation because Unknown
4052 * is an "ignore" node.
4054 static ir_node *gen_Unknown(ir_node *node) {
4055 ir_mode *mode = get_irn_mode(node);
4057 if (mode_is_float(mode)) {
4058 if (ia32_cg_config.use_sse2) {
4059 return ia32_new_Unknown_xmm(env_cg);
4061 /* Unknown nodes are buggy in x87 simulator, use zero for now... */
4062 ir_graph *irg = current_ir_graph;
4063 dbg_info *dbgi = get_irn_dbg_info(node);
4064 ir_node *block = get_irg_start_block(irg);
4065 ir_node *ret = new_rd_ia32_vfldz(dbgi, irg, block);
4067 /* Const Nodes before the initial IncSP are a bad idea, because
4068 * they could be spilled and we have no SP ready at that point yet.
4069 * So add a dependency to the initial frame pointer calculation to
4070 * avoid that situation.
4072 add_irn_dep(ret, get_irg_frame(irg));
4075 } else if (mode_needs_gp_reg(mode)) {
4076 return ia32_new_Unknown_gp(env_cg);
/* no register class can hold this mode */
4078 panic("unsupported Unknown-Mode");
4084 * Change some phi modes
/* Rebuilds a Phi with the mode of the target register class (gp/xmm/vfp).
 * The new Phi initially keeps the untransformed predecessors; they are
 * fixed up later once all predecessors have been transformed. */
4086 static ir_node *gen_Phi(ir_node *node) {
4087 ir_node *block = be_transform_node(get_nodes_block(node));
4088 ir_graph *irg = current_ir_graph;
4089 dbg_info *dbgi = get_irn_dbg_info(node);
4090 ir_mode *mode = get_irn_mode(node);
4093 if(mode_needs_gp_reg(mode)) {
4094 /* we shouldn't have any 64bit stuff around anymore */
4095 assert(get_mode_size_bits(mode) <= 32);
4096 /* all integer operations are on 32bit registers now */
4098 } else if(mode_is_float(mode)) {
4099 if (ia32_cg_config.use_sse2) {
4106 /* phi nodes allow loops, so we use the old arguments for now
4107 * and fix this later */
4108 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4109 get_irn_in(node) + 1);
4110 copy_node_attr(node, phi);
4111 be_duplicate_deps(node, phi);
4113 be_set_transformed_node(node, phi);
/* make sure the predecessors get transformed eventually */
4114 be_enqueue_preds(node);
/**
 * Transforms an indirect jump (IJmp) into an ia32_IJmp, allowing the jump
 * target to be an address-mode operand or an immediate.
 */
4122 static ir_node *gen_IJmp(ir_node *node)
4124 ir_node *block = get_nodes_block(node);
4125 ir_node *new_block = be_transform_node(block);
4126 ir_graph *irg = current_ir_graph;
4127 dbg_info *dbgi = get_irn_dbg_info(node);
4128 ir_node *op = get_IJmp_target(node);
4130 ia32_address_mode_t am;
4131 ia32_address_t *addr = &am.addr;
/* jump targets are always pointers */
4133 assert(get_irn_mode(op) == mode_P);
4135 match_arguments(&am, block, NULL, op, NULL,
4136 match_am | match_8bit_am | match_16bit_am |
4137 match_immediate | match_8bit | match_16bit);
4139 new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
4140 addr->mem, am.new_op2);
4141 set_am_attributes(new_node, &am);
4142 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute mem users if a load was folded into the jump */
4144 new_node = fix_mem_proj(new_node, &am);
/* Constructor signatures shared by the lowered Load/Store transformers
 * below, matching the generated new_rd_ia32_* constructor functions. */
4149 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4152 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4153 ir_node *val, ir_node *mem);
4156 * Transforms a lowered Load into a "real" one.
/* Creates the final ia32 load via @p func and copies all address-mode
 * attributes (offset, scale, symconst, frame entity) from the lowered node. */
4158 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
4160 ir_node *block = be_transform_node(get_nodes_block(node));
4161 ir_node *ptr = get_irn_n(node, 0);
4162 ir_node *new_ptr = be_transform_node(ptr);
4163 ir_node *mem = get_irn_n(node, 1);
4164 ir_node *new_mem = be_transform_node(mem);
4165 ir_graph *irg = current_ir_graph;
4166 dbg_info *dbgi = get_irn_dbg_info(node);
4167 ir_mode *mode = get_ia32_ls_mode(node);
4168 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4171 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
/* carry over the address-mode attributes of the lowered node */
4173 set_ia32_op_type(new_op, ia32_AddrModeS);
4174 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
4175 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
4176 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
4177 if (is_ia32_am_sc_sign(node))
4178 set_ia32_am_sc_sign(new_op);
4179 set_ia32_ls_mode(new_op, mode);
4180 if (is_ia32_use_frame(node)) {
4181 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4182 set_ia32_use_frame(new_op);
4185 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4191 * Transforms a lowered Store into a "real" one.
/* Creates the final ia32 store via @p func; copies the address offset and
 * frame entity from the lowered node and marks the result as frame-using. */
4193 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
4195 ir_node *block = be_transform_node(get_nodes_block(node));
4196 ir_node *ptr = get_irn_n(node, 0);
4197 ir_node *new_ptr = be_transform_node(ptr);
4198 ir_node *val = get_irn_n(node, 1);
4199 ir_node *new_val = be_transform_node(val);
4200 ir_node *mem = get_irn_n(node, 2);
4201 ir_node *new_mem = be_transform_node(mem);
4202 ir_graph *irg = current_ir_graph;
4203 dbg_info *dbgi = get_irn_dbg_info(node);
4204 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4205 ir_mode *mode = get_ia32_ls_mode(node);
4209 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
4211 am_offs = get_ia32_am_offs_int(node);
4212 add_ia32_am_offs_int(new_op, am_offs);
4214 set_ia32_op_type(new_op, ia32_AddrModeD);
4215 set_ia32_ls_mode(new_op, mode);
4216 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4217 set_ia32_use_frame(new_op);
4219 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/** Transforms an ia32_l_ShlDep into a real ia32_Shl. */
4224 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4226 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4227 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4229 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
4230 match_immediate | match_mode_neutral);
/** Transforms an ia32_l_ShrDep into a real ia32_Shr. */
4233 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4235 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4236 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4237 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/** Transforms an ia32_l_SarDep into a real ia32_Sar. */
4241 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4243 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4244 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4245 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/**
 * Transforms an ia32_l_Add (low part of a 64bit add) into an ia32_Add.
 * The result node is forced to mode_T so the carry flag can be consumed
 * by a following Adc.
 */
4249 static ir_node *gen_ia32_l_Add(ir_node *node) {
4250 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4251 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4252 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
4253 match_commutative | match_am | match_immediate |
4254 match_mode_neutral);
/* gen_binop may hand back a Proj; we need the Add node itself */
4256 if(is_Proj(lowered)) {
4257 lowered = get_Proj_pred(lowered);
4259 assert(is_ia32_Add(lowered));
4260 set_irn_mode(lowered, mode_T);
/** Transforms an ia32_l_Adc (add with carry, high part of 64bit add)
 *  into an ia32_Adc consuming the flags produced by the preceding Add. */
4266 static ir_node *gen_ia32_l_Adc(ir_node *node)
4268 return gen_binop_flags(node, new_rd_ia32_Adc,
4269 match_commutative | match_am | match_immediate |
4270 match_mode_neutral);
4274 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
4276 * @param node The node to transform
4277 * @return the created ia32 vfild node
4279 static ir_node *gen_ia32_l_vfild(ir_node *node) {
4280 return gen_lowered_Load(node, new_rd_ia32_vfild);
4284 * Transforms an ia32_l_Load into a "real" ia32_Load node
4286 * @param node The node to transform
4287 * @return the created ia32 Load node
4289 static ir_node *gen_ia32_l_Load(ir_node *node) {
4290 return gen_lowered_Load(node, new_rd_ia32_Load);
4294 * Transforms an ia32_l_Store into a "real" ia32_Store node
4296 * @param node The node to transform
4297 * @return the created ia32 Store node
4299 static ir_node *gen_ia32_l_Store(ir_node *node) {
4300 return gen_lowered_Store(node, new_rd_ia32_Store);
4304 * Transforms a l_vfist into a "real" vfist node.
4306 * @param node The node to transform
4307 * @return the created ia32 vfist node
/* Unlike the generic gen_lowered_Store this passes the fpu truncation
 * control word, because fist must round towards zero. */
4309 static ir_node *gen_ia32_l_vfist(ir_node *node) {
4310 ir_node *block = be_transform_node(get_nodes_block(node));
4311 ir_node *ptr = get_irn_n(node, 0);
4312 ir_node *new_ptr = be_transform_node(ptr);
4313 ir_node *val = get_irn_n(node, 1);
4314 ir_node *new_val = be_transform_node(val);
4315 ir_node *mem = get_irn_n(node, 2);
4316 ir_node *new_mem = be_transform_node(mem);
4317 ir_graph *irg = current_ir_graph;
4318 dbg_info *dbgi = get_irn_dbg_info(node);
4319 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4320 ir_mode *mode = get_ia32_ls_mode(node);
4321 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4325 new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
4326 new_val, trunc_mode);
4328 am_offs = get_ia32_am_offs_int(node);
4329 add_ia32_am_offs_int(new_op, am_offs);
4331 set_ia32_op_type(new_op, ia32_AddrModeD);
4332 set_ia32_ls_mode(new_op, mode);
4333 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4334 set_ia32_use_frame(new_op);
4336 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4342 * Transforms a l_MulS into a "real" MulS node.
4344 * @return the created ia32 Mul node
/* (Unsigned widening multiply; used for the low x high parts of 64bit
 * multiplication.) */
4346 static ir_node *gen_ia32_l_Mul(ir_node *node) {
4347 ir_node *left = get_binop_left(node);
4348 ir_node *right = get_binop_right(node);
4350 return gen_binop(node, left, right, new_rd_ia32_Mul,
4351 match_commutative | match_am | match_mode_neutral);
4355 * Transforms a l_IMulS into a "real" IMul1OPS node.
4357 * @return the created ia32 IMul1OP node
4359 static ir_node *gen_ia32_l_IMul(ir_node *node) {
4360 ir_node *left = get_binop_left(node);
4361 ir_node *right = get_binop_right(node);
4363 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
4364 match_commutative | match_am | match_mode_neutral);
/**
 * Transforms an ia32_l_Sub (low part of a 64bit sub) into an ia32_Sub.
 * The result node is forced to mode_T so the borrow flag can be consumed
 * by a following Sbb.
 */
4367 static ir_node *gen_ia32_l_Sub(ir_node *node) {
4368 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4369 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4370 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
4371 match_am | match_immediate | match_mode_neutral);
/* gen_binop may hand back a Proj; we need the Sub node itself */
4373 if(is_Proj(lowered)) {
4374 lowered = get_Proj_pred(lowered);
4376 assert(is_ia32_Sub(lowered));
4377 set_irn_mode(lowered, mode_T);
/** Transforms an ia32_l_Sbb (subtract with borrow, high part of 64bit sub)
 *  into an ia32_Sbb consuming the flags produced by the preceding Sub. */
4383 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
4384 return gen_binop_flags(node, new_rd_ia32_Sbb,
4385 match_am | match_immediate | match_mode_neutral);
4389 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4390 * op1 - target to be shifted
4391 * op2 - contains bits to be shifted into target
4393 * Only op3 can be an immediate.
4395 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4396 ir_node *low, ir_node *count)
4398 ir_node *block = get_nodes_block(node);
4399 ir_node *new_block = be_transform_node(block);
4400 ir_graph *irg = current_ir_graph;
4401 dbg_info *dbgi = get_irn_dbg_info(node);
4402 ir_node *new_high = be_transform_node(high);
4403 ir_node *new_low = be_transform_node(low);
4407 /* the shift amount can be any mode that is bigger than 5 bits, since all
4408 * other bits are ignored anyway */
4409 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
4410 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4411 count = get_Conv_op(count);
4413 new_count = create_immediate_or_transform(count, 0);
4415 if (is_ia32_l_ShlD(node)) {
4416 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
4419 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
4422 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/** Transforms an ia32_l_ShlD into a real ia32_ShlD (double-width shift left). */
4427 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4429 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4430 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4431 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4432 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transforms an ia32_l_ShrD into a real ia32_ShrD (double-width shift right). */
4435 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4437 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4438 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4439 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4440 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transforms a signed 64bit integer -> float conversion: the two 32bit
 * halves are stored to a frame slot (low word unsigned, high word signed,
 * at offset 4), the stores are Synced, and an x87 fild loads the combined
 * 64bit value as mode_Ls.  Unsigned sources are not supported.
 */
4443 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
4444 ir_node *src_block = get_nodes_block(node);
4445 ir_node *block = be_transform_node(src_block);
4446 ir_graph *irg = current_ir_graph;
4447 dbg_info *dbgi = get_irn_dbg_info(node);
4448 ir_node *frame = get_irg_frame(irg);
4449 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4450 ir_node *nomem = new_NoMem();
4451 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4452 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4453 ir_node *new_val_low = be_transform_node(val_low);
4454 ir_node *new_val_high = be_transform_node(val_high);
4459 ir_node *store_high;
4461 if(!mode_is_signed(get_irn_mode(val_high))) {
4462 panic("unsigned long long -> float not supported yet (%+F)", node);
/* spill the two halves to the frame */
4466 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4468 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4470 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
4471 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
4473 set_ia32_use_frame(store_low);
4474 set_ia32_use_frame(store_high);
4475 set_ia32_op_type(store_low, ia32_AddrModeD);
4476 set_ia32_op_type(store_high, ia32_AddrModeD);
/* high word carries the sign, low word is plain bits */
4477 set_ia32_ls_mode(store_low, mode_Iu);
4478 set_ia32_ls_mode(store_high, mode_Is);
4479 add_ia32_am_offs_int(store_high, 4);
4483 sync = new_rd_Sync(dbgi, irg, block, 2, in);
/* reload the 64bit slot into an x87 register */
4486 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
4488 set_ia32_use_frame(fild);
4489 set_ia32_op_type(fild, ia32_AddrModeS);
4490 set_ia32_ls_mode(fild, mode_Ls);
4492 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
4494 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Transforms a float -> 64bit integer conversion: an x87 fist with
 * truncation rounding stores the 64bit result (mode_Ls) into a frame
 * slot; the two 32bit halves are read back by gen_Proj_l_FloattoLL.
 */
4497 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
4498 ir_node *src_block = get_nodes_block(node);
4499 ir_node *block = be_transform_node(src_block);
4500 ir_graph *irg = current_ir_graph;
4501 dbg_info *dbgi = get_irn_dbg_info(node);
4502 ir_node *frame = get_irg_frame(irg);
4503 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4504 ir_node *nomem = new_NoMem();
4505 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4506 ir_node *new_val = be_transform_node(val);
/* C semantics require truncation towards zero */
4507 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4512 fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
4514 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
4515 set_ia32_use_frame(fist);
4516 set_ia32_op_type(fist, ia32_AddrModeD);
4517 set_ia32_ls_mode(fist, mode_Ls);
4523 * the BAD transformer.
/* (Registered for opcodes that must never reach this phase.) */
4525 static ir_node *bad_transform(ir_node *node) {
4526 panic("No transform function for %+F available.\n", node);
/**
 * Transforms a Proj of an l_FloattoLL: reads one 32bit half of the 64bit
 * result back from the frame slot the fist wrote (offset 4 for the high
 * word).  The stack entity is flagged as needing 64bit even though only
 * 32bit are loaded here.
 */
4530 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
4531 ir_graph *irg = current_ir_graph;
4532 ir_node *block = be_transform_node(get_nodes_block(node));
4533 ir_node *pred = get_Proj_pred(node);
4534 ir_node *new_pred = be_transform_node(pred);
4535 ir_node *frame = get_irg_frame(irg);
4536 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4537 dbg_info *dbgi = get_irn_dbg_info(node);
4538 long pn = get_Proj_proj(node);
4543 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
4544 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
4545 set_ia32_use_frame(load);
4546 set_ia32_op_type(load, ia32_AddrModeS);
4547 set_ia32_ls_mode(load, mode_Iu);
4548 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4549 * 32 bit from it with this particular load */
4550 attr = get_ia32_attr(load);
4551 attr->data.need_64bit_stackent = 1;
4553 if (pn == pn_ia32_l_FloattoLL_res_high) {
4554 add_ia32_am_offs_int(load, 4);
4556 assert(pn == pn_ia32_l_FloattoLL_res_low);
4559 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4565 * Transform the Projs of an AddSP.
/* Maps be_AddSP proj numbers onto the corresponding ia32_SubSP projs
 * (the node itself was swapped by gen_be_AddSP); pins the stack result
 * to esp. */
4567 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4568 ir_node *block = be_transform_node(get_nodes_block(node));
4569 ir_node *pred = get_Proj_pred(node);
4570 ir_node *new_pred = be_transform_node(pred);
4571 ir_graph *irg = current_ir_graph;
4572 dbg_info *dbgi = get_irn_dbg_info(node);
4573 long proj = get_Proj_proj(node);
4575 if (proj == pn_be_AddSP_sp) {
4576 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4577 pn_ia32_SubSP_stack);
4578 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4580 } else if(proj == pn_be_AddSP_res) {
4581 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4582 pn_ia32_SubSP_addr);
4583 } else if (proj == pn_be_AddSP_M) {
4584 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
/* unknown proj number */
4588 return new_rd_Unknown(irg, get_irn_mode(node));
4592 * Transform the Projs of a SubSP.
/* Maps be_SubSP proj numbers onto the corresponding ia32_AddSP projs
 * (the node itself was swapped by gen_be_SubSP); pins the stack result
 * to esp. */
4594 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4595 ir_node *block = be_transform_node(get_nodes_block(node));
4596 ir_node *pred = get_Proj_pred(node);
4597 ir_node *new_pred = be_transform_node(pred);
4598 ir_graph *irg = current_ir_graph;
4599 dbg_info *dbgi = get_irn_dbg_info(node);
4600 long proj = get_Proj_proj(node);
4602 if (proj == pn_be_SubSP_sp) {
4603 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4604 pn_ia32_AddSP_stack);
4605 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4607 } else if (proj == pn_be_SubSP_M) {
4608 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
/* unknown proj number */
4612 return new_rd_Unknown(irg, get_irn_mode(node));
4616 * Transform and renumber the Projs from a Load.
/* Dispatches on the kind of node the Load was transformed into
 * (ia32_Load / Conv / xLoad / vfld) and renumbers the proj accordingly.
 * Mem projs of loads that may still be folded into source address mode
 * are handled specially and kept pointing at the untransformed load. */
4618 static ir_node *gen_Proj_Load(ir_node *node) {
4620 ir_node *block = be_transform_node(get_nodes_block(node));
4621 ir_node *pred = get_Proj_pred(node);
4622 ir_graph *irg = current_ir_graph;
4623 dbg_info *dbgi = get_irn_dbg_info(node);
4624 long proj = get_Proj_proj(node);
4627 /* loads might be part of source address mode matches, so we don't
4628 transform the ProjMs yet (with the exception of loads whose result is
4631 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4634 assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
4636 /* this is needed, because sometimes we have loops that are only
4637 reachable through the ProjM */
4638 be_enqueue_preds(node);
4639 /* do it in 2 steps, to silence firm verifier */
4640 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4641 set_Proj_proj(res, pn_ia32_Load_M);
4645 /* renumber the proj */
4646 new_pred = be_transform_node(pred);
4647 if (is_ia32_Load(new_pred)) {
4650 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4652 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4653 case pn_Load_X_regular:
4654 return new_rd_Jmp(dbgi, irg, block);
4655 case pn_Load_X_except:
4656 /* This Load might raise an exception. Mark it. */
4657 set_ia32_exc_label(new_pred, 1);
4658 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
/* the load may have been folded into a Conv during transformation */
4662 } else if (is_ia32_Conv_I2I(new_pred) ||
4663 is_ia32_Conv_I2I8Bit(new_pred)) {
4664 set_irn_mode(new_pred, mode_T);
4665 if (proj == pn_Load_res) {
4666 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4667 } else if (proj == pn_Load_M) {
4668 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4670 } else if (is_ia32_xLoad(new_pred)) {
4673 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4675 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4676 case pn_Load_X_regular:
4677 return new_rd_Jmp(dbgi, irg, block);
4678 case pn_Load_X_except:
4679 /* This Load might raise an exception. Mark it. */
4680 set_ia32_exc_label(new_pred, 1);
4681 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4685 } else if (is_ia32_vfld(new_pred)) {
4688 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4690 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4691 case pn_Load_X_regular:
4692 return new_rd_Jmp(dbgi, irg, block);
4693 case pn_Load_X_except:
4694 /* This Load might raise an exception. Mark it. */
4695 set_ia32_exc_label(new_pred, 1);
/* NOTE(review): uses pn_ia32_xLoad_X_exc in the vfld branch — looks like
 * a copy/paste of the xLoad case; verify against pn_ia32_vfld_X_exc */
4696 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4701 /* can happen for ProJMs when source address mode happened for the
4704 /* however it should not be the result proj, as that would mean the
4705 load had multiple users and should not have been used for
4707 if (proj != pn_Load_M) {
4708 panic("internal error: transformed node not a Load");
4710 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4714 return new_rd_Unknown(irg, get_irn_mode(node));
4718 * Transform and renumber the Projs from a DivMod like instruction.
/* Handles Div, Mod and DivMod predecessors; all three were transformed
 * into an ia32_Div/IDiv, so the proj numbers are mapped onto
 * pn_ia32_Div_{M,div_res,mod_res,X_exc}. */
4720 static ir_node *gen_Proj_DivMod(ir_node *node) {
4721 ir_node *block = be_transform_node(get_nodes_block(node));
4722 ir_node *pred = get_Proj_pred(node);
4723 ir_node *new_pred = be_transform_node(pred);
4724 ir_graph *irg = current_ir_graph;
4725 dbg_info *dbgi = get_irn_dbg_info(node);
4726 ir_mode *mode = get_irn_mode(node);
4727 long proj = get_Proj_proj(node);
4729 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4731 switch (get_irn_opcode(pred)) {
4735 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4737 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4738 case pn_Div_X_regular:
4739 return new_rd_Jmp(dbgi, irg, block);
4740 case pn_Div_X_except:
4741 set_ia32_exc_label(new_pred, 1);
4742 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4750 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4752 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4753 case pn_Mod_X_except:
4754 set_ia32_exc_label(new_pred, 1);
4755 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4763 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4764 case pn_DivMod_res_div:
4765 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4766 case pn_DivMod_res_mod:
4767 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4768 case pn_DivMod_X_regular:
4769 return new_rd_Jmp(dbgi, irg, block);
4770 case pn_DivMod_X_except:
4771 set_ia32_exc_label(new_pred, 1);
4772 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
/* unknown proj number */
4782 return new_rd_Unknown(irg, mode);
4786 * Transform and renumber the Projs from a CopyB.
/* The CopyB may have become either an ia32_CopyB_i (constant size) or an
 * ia32_CopyB (rep movs); pick the matching mem proj number. */
4788 static ir_node *gen_Proj_CopyB(ir_node *node) {
4789 ir_node *block = be_transform_node(get_nodes_block(node));
4790 ir_node *pred = get_Proj_pred(node);
4791 ir_node *new_pred = be_transform_node(pred);
4792 ir_graph *irg = current_ir_graph;
4793 dbg_info *dbgi = get_irn_dbg_info(node);
4794 ir_mode *mode = get_irn_mode(node);
4795 long proj = get_Proj_proj(node);
4798 case pn_CopyB_M_regular:
4799 if (is_ia32_CopyB_i(new_pred)) {
4800 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4801 } else if (is_ia32_CopyB(new_pred)) {
4802 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
/* unknown proj number */
4810 return new_rd_Unknown(irg, mode);
4814 * Transform and renumber the Projs from a Quot.
/* The Quot became either an SSE xDiv or an x87 vfdiv depending on the
 * float backend; map the mem and result projs accordingly. */
4816 static ir_node *gen_Proj_Quot(ir_node *node) {
4817 ir_node *block = be_transform_node(get_nodes_block(node));
4818 ir_node *pred = get_Proj_pred(node);
4819 ir_node *new_pred = be_transform_node(pred);
4820 ir_graph *irg = current_ir_graph;
4821 dbg_info *dbgi = get_irn_dbg_info(node);
4822 ir_mode *mode = get_irn_mode(node);
4823 long proj = get_Proj_proj(node);
4827 if (is_ia32_xDiv(new_pred)) {
4828 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4829 } else if (is_ia32_vfdiv(new_pred)) {
4830 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4834 if (is_ia32_xDiv(new_pred)) {
4835 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4836 } else if (is_ia32_vfdiv(new_pred)) {
4837 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
/* unknown proj number */
4845 return new_rd_Unknown(irg, mode);
4849 * Transform the Thread Local Storage Proj.
4851 static ir_node *gen_Proj_tls(ir_node *node) {
4852 ir_node *block = be_transform_node(get_nodes_block(node));
4853 ir_graph *irg = current_ir_graph;
4854 dbg_info *dbgi = NULL;
4855 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
4860 static ir_node *gen_be_Call(ir_node *node) {
4861 ir_node *res = be_duplicate_node(node);
4862 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4867 static ir_node *gen_be_IncSP(ir_node *node) {
4868 ir_node *res = be_duplicate_node(node);
4869 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4875 * Transform the Projs from a be_Call.
/**
 * Transform the Projs from a be_Call.
 *
 * Besides renumbering, this handles the SSE return-value hack: float
 * results arrive in x87 st(0) and are routed through a frame slot
 * (vfst store + xLoad) into an xmm register.
 */
static ir_node *gen_Proj_be_Call(ir_node *node) {
	ir_node *block = be_transform_node(get_nodes_block(node));
	ir_node *call = get_Proj_pred(node);
	ir_node *new_call = be_transform_node(call);
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_type *method_type = be_Call_get_type(call);
	int n_res = get_method_n_ress(method_type);
	long proj = get_Proj_proj(node);
	ir_mode *mode = get_irn_mode(node);
	const arch_register_class_t *cls;

	/* The following is kinda tricky: If we're using SSE, then we have to
	 * move the result value of the call in floating point registers to an
	 * xmm register, we therefore construct a GetST0 -> xLoad sequence
	 * after the call; we have to make sure to correctly make the
	 * MemProj and the result Proj use these 2 nodes. */
	if (proj == pn_be_Call_M_regular) {
		// get new node for result, are we doing the sse load/store hack?
		ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
		ir_node *call_res_new;
		ir_node *call_res_pred = NULL;

		if (call_res != NULL) {
			call_res_new = be_transform_node(call_res);
			call_res_pred = get_Proj_pred(call_res_new);

		/* result unused or no SSE hack: memory Proj stays on the call */
		if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
			return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
			                   pn_be_Call_M_regular);

		/* SSE hack in effect: memory Proj must hang off the xLoad so the
		 * store/load pair is serialized with later memory users */
		assert(is_ia32_xLoad(call_res_pred));
		return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,

	/* is this a Proj of a float call result while SSE2 is in use? */
	if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
	    && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
		ir_node *frame = get_irg_frame(irg);
		ir_node *noreg = ia32_new_NoReg_gp(env_cg);
		ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
		/* in case there is no memory output: create one to serialize the
		 * copy */
		call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
		                       pn_be_Call_M_regular);
		call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
		                       pn_be_Call_first_res);
		/* store st(0) onto stack */
		fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
		set_ia32_op_type(fstp, ia32_AddrModeD);
		set_ia32_use_frame(fstp);
		/* load into SSE register */
		sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
		set_ia32_op_type(sse_load, ia32_AddrModeS);
		set_ia32_use_frame(sse_load);
		sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
	/* transform call modes */
	if (mode_is_data(mode)) {
		/* NOTE(review): presumably this maps the result to the register
		 * class' mode — confirm against the elided tail of this branch */
		cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
	return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4960 * Transform the Projs from a Cmp.
4962 static ir_node *gen_Proj_Cmp(ir_node *node)
4964 /* this probably means not all mode_b nodes were lowered... */
4965 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4970 * Transform and potentially renumber Proj nodes.
/**
 * Transform and potentially renumber Proj nodes: dispatches on the kind
 * of the Proj's predecessor to the matching gen_Proj_* helper.
 */
static ir_node *gen_Proj(ir_node *node) {
	ir_graph *irg = current_ir_graph;
	dbg_info *dbgi = get_irn_dbg_info(node);
	ir_node *pred = get_Proj_pred(node);
	long proj = get_Proj_proj(node);
	if (is_Store(pred)) {
		if (proj == pn_Store_M) {
			return be_transform_node(pred);
		/* a Store produces only a memory value; any other Proj is dead */
		return new_r_Bad(irg);
	} else if (is_Load(pred)) {
		return gen_Proj_Load(node);
	} else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
		return gen_Proj_DivMod(node);
	} else if (is_CopyB(pred)) {
		return gen_Proj_CopyB(node);
	} else if (is_Quot(pred)) {
		return gen_Proj_Quot(node);
	} else if (be_is_SubSP(pred)) {
		return gen_Proj_be_SubSP(node);
	} else if (be_is_AddSP(pred)) {
		return gen_Proj_be_AddSP(node);
	} else if (be_is_Call(pred)) {
		return gen_Proj_be_Call(node);
	} else if (is_Cmp(pred)) {
		return gen_Proj_Cmp(node);
	} else if (get_irn_op(pred) == op_Start) {
		if (proj == pn_Start_X_initial_exec) {
			ir_node *block = get_nodes_block(pred);
			/* we exchange the ProjX with a jump */
			block = be_transform_node(block);
			jump = new_rd_Jmp(dbgi, irg, block);
		/* the TLS anchor Proj becomes an explicit LdTls */
		if (node == be_get_old_anchor(anchor_tls)) {
			return gen_Proj_tls(node);
	} else if (is_ia32_l_FloattoLL(pred)) {
		return gen_Proj_l_FloattoLL(node);
	} else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
		ir_node *new_pred = be_transform_node(pred);
		ir_node *block = be_transform_node(get_nodes_block(node));
		ir_mode *mode = get_irn_mode(node);
		if (mode_needs_gp_reg(mode)) {
			/* gp values are represented in mode_Iu on ia32 */
			ir_node *new_proj = new_r_Proj(irg, block, new_pred, mode_Iu,
			                               get_Proj_proj(node));
#ifdef DEBUG_libfirm
			/* keep the original node number for easier debugging */
			new_proj->node_nr = node->node_nr;
	/* default: the Proj needs no renumbering, simply duplicate it */
	return be_duplicate_node(node);
5038 * Enters all transform functions into the generic pointer
/**
 * Enters all transform functions into the generic function pointer
 * of the corresponding opcodes.
 */
static void register_transformers(void)
	/* first clear the generic function pointer for all ops */
	clear_irp_opcodes_generic_func();
/* GEN installs gen_<op> as the transformer for op; BAD installs the
 * bad_transform stub for ops that must never reach the backend */
#define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
#define BAD(a) op_##a->ops.generic = (op_func)bad_transform
	/* transform ops from intrinsic lowering */
	GEN(ia32_l_LLtoFloat);
	GEN(ia32_l_FloattoLL);
	/* we should never see these nodes */
	/* handle generic backend nodes */
	/* NOTE(review): Mulh is an optional op; presumably its handle is fetched
	 * here before registering a transformer — confirm in the elided lines */
	op_Mulh = get_op_Mulh();
5142 * Pre-transform all unknown and noreg nodes.
/**
 * Pre-transform all unknown and noreg nodes.
 *
 * The code generator caches one Unknown and one NoReg placeholder per
 * register class; transform them up front so later transformations can
 * reference the already-transformed nodes.
 */
static void ia32_pretransform_node(void *arch_cg) {
	ia32_code_gen_t *cg = arch_cg;
	cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
	cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
	cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
	cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
	cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
	cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/**
 * Walker: checks that every ia32 node producing more than one result has
 * its Projs; otherwise creates the missing Projs and keeps them alive
 * using a be_Keep node.
 */
static void add_missing_keep_walker(ir_node *node, void *data)
	unsigned found_projs = 0;	/* bitmask: bit i set <=> a Proj for output i exists */
	const ir_edge_t *edge;
	ir_mode *mode = get_irn_mode(node);
	/* only ia32 nodes are of interest here */
	if(!is_ia32_irn(node))
	n_outs = get_ia32_n_res(node);
	/* SwitchJmp outputs are control flow, not register values */
	if(is_ia32_SwitchJmp(node))
	/* the bitmask must be wide enough to hold one bit per output */
	assert(n_outs < (int) sizeof(unsigned) * 8);
	foreach_out_edge(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		int pn = get_Proj_proj(proj);
		assert(get_irn_mode(proj) == mode_M || pn < n_outs);
		/* NOTE(review): 1 << pn is a signed shift; pn == 31 would be
		 * undefined behaviour — consider 1u << pn */
		found_projs |= 1 << pn;
	/* are keeps missing? */
	for(i = 0; i < n_outs; ++i) {
		const arch_register_req_t *req;
		const arch_register_class_t *class;
		/* output already has a Proj: nothing to do */
		if(found_projs & (1 << i)) {
		req = get_ia32_out_req(node, i);
		/* flag outputs need no artificial keep */
		if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
		block = get_nodes_block(node);
		in[0] = new_r_Proj(current_ir_graph, block, node,
		                   arch_register_class_mode(class), i);
		/* reuse an existing Keep of this node when possible */
		if(last_keep != NULL) {
			be_Keep_add_node(last_keep, class, in[0]);
		last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
		/* keep the schedule consistent when one already exists */
		if(sched_is_scheduled(node)) {
			sched_add_after(node, last_keep);
5225 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
5228 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5230 ir_graph *irg = be_get_birg_irg(cg->birg);
5231 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5234 /* do the transformation */
/* do the transformation */
void ia32_transform_graph(ia32_code_gen_t *cg) {
	ir_graph *irg = cg->irg;
	register_transformers();
	/* reset the cached fpcw node; presumably recreated on demand during the
	 * transformation — confirm against the elided declarations above */
	initial_fpcw = NULL;
	/* heights are needed by the address-mode matching below */
	BE_TIMER_PUSH(t_heights);
	heights = heights_new(irg);
	BE_TIMER_POP(t_heights);
	ia32_calculate_non_address_mode_nodes(cg->birg);
	/* the transform phase is not safe for CSE (yet) because several nodes get
	 * attributes set after their creation */
	cse_last = get_opt_cse();
	be_transform_graph(cg->birg, ia32_pretransform_node, cg);
	/* restore the caller's CSE setting */
	set_opt_cse(cse_last);
	ia32_free_non_address_mode_nodes();
	heights_free(heights);
5262 void ia32_init_transform(void)
5264 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");