2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @brief This file implements the IR transformation from firm into
24 * @author Christian Wuerdig, Matthias Braun
35 #include "irgraph_t.h"
40 #include "iredges_t.h"
52 #include "../benode_t.h"
53 #include "../besched.h"
55 #include "../beutil.h"
56 #include "../beirg_t.h"
57 #include "../betranshlp.h"
60 #include "bearch_ia32_t.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
73 #define SFP_SIGN "0x80000000"
74 #define DFP_SIGN "0x8000000000000000"
75 #define SFP_ABS "0x7FFFFFFF"
76 #define DFP_ABS "0x7FFFFFFFFFFFFFFF"
77 #define DFP_INTMAX "9223372036854775807"
79 #define TP_SFP_SIGN "ia32_sfp_sign"
80 #define TP_DFP_SIGN "ia32_dfp_sign"
81 #define TP_SFP_ABS "ia32_sfp_abs"
82 #define TP_DFP_ABS "ia32_dfp_abs"
83 #define TP_INT_MAX "ia32_int_max"
85 #define ENT_SFP_SIGN "IA32_SFP_SIGN"
86 #define ENT_DFP_SIGN "IA32_DFP_SIGN"
87 #define ENT_SFP_ABS "IA32_SFP_ABS"
88 #define ENT_DFP_ABS "IA32_DFP_ABS"
89 #define ENT_INT_MAX "IA32_INT_MAX"
91 #define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
92 #define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
94 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
96 /** hold the current code generator during transformation */
97 static ia32_code_gen_t *env_cg = NULL;
98 static ir_node *initial_fpcw = NULL;
99 static heights_t *heights = NULL;
101 extern ir_op *get_op_Mulh(void);
103 typedef ir_node *construct_binop_func(dbg_info *db, ir_graph *irg,
104 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
105 ir_node *op1, ir_node *op2);
107 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_graph *irg,
108 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
109 ir_node *op1, ir_node *op2, ir_node *flags);
111 typedef ir_node *construct_shift_func(dbg_info *db, ir_graph *irg,
112 ir_node *block, ir_node *op1, ir_node *op2);
114 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_graph *irg,
115 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
118 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_graph *irg,
119 ir_node *block, ir_node *base, ir_node *index, ir_node *mem);
121 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_graph *irg,
122 ir_node *block, ir_node *base, ir_node *index, ir_node *mem,
123 ir_node *op1, ir_node *op2, ir_node *fpcw);
125 typedef ir_node *construct_unop_func(dbg_info *db, ir_graph *irg,
126 ir_node *block, ir_node *op);
128 static ir_node *try_create_Immediate(ir_node *node,
129 char immediate_constraint_type);
131 static ir_node *create_immediate_or_transform(ir_node *node,
132 char immediate_constraint_type);
134 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
135 dbg_info *dbgi, ir_node *block,
136 ir_node *op, ir_node *orig_node);
139 * Return true if a mode can be stored in the GP register set
/* Returns non-zero iff values of @p mode are kept in the GP register class:
 * integer, reference or mode_b values of at most 32 bits, excluding the x87
 * control word mode.
 * NOTE(review): this extract is line-sampled -- the early-return statements
 * for the two guard conditions are not visible here. */
141 static INLINE int mode_needs_gp_reg(ir_mode *mode) {
142 if(mode == mode_fpcw)
144 if(get_mode_size_bits(mode) > 32)
146 return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
150 * creates a unique ident by adding a number to a tag
152 * @param tag the tag string, must contain a %d if a number
/* Creates a unique ident by formatting a monotonically increasing counter
 * into @p tag (tag must contain a printf-style %d/%u conversion).
 * The target buffer 'str' is declared on a line not visible in this extract. */
155 static ident *unique_id(const char *tag)
157 static unsigned id = 0;
160 snprintf(str, sizeof(str), tag, ++id);
161 return new_id_from_str(str);
165 * Get a primitive type for a mode.
/* Returns (creating and caching on first use) a primitive type for @p mode.
 * The cache is the pmap @p types, keyed by mode. Newly created types are
 * given 16-byte alignment (suitable for SSE operands). */
167 static ir_type *get_prim_type(pmap *types, ir_mode *mode)
169 pmap_entry *e = pmap_find(types, mode);
/* cache miss: build "prim_type_<modename>" and remember it */
174 snprintf(buf, sizeof(buf), "prim_type_%s", get_mode_name(mode));
175 res = new_type_primitive(new_id_from_str(buf), mode);
176 set_type_alignment_bytes(res, 16);
177 pmap_insert(types, mode, res);
185 * Creates an immediate.
187 * @param symconst if set, create a SymConst immediate
188 * @param symconst_sign sign for the symconst
189 * @param val integer value for the immediate
/* Creates an ia32 Immediate node in the start block of the current graph.
 *
 * @param symconst       if non-NULL, entity making this a SymConst immediate
 * @param symconst_sign  sign flag for the symconst part
 * @param val            integer value of the immediate
 * The node is placed in the start block and pinned to the GP_NOREG register
 * so it can be used wherever an immediate operand is accepted. */
191 static ir_node *create_Immediate(ir_entity *symconst, int symconst_sign, long val)
193 ir_graph *irg = current_ir_graph;
194 ir_node *start_block = get_irg_start_block(irg);
195 ir_node *immediate = new_rd_ia32_Immediate(NULL, irg, start_block,
196 symconst, symconst_sign, val);
197 arch_set_irn_register(env_cg->arch_env, immediate, &ia32_gp_regs[REG_GP_NOREG]);
203 * Get an atomic entity that is initialized with a tarval forming
206 * @param cnst the node representing the constant
/* Returns an atomic, read-only, statically allocated global entity that is
 * initialized with the tarval of the float Const @p cnst. Entities are
 * cached in isa->tv_ent keyed by the tarval, so equal constants share one
 * entity. For the x87 path the mode is narrowed (E->D->F) when the value
 * converts losslessly, producing smaller data. */
208 static ir_entity *create_float_const_entity(ir_node *cnst)
210 ia32_isa_t *isa = env_cg->isa;
211 tarval *key = get_Const_tarval(cnst);
212 pmap_entry *e = pmap_find(isa->tv_ent, key);
218 ir_mode *mode = get_tarval_mode(tv);
221 if (! ia32_cg_config.use_sse2) {
222 /* try to reduce the mode to produce smaller sized entities */
223 if (mode != mode_F) {
224 if (tarval_ieee754_can_conv_lossless(tv, mode_F)) {
226 tv = tarval_convert_to(tv, mode);
227 } else if (mode != mode_D) {
228 if (tarval_ieee754_can_conv_lossless(tv, mode_D)) {
230 tv = tarval_convert_to(tv, mode);
/* pick the entity type: reuse the Const's type if the mode was kept,
 * otherwise fall back to a cached primitive type for the (narrowed) mode */
236 if (mode == get_irn_mode(cnst)) {
237 /* mode was not changed */
238 tp = get_Const_type(cnst);
239 if (tp == firm_unknown_type)
240 tp = get_prim_type(isa->types, mode);
242 tp = get_prim_type(isa->types, mode);
/* local-linkage ".LC<n>" constant in static storage */
244 res = new_entity(get_glob_type(), unique_id(".LC%u"), tp);
246 set_entity_ld_ident(res, get_entity_ident(res));
247 set_entity_visibility(res, visibility_local);
248 set_entity_variability(res, variability_constant);
249 set_entity_allocation(res, allocation_static);
251 /* we create a new entity here: It's initialization must resist on the
/* build the initializer Const inside the const-code irg, then restore
 * the previously current graph */
253 rem = current_ir_graph;
254 current_ir_graph = get_const_code_irg();
255 set_atomic_ent_value(res, new_Const_type(tv, tp));
256 current_ir_graph = rem;
258 pmap_insert(isa->tv_ent, key, res);
/* Returns non-zero if @p node is a Const with value 0. */
266 static int is_Const_0(ir_node *node) {
267 return is_Const(node) && is_Const_null(node);
/* Returns non-zero if @p node is a Const with value 1. */
270 static int is_Const_1(ir_node *node) {
271 return is_Const(node) && is_Const_one(node);
/* Returns non-zero if @p node is a Const with all bits set (-1). */
274 static int is_Const_Minus_1(ir_node *node) {
275 return is_Const(node) && is_Const_all_one(node);
279 * returns true if constant can be created with a simple float command
/* Returns non-zero if the float constant can be materialized with a single
 * x87 instruction (currently only 0.0 via fldz and 1.0 via fld1). */
281 static int is_simple_x87_Const(ir_node *node)
283 tarval *tv = get_Const_tarval(node);
284 if (tarval_is_null(tv) || tarval_is_one(tv))
287 /* TODO: match all the other float constants */
292 * returns true if constant can be created with a simple float command
/* Returns non-zero if the float constant can be created with a short SSE
 * instruction sequence: 0.0, 1.0, or a double whose lower 32 bits are zero
 * (so it is effectively a 32-bit constant movable via movd). */
294 static int is_simple_sse_Const(ir_node *node)
296 tarval *tv = get_Const_tarval(node);
297 ir_mode *mode = get_tarval_mode(tv);
302 if (tarval_is_null(tv) || tarval_is_one(tv))
305 if (mode == mode_D) {
/* assemble the low 32 bits byte-wise (get_tarval_sub_bits returns byte i) */
306 unsigned val = get_tarval_sub_bits(tv, 0) |
307 (get_tarval_sub_bits(tv, 1) << 8) |
308 (get_tarval_sub_bits(tv, 2) << 16) |
309 (get_tarval_sub_bits(tv, 3) << 24);
311 /* lower 32bit are zero, really a 32bit constant */
315 /* TODO: match all the other float constants */
320 * Transforms a Const.
/* Transforms a firm Const into ia32 code.
 *
 * Float constants: with SSE2, special-case 0.0 (xZero), 1.0 (xAllOnes +
 * shift trick), 32-bit-representable values (movd from a GP Const) and
 * doubles with zero low half (movd + psllq); everything else is loaded
 * from a constant-pool entity (xLoad). On x87, 0.0/1.0 use fldz/fld1,
 * otherwise a vfld from a constant-pool entity.
 * Integer constants become an ia32 Const with the tarval's long value. */
322 static ir_node *gen_Const(ir_node *node) {
323 ir_graph *irg = current_ir_graph;
324 ir_node *old_block = get_nodes_block(node);
325 ir_node *block = be_transform_node(old_block);
326 dbg_info *dbgi = get_irn_dbg_info(node);
327 ir_mode *mode = get_irn_mode(node);
329 assert(is_Const(node));
331 if (mode_is_float(mode)) {
333 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
334 ir_node *nomem = new_NoMem();
338 if (ia32_cg_config.use_sse2) {
339 tarval *tv = get_Const_tarval(node);
340 if (tarval_is_null(tv)) {
341 load = new_rd_ia32_xZero(dbgi, irg, block);
342 set_ia32_ls_mode(load, mode);
344 } else if (tarval_is_one(tv)) {
/* build 1.0 without a memory load: all-ones, then shift left/right
 * to leave exactly the exponent bits of 1.0 (26/2 for float,
 * 55/2 for double) */
345 int cnst = mode == mode_F ? 26 : 55;
346 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
347 ir_node *imm2 = create_Immediate(NULL, 0, 2);
348 ir_node *pslld, *psrld;
350 load = new_rd_ia32_xAllOnes(dbgi, irg, block);
351 set_ia32_ls_mode(load, mode);
352 pslld = new_rd_ia32_xPslld(dbgi, irg, block, load, imm1);
353 set_ia32_ls_mode(pslld, mode);
354 psrld = new_rd_ia32_xPsrld(dbgi, irg, block, pslld, imm2);
355 set_ia32_ls_mode(psrld, mode);
357 } else if (mode == mode_F) {
358 /* we can place any 32bit constant by using a movd gp, sse */
359 unsigned val = get_tarval_sub_bits(tv, 0) |
360 (get_tarval_sub_bits(tv, 1) << 8) |
361 (get_tarval_sub_bits(tv, 2) << 16) |
362 (get_tarval_sub_bits(tv, 3) << 24);
363 ir_node *cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
364 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
365 set_ia32_ls_mode(load, mode);
368 if (mode == mode_D) {
369 unsigned val = get_tarval_sub_bits(tv, 0) |
370 (get_tarval_sub_bits(tv, 1) << 8) |
371 (get_tarval_sub_bits(tv, 2) << 16) |
372 (get_tarval_sub_bits(tv, 3) << 24);
374 ir_node *imm32 = create_Immediate(NULL, 0, 32);
375 ir_node *cnst, *psllq;
377 /* fine, lower 32bit are zero, produce 32bit value */
378 val = get_tarval_sub_bits(tv, 4) |
379 (get_tarval_sub_bits(tv, 5) << 8) |
380 (get_tarval_sub_bits(tv, 6) << 16) |
381 (get_tarval_sub_bits(tv, 7) << 24);
382 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
383 load = new_rd_ia32_xMovd(dbgi, irg, block, cnst);
384 set_ia32_ls_mode(load, mode);
/* shift the 32 significant bits into the upper half of the double */
385 psllq = new_rd_ia32_xPsllq(dbgi, irg, block, load, imm32);
386 set_ia32_ls_mode(psllq, mode);
/* general case: load from a constant-pool entity */
391 floatent = create_float_const_entity(node);
393 load = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem,
395 set_ia32_op_type(load, ia32_AddrModeS);
396 set_ia32_am_sc(load, floatent);
397 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
398 res = new_r_Proj(irg, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path */
401 if (is_Const_null(node)) {
402 load = new_rd_ia32_vfldz(dbgi, irg, block);
404 set_ia32_ls_mode(load, mode);
405 } else if (is_Const_one(node)) {
406 load = new_rd_ia32_vfld1(dbgi, irg, block);
408 set_ia32_ls_mode(load, mode);
410 floatent = create_float_const_entity(node);
412 load = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode);
413 set_ia32_op_type(load, ia32_AddrModeS);
414 set_ia32_am_sc(load, floatent);
415 set_ia32_flags(load, get_ia32_flags(load) | arch_irn_flags_rematerializable);
416 res = new_r_Proj(irg, block, load, mode_vfp, pn_ia32_vfld_res);
417 /* take the mode from the entity */
418 set_ia32_ls_mode(load, get_type_mode(get_entity_type(floatent)));
422 /* Const Nodes before the initial IncSP are a bad idea, because
423 * they could be spilled and we have no SP ready at that point yet.
424 * So add a dependency to the initial frame pointer calculation to
425 * avoid that situation.
427 if (get_irg_start_block(irg) == block) {
428 add_irn_dep(load, get_irg_frame(irg));
431 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
433 } else { /* non-float mode */
435 tarval *tv = get_Const_tarval(node);
/* normalize to 32-bit unsigned so get_tarval_long below is safe */
438 tv = tarval_convert_to(tv, mode_Iu);
440 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
442 panic("couldn't convert constant tarval (%+F)", node);
444 val = get_tarval_long(tv);
446 cnst = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, val);
447 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
/* same start-block spill problem as above: depend on the frame node */
450 if (get_irg_start_block(irg) == block) {
451 add_irn_dep(cnst, get_irg_frame(irg));
459 * Transforms a SymConst.
/* Transforms a SymConst. Only symconst_addr_ent is supported; other kinds
 * cause a panic. For float modes the entity's value is loaded (xLoad with
 * SSE2, vfld otherwise, both in mode_E); for GP modes an ia32 Const with
 * the entity as symconst is produced. */
461 static ir_node *gen_SymConst(ir_node *node) {
462 ir_graph *irg = current_ir_graph;
463 ir_node *old_block = get_nodes_block(node);
464 ir_node *block = be_transform_node(old_block);
465 dbg_info *dbgi = get_irn_dbg_info(node);
466 ir_mode *mode = get_irn_mode(node);
469 if (mode_is_float(mode)) {
470 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
471 ir_node *nomem = new_NoMem();
473 if (ia32_cg_config.use_sse2)
474 cnst = new_rd_ia32_xLoad(dbgi, irg, block, noreg, noreg, nomem, mode_E);
476 cnst = new_rd_ia32_vfld(dbgi, irg, block, noreg, noreg, nomem, mode_E);
477 set_ia32_am_sc(cnst, get_SymConst_entity(node));
478 set_ia32_use_frame(cnst);
482 if(get_SymConst_kind(node) != symconst_addr_ent) {
483 panic("backend only support symconst_addr_ent (at %+F)", node);
485 entity = get_SymConst_entity(node);
486 cnst = new_rd_ia32_Const(dbgi, irg, block, entity, 0, 0);
489 /* Const Nodes before the initial IncSP are a bad idea, because
490 * they could be spilled and we have no SP ready at that point yet
492 if (get_irg_start_block(irg) == block) {
493 add_irn_dep(cnst, get_irg_frame(irg));
496 SET_IA32_ORIG_NODE(cnst, ia32_get_old_node_name(env_cg, node));
501 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Generates (and caches per kind) a static constant entity for a known FP
 * constant used by FP Neg/Abs and float->int conversion: the sign-bit and
 * abs masks for float/double, and INT64_MAX. The 'mode' selector in the
 * table chooses the tarval mode: 0 = Iu, 1 = Lu, otherwise F. */
502 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct) {
503 static const struct {
505 const char *ent_name;
506 const char *cnst_str;
509 } names [ia32_known_const_max] = {
510 { TP_SFP_SIGN, ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
511 { TP_DFP_SIGN, ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
512 { TP_SFP_ABS, ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
513 { TP_DFP_ABS, ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
514 { TP_INT_MAX, ENT_INT_MAX, DFP_INTMAX, 2, 4 } /* ia32_INTMAX */
516 static ir_entity *ent_cache[ia32_known_const_max];
518 const char *tp_name, *ent_name, *cnst_str;
526 ent_name = names[kct].ent_name;
527 if (! ent_cache[kct]) {
528 tp_name = names[kct].tp_name;
529 cnst_str = names[kct].cnst_str;
531 switch (names[kct].mode) {
532 case 0: mode = mode_Iu; break;
533 case 1: mode = mode_Lu; break;
534 default: mode = mode_F; break;
536 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
537 tp = new_type_primitive(new_id_from_str(tp_name), mode);
538 /* set the specified alignment */
539 set_type_alignment_bytes(tp, names[kct].align);
/* local-linkage constant in static storage, like create_float_const_entity */
541 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
543 set_entity_ld_ident(ent, get_entity_ident(ent));
544 set_entity_visibility(ent, visibility_local);
545 set_entity_variability(ent, variability_constant);
546 set_entity_allocation(ent, allocation_static);
548 /* we create a new entity here: It's initialization must resist on the
/* build the initializer inside the const-code irg, restore afterwards */
550 rem = current_ir_graph;
551 current_ir_graph = get_const_code_irg();
552 cnst = new_Const(mode, tv);
553 current_ir_graph = rem;
555 set_atomic_ent_value(ent, cnst);
557 /* cache the entry */
558 ent_cache[kct] = ent;
561 return ent_cache[kct];
566 * Prints the old node name on cg obst and returns a pointer to it.
/* Prints the textual name of @p irn onto the code generator's name obstack
 * and returns a pointer to the NUL-terminated string (owned by the obstack,
 * valid until the obstack is freed). Used for ORIG_NODE debug annotations. */
568 const char *ia32_get_old_node_name(ia32_code_gen_t *cg, ir_node *irn) {
569 ia32_isa_t *isa = (ia32_isa_t *)cg->arch_env->isa;
571 lc_eoprintf(firm_get_arg_env(), isa->name_obst, "%+F", irn);
572 obstack_1grow(isa->name_obst, 0);
573 return obstack_finish(isa->name_obst);
578 * return true if the node is a Proj(Load) and could be used in source address
579 * mode for another node. Will return only true if the @p other node is not
580 * dependent on the memory of the Load (for binary operations use the other
581 * input here, for unary operations use NULL).
/* Returns non-zero if @p node (a Proj(Load) or a float Const) may be folded
 * into another instruction as a source address-mode operand. @p other and
 * @p other2 are the remaining inputs of the candidate consumer; folding is
 * rejected if either of them depends on the Load's memory (checked via the
 * heights structure within the block). */
583 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
584 ir_node *other, ir_node *other2)
589 /* float constants are always available */
590 if (is_Const(node)) {
591 ir_mode *mode = get_irn_mode(node);
592 if (mode_is_float(mode)) {
593 if (ia32_cg_config.use_sse2) {
594 if (is_simple_sse_Const(node))
597 if (is_simple_x87_Const(node))
/* constants that are cheap to rebuild must be the sole user's operand */
600 if (get_irn_n_edges(node) > 1)
/* otherwise the node must be the result Proj of a Load in this block */
608 load = get_Proj_pred(node);
609 pn = get_Proj_proj(node);
610 if (!is_Load(load) || pn != pn_Load_res)
612 if (get_nodes_block(load) != block)
614 /* we only use address mode if we're the only user of the load */
615 if (get_irn_n_edges(node) > 1)
617 /* in some edge cases with address mode we might reach the load normally
618 * and through some AM sequence, if it is already materialized then we
619 * can't create an AM node from it */
620 if (be_is_transformed(node))
623 /* don't do AM if other node inputs depend on the load (via mem-proj) */
624 if (other != NULL && get_nodes_block(other) == block &&
625 heights_reachable_in_block(heights, other, load))
627 if (other2 != NULL && get_nodes_block(other2) == block &&
628 heights_reachable_in_block(heights, other2, load))
/* Result of operand matching (match_arguments): the chosen addressing mode,
 * the transformed operands, and bookkeeping flags. Further fields (addr,
 * new_op1/new_op2, ls_mode, pinned, mem_proj) are on lines not visible in
 * this extract but are referenced throughout the file. */
634 typedef struct ia32_address_mode_t ia32_address_mode_t;
635 struct ia32_address_mode_t {
639 ia32_op_type_t op_type;
/* operation is commutative, so operands may be swapped */
643 unsigned commutative : 1;
/* operands were swapped during matching (consumer must compensate) */
644 unsigned ins_permuted : 1;
/* Fills @p addr with an ia32 addressing mode decomposed from @p ptr and the
 * transformed memory input @p mem. Unused base/index slots are filled with
 * the GP NoReg placeholder. */
647 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
651 /* construct load address */
652 memset(addr, 0, sizeof(addr[0]));
653 ia32_create_address_mode(addr, ptr, /*force=*/0);
655 noreg_gp = ia32_new_NoReg_gp(env_cg);
656 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
657 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
658 addr->mem = be_transform_node(mem);
/* Fills am->addr for source address mode from @p node, which is either a
 * float Const (turned into a constant-pool entity reference with NoMem) or
 * a Proj(Load) whose address is decomposed into base/index/offset. Also
 * records ls_mode, pinned state and (for Loads) the memory Proj. */
661 static void build_address(ia32_address_mode_t *am, ir_node *node)
663 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
664 ia32_address_t *addr = &am->addr;
670 if (is_Const(node)) {
671 ir_entity *entity = create_float_const_entity(node);
672 addr->base = noreg_gp;
673 addr->index = noreg_gp;
674 addr->mem = new_NoMem();
675 addr->symconst_ent = entity;
/* the entity's mode may be narrower than the Const's (see
 * create_float_const_entity), so take it from the entity type */
677 am->ls_mode = get_type_mode(get_entity_type(entity));
678 am->pinned = op_pin_state_floats;
/* Proj(Load) case */
682 load = get_Proj_pred(node);
683 ptr = get_Load_ptr(load);
684 mem = get_Load_mem(load);
685 new_mem = be_transform_node(mem);
686 am->pinned = get_irn_pinned(load);
687 am->ls_mode = get_Load_mode(load);
688 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
690 /* construct load address */
691 ia32_create_address_mode(addr, ptr, /*force=*/0);
693 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
694 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
/* Copies the addressing-mode components of @p addr (scale, symconst,
 * integer offset, sign flag, frame entity) onto the ia32 node @p node. */
698 static void set_address(ir_node *node, const ia32_address_t *addr)
700 set_ia32_am_scale(node, addr->scale);
701 set_ia32_am_sc(node, addr->symconst_ent);
702 set_ia32_am_offs_int(node, addr->offset);
703 if(addr->symconst_sign)
704 set_ia32_am_sc_sign(node);
/* frame-relative addresses additionally carry the frame entity */
706 set_ia32_use_frame(node);
707 set_ia32_frame_ent(node, addr->frame_entity);
711 * Apply attributes of a given address mode to a node.
/* Applies all attributes of a matched address mode @p am to @p node:
 * address components, op type, load/store mode, pinned state and the
 * commutativity flag. */
713 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
715 set_address(node, &am->addr);
717 set_ia32_op_type(node, am->op_type);
718 set_ia32_ls_mode(node, am->ls_mode);
719 if (am->pinned == op_pin_state_pinned) {
720 set_irn_pinned(node, am->pinned);
/* mark commutative so later phases may swap the operands */
723 set_ia32_commutative(node);
727 * Check, if a given node is a Down-Conv, ie. a integer Conv
728 * from a mode with a mode with more bits to a mode with lesser bits.
729 * Moreover, we return only true if the node has not more than 1 user.
731 * @param node the node
732 * @return non-zero if node is a Down-Conv
/* Returns non-zero if @p node is a Down-Conv: an integer Conv from a wider
 * to a narrower GP mode, with at most one user (so skipping it is safe). */
734 static int is_downconv(const ir_node *node)
742 /* we only want to skip the conv when we're the only user
743 * (not optimal but for now...)
745 if(get_irn_n_edges(node) > 1)
748 src_mode = get_irn_mode(get_Conv_op(node));
749 dest_mode = get_irn_mode(node);
750 return mode_needs_gp_reg(src_mode)
751 && mode_needs_gp_reg(dest_mode)
752 && get_mode_size_bits(dest_mode) < get_mode_size_bits(src_mode);
755 /* Skip all Down-Conv's on a given node and return the resulting node. */
/* Skip all Down-Conv's on a given node and return the resulting node.
 * Safe for mode-neutral consumers, which ignore the upper bits anyway. */
756 ir_node *ia32_skip_downconv(ir_node *node) {
757 while (is_downconv(node))
758 node = get_Conv_op(node);
/* Widens @p node to 32 bit via an I2I Conv, choosing the signed or unsigned
 * target mode according to the node's own mode; @p orig_node is kept for
 * debug/ORIG annotations. The tgt_mode assignments are on lines not visible
 * in this extract. */
764 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
766 ir_mode *mode = get_irn_mode(node);
771 if(mode_is_signed(mode)) {
776 block = get_nodes_block(node);
777 dbgi = get_irn_dbg_info(node);
779 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
784 * matches operands of a node into ia32 addressing/operand modes. This covers
785 * usage of source address mode, immediates, operations with non 32-bit modes,
787 * The resulting data is filled into the @p am struct. block is the block
788 * of the node whose arguments are matched. op1, op2 are the first and second
789 * input that are matched (op1 may be NULL). other_op is another unrelated
790 * input that is not matched! but which is needed sometimes to check if AM
791 * for op1/op2 is legal.
792 * @p flags describes the supported modes of the operation in detail.
/* Matches the operands op1/op2 of a node into ia32 operand forms according
 * to @p flags: try an immediate for op2, then source address mode for op2
 * (or, if commutative, op1 with operands swapped and ins_permuted set),
 * otherwise plain register operands. Fills @p am completely; base/index/mem
 * slots left empty are defaulted to NoReg/NoMem at the end. @p other_op is
 * an unrelated extra input only used for the AM dependency check. */
794 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
795 ir_node *op1, ir_node *op2, ir_node *other_op,
798 ia32_address_t *addr = &am->addr;
799 ir_mode *mode = get_irn_mode(op2);
800 int mode_bits = get_mode_size_bits(mode);
801 ir_node *noreg_gp, *new_op1, *new_op2;
803 unsigned commutative;
804 int use_am_and_immediates;
807 memset(am, 0, sizeof(am[0]));
809 commutative = (flags & match_commutative) != 0;
810 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
811 use_am = (flags & match_am) != 0;
812 use_immediate = (flags & match_immediate) != 0;
813 assert(!use_am_and_immediates || use_immediate);
816 assert(!commutative || op1 != NULL);
817 assert(use_am || !(flags & match_8bit_am));
818 assert(use_am || !(flags & match_16bit_am));
/* sub-32-bit modes only allow AM when the matching flag permits it */
820 if (mode_bits == 8) {
821 if (!(flags & match_8bit_am))
823 /* we don't automatically add upconvs yet */
824 assert((flags & match_mode_neutral) || (flags & match_8bit));
825 } else if (mode_bits == 16) {
826 if (!(flags & match_16bit_am))
828 /* we don't automatically add upconvs yet */
829 assert((flags & match_mode_neutral) || (flags & match_16bit));
832 /* we can simply skip downconvs for mode neutral nodes: the upper bits
833 * can be random for these operations */
834 if (flags & match_mode_neutral) {
835 op2 = ia32_skip_downconv(op2);
837 op1 = ia32_skip_downconv(op1);
841 /* match immediates. firm nodes are normalized: constants are always on the
844 if (!(flags & match_try_am) && use_immediate) {
845 new_op2 = try_create_Immediate(op2, 0);
848 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* source AM for op2? */
849 if (new_op2 == NULL &&
850 use_am && ia32_use_source_address_mode(block, op2, op1, other_op)) {
851 build_address(am, op2);
852 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
853 if (mode_is_float(mode)) {
854 new_op2 = ia32_new_NoReg_vfp(env_cg);
858 am->op_type = ia32_AddrModeS;
/* source AM for op1 (needs commutativity, operands get swapped) */
859 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
861 ia32_use_source_address_mode(block, op1, op2, other_op)) {
863 build_address(am, op1);
865 if (mode_is_float(mode)) {
866 noreg = ia32_new_NoReg_vfp(env_cg);
871 if (new_op2 != NULL) {
874 new_op1 = be_transform_node(op2);
876 am->ins_permuted = 1;
878 am->op_type = ia32_AddrModeS;
/* no AM matched */
880 if (flags & match_try_am) {
883 am->op_type = ia32_Normal;
/* plain register operands */
887 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
889 new_op2 = be_transform_node(op2);
890 am->op_type = ia32_Normal;
891 am->ls_mode = get_irn_mode(op2);
892 if (flags & match_mode_neutral)
893 am->ls_mode = mode_Iu;
/* default any unfilled address components */
895 if (addr->base == NULL)
896 addr->base = noreg_gp;
897 if (addr->index == NULL)
898 addr->index = noreg_gp;
899 if (addr->mem == NULL)
900 addr->mem = new_NoMem();
902 am->new_op1 = new_op1;
903 am->new_op2 = new_op2;
904 am->commutative = commutative;
/* If the matched address mode consumed a Load (am->mem_proj set), reroute
 * the old memory Proj to @p node: mark the Load transformed, turn @p node
 * into a mode_T node, and return a fresh result Proj in the original mode.
 * Returns @p node unchanged when no memory Proj is involved. */
907 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
912 if (am->mem_proj == NULL)
915 /* we have to create a mode_T so the old MemProj can attach to us */
916 mode = get_irn_mode(node);
917 load = get_Proj_pred(am->mem_proj);
919 mark_irn_visited(load);
920 be_set_transformed_node(load, node);
922 if (mode != mode_T) {
923 set_irn_mode(node, mode_T);
924 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
931 * Construct a standard binary operation, set AM and immediate if required.
933 * @param op1 The first operand
934 * @param op2 The second operand
935 * @param func The node constructor function
936 * @return The constructed ia32 node.
/* Constructs a standard ia32 binary operation from @p op1/@p op2 using the
 * node constructor @p func, with operand matching (AM/immediates) governed
 * by @p flags. Applies the matched attributes, disables AM support again if
 * an immediate was matched, annotates the original node name, and reroutes
 * a consumed Load's memory Proj. */
938 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
939 construct_binop_func *func, match_flags_t flags)
942 ir_node *block, *new_block, *new_node;
943 ia32_address_mode_t am;
944 ia32_address_t *addr = &am.addr;
946 block = get_nodes_block(node);
947 match_arguments(&am, block, op1, op2, NULL, flags);
949 dbgi = get_irn_dbg_info(node);
950 new_block = be_transform_node(block);
951 new_node = func(dbgi, current_ir_graph, new_block,
952 addr->base, addr->index, addr->mem,
953 am.new_op1, am.new_op2);
954 set_am_attributes(new_node, &am);
955 /* we can't use source address mode anymore when using immediates */
956 if (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
957 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
958 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
960 new_node = fix_mem_proj(new_node, &am);
967 n_ia32_l_binop_right,
968 n_ia32_l_binop_eflags
970 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
971 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
972 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
973 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
974 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
975 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
978 * Construct a binary operation which also consumes the eflags.
980 * @param node The node to transform
981 * @param func The node constructor function
982 * @param flags The match flags
983 * @return The constructor ia32 node
/* Constructs a binary operation that additionally consumes the eflags input
 * (Adc/Sbb style lowered nodes). Operand indices use the n_ia32_l_binop_*
 * enum, which the COMPILETIME_ASSERTs above tie to the Adc/Sbb layouts.
 * Otherwise mirrors gen_binop: match operands, build node, apply AM
 * attributes, drop AM support if an immediate was matched, fix mem Proj. */
985 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
988 ir_node *src_block = get_nodes_block(node);
989 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
990 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
992 ir_node *block, *new_node, *eflags, *new_eflags;
993 ia32_address_mode_t am;
994 ia32_address_t *addr = &am.addr;
996 match_arguments(&am, src_block, op1, op2, NULL, flags);
998 dbgi = get_irn_dbg_info(node);
999 block = be_transform_node(src_block);
1000 eflags = get_irn_n(node, n_ia32_l_binop_eflags);
1001 new_eflags = be_transform_node(eflags);
1002 new_node = func(dbgi, current_ir_graph, block, addr->base, addr->index,
1003 addr->mem, am.new_op1, am.new_op2, new_eflags);
1004 set_am_attributes(new_node, &am);
1005 /* we can't use source address mode anymore when using immediates */
1006 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1007 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1008 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1010 new_node = fix_mem_proj(new_node, &am);
/* Returns the (lazily transformed and cached) x87 floating-point control
 * word node, obtained from the ABI's ignore-register for REG_FPCW. */
1015 static ir_node *get_fpcw(void)
1018 if (initial_fpcw != NULL)
1019 return initial_fpcw;
1021 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
1022 &ia32_fp_cw_regs[REG_FPCW]);
1023 initial_fpcw = be_transform_node(fpcw);
1025 return initial_fpcw;
1029 * Construct a standard binary operation, set AM and immediate if required.
1031 * @param op1 The first operand
1032 * @param op2 The second operand
1033 * @param func The node constructor function
1034 * @return The constructed ia32 node.
/* Constructs an x87 float binary operation. Like gen_binop but the
 * constructor additionally receives the FP control word (get_fpcw()), and
 * address mode is disabled for modes wider than 64 bit (long double cannot
 * be used as an x87 memory operand). */
1036 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
1037 construct_binop_float_func *func,
1038 match_flags_t flags)
1040 ir_mode *mode = get_irn_mode(node);
1042 ir_node *block, *new_block, *new_node;
1043 ia32_address_mode_t am;
1044 ia32_address_t *addr = &am.addr;
1046 /* cannot use address mode with long double on x87 */
1047 if (get_mode_size_bits(mode) > 64)
1050 block = get_nodes_block(node);
1051 match_arguments(&am, block, op1, op2, NULL, flags);
1053 dbgi = get_irn_dbg_info(node);
1054 new_block = be_transform_node(block);
1055 new_node = func(dbgi, current_ir_graph, new_block,
1056 addr->base, addr->index, addr->mem,
1057 am.new_op1, am.new_op2, get_fpcw());
1058 set_am_attributes(new_node, &am);
1060 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1062 new_node = fix_mem_proj(new_node, &am);
1068 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1070 * @param op1 The first operand
1071 * @param op2 The second operand
1072 * @param func The node constructor function
1073 * @return The constructed ia32 node.
/* Constructs a shift/rotate operation. Only match_mode_neutral and
 * match_immediate are allowed in @p flags. The shift amount may skip Convs,
 * since the hardware only looks at the low 5 bits; it becomes an immediate
 * when possible. A lowered shift with a third input treats it as an extra
 * dependency edge. */
1075 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1076 construct_shift_func *func,
1077 match_flags_t flags)
1080 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1082 assert(! mode_is_float(get_irn_mode(node)));
1083 assert(flags & match_immediate);
1084 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
1086 if (flags & match_mode_neutral) {
1087 op1 = ia32_skip_downconv(op1);
1088 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1089 panic("right shifting of non-32bit values not supported, yet");
1091 new_op1 = be_transform_node(op1);
1093 /* the shift amount can be any mode that is bigger than 5 bits, since all
1094 * other bits are ignored anyway */
1095 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1096 op2 = get_Conv_op(op2);
1097 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1099 new_op2 = create_immediate_or_transform(op2, 0);
1101 dbgi = get_irn_dbg_info(node);
1102 block = get_nodes_block(node);
1103 new_block = be_transform_node(block);
1104 new_node = func(dbgi, current_ir_graph, new_block, new_op1, new_op2);
1105 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1107 /* lowered shift instruction may have a dependency operand, handle it here */
1108 if (get_irn_arity(node) == 3) {
1109 /* we have a dependency */
1110 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1111 add_irn_dep(new_node, new_dep);
1119 * Construct a standard unary operation, set AM and immediate if required.
1121 * @param op The operand
1122 * @param func The node constructor function
1123 * @return The constructed ia32 node.
/* Constructs a standard unary operation from @p op via constructor @p func.
 * @p flags may only be 0 or match_mode_neutral (which permits skipping
 * Down-Convs on the operand). */
1125 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1126 match_flags_t flags)
1129 ir_node *block, *new_block, *new_op, *new_node;
1131 assert(flags == 0 || flags == match_mode_neutral);
1132 if (flags & match_mode_neutral) {
1133 op = ia32_skip_downconv(op);
1136 new_op = be_transform_node(op);
1137 dbgi = get_irn_dbg_info(node);
1138 block = get_nodes_block(node);
1139 new_block = be_transform_node(block);
1140 new_node = func(dbgi, current_ir_graph, new_block, new_op);
1142 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Builds an ia32 Lea in @p block from the address decomposition @p addr.
 * Missing base/index components are substituted with the GP NoReg
 * placeholder; present ones are transformed to their new-graph versions. */
1147 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1148 ia32_address_t *addr)
1150 ir_node *base, *index, *res;
1154 base = ia32_new_NoReg_gp(env_cg);
1156 base = be_transform_node(base);
1159 index = addr->index;
1160 if (index == NULL) {
1161 index = ia32_new_NoReg_gp(env_cg);
1163 index = be_transform_node(index);
1166 res = new_rd_ia32_Lea(dbgi, current_ir_graph, block, base, index);
1167 set_address(res, addr);
1173 * Returns non-zero if a given address mode has a symbolic or
1174 * numerical offset != 0.
/* Returns non-zero if the address mode carries any immediate component:
 * a non-zero numeric offset, a symconst entity, or a frame reference. */
1176 static int am_has_immediates(const ia32_address_t *addr)
1178 return addr->offset != 0 || addr->symconst_ent != NULL
1179 || addr->frame_entity || addr->use_frame;
1183 * Creates an ia32 Add.
1185 * @return the created ia32 Add node
1187 static ir_node *gen_Add(ir_node *node) {
1188 ir_mode *mode = get_irn_mode(node);
1189 ir_node *op1 = get_Add_left(node);
1190 ir_node *op2 = get_Add_right(node);
1192 ir_node *block, *new_block, *new_node, *add_immediate_op;
1193 ia32_address_t addr;
1194 ia32_address_mode_t am;
/* float adds go straight to SSE2 or x87 binops */
1196 if (mode_is_float(mode)) {
1197 if (ia32_cg_config.use_sse2)
1198 return gen_binop(node, op1, op2, new_rd_ia32_xAdd,
1199 match_commutative | match_am);
1201 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfadd,
1202 match_commutative | match_am);
1205 ia32_mark_non_am(node);
1207 op2 = ia32_skip_downconv(op2);
1208 op1 = ia32_skip_downconv(op1);
/* integer Add lowering strategy, in order of preference: */
1212 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1213 * 1. Add with immediate -> Lea
1214 * 2. Add with possible source address mode -> Add
1215 * 3. Otherwise -> Lea
1217 memset(&addr, 0, sizeof(addr));
1218 ia32_create_address_mode(&addr, node, /*force=*/1);
1219 add_immediate_op = NULL;
1221 dbgi = get_irn_dbg_info(node);
1222 block = get_nodes_block(node);
1223 new_block = be_transform_node(block);
/* case 0: the whole tree folded into immediates -> a single Const */
1226 if(addr.base == NULL && addr.index == NULL) {
1227 ir_graph *irg = current_ir_graph;
1228 new_node = new_rd_ia32_Const(dbgi, irg, new_block, addr.symconst_ent,
1229 addr.symconst_sign, addr.offset);
1230 add_irn_dep(new_node, get_irg_frame(irg));
1231 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1234 /* add with immediate? */
1235 if(addr.index == NULL) {
1236 add_immediate_op = addr.base;
1237 } else if(addr.base == NULL && addr.scale == 0) {
1238 add_immediate_op = addr.index;
1241 if(add_immediate_op != NULL) {
/* Add x,0: no immediates at all, just forward the operand */
1242 if(!am_has_immediates(&addr)) {
1243 #ifdef DEBUG_libfirm
1244 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1247 return be_transform_node(add_immediate_op);
/* case 1: one register operand + immediates -> Lea */
1250 new_node = create_lea_from_address(dbgi, new_block, &addr);
1251 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1255 /* test if we can use source address mode */
1256 match_arguments(&am, block, op1, op2, NULL, match_commutative
1257 | match_mode_neutral | match_am | match_immediate | match_try_am);
1259 /* construct an Add with source address mode */
1260 if (am.op_type == ia32_AddrModeS) {
1261 ir_graph *irg = current_ir_graph;
1262 ia32_address_t *am_addr = &am.addr;
1263 new_node = new_rd_ia32_Add(dbgi, irg, new_block, am_addr->base,
1264 am_addr->index, am_addr->mem, am.new_op1,
1266 set_am_attributes(new_node, &am);
1267 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1269 new_node = fix_mem_proj(new_node, &am);
1274 /* otherwise construct a lea */
1275 new_node = create_lea_from_address(dbgi, new_block, &addr);
1276 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1281 * Creates an ia32 Mul.
1283 * @return the created ia32 Mul node
1285 static ir_node *gen_Mul(ir_node *node) {
1286 ir_node *op1 = get_Mul_left(node);
1287 ir_node *op2 = get_Mul_right(node);
1288 ir_mode *mode = get_irn_mode(node);
/* float multiply: SSE2 xMul if available, x87 vfmul otherwise */
1290 if (mode_is_float(mode)) {
1291 if (ia32_cg_config.use_sse2)
1292 return gen_binop(node, op1, op2, new_rd_ia32_xMul,
1293 match_commutative | match_am);
1295 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
1296 match_commutative | match_am);
/* integer multiply: IMul supports source AM, immediates and both at once */
1298 return gen_binop(node, op1, op2, new_rd_ia32_IMul,
1299 match_commutative | match_am | match_mode_neutral |
1300 match_immediate | match_am_and_immediates);
1304 * Creates an ia32 Mulh.
1305 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1306 * this result while Mul returns the lower 32 bit.
1308 * @return the created ia32 Mulh node
1310 static ir_node *gen_Mulh(ir_node *node)
1312 ir_node *block = get_nodes_block(node);
1313 ir_node *new_block = be_transform_node(block);
1314 ir_graph *irg = current_ir_graph;
1315 dbg_info *dbgi = get_irn_dbg_info(node);
1316 ir_mode *mode = get_irn_mode(node);
1317 ir_node *op1 = get_Mulh_left(node);
1318 ir_node *op2 = get_Mulh_right(node);
1319 ir_node *proj_res_high;
1321 ia32_address_mode_t am;
1322 ia32_address_t *addr = &am.addr;
1324 assert(!mode_is_float(mode) && "Mulh with float not supported");
1325 assert(get_mode_size_bits(mode) == 32);
1327 match_arguments(&am, block, op1, op2, NULL, match_commutative | match_am);
/* signed -> one-operand IMul, unsigned -> Mul (both widen to 64 bit) */
1329 if (mode_is_signed(mode)) {
1330 new_node = new_rd_ia32_IMul1OP(dbgi, irg, new_block, addr->base,
1331 addr->index, addr->mem, am.new_op1,
1334 new_node = new_rd_ia32_Mul(dbgi, irg, new_block, addr->base,
1335 addr->index, addr->mem, am.new_op1,
1339 set_am_attributes(new_node, &am);
1340 /* we can't use source address mode anymore when using immediates */
1341 if(is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2))
1342 set_ia32_am_support(new_node, ia32_am_None, ia32_am_arity_none);
1343 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1345 assert(get_irn_mode(new_node) == mode_T);
1347 fix_mem_proj(new_node, &am);
/* both constructors share the same result-high proj number */
1349 assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
1350 proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
1351 mode_Iu, pn_ia32_IMul1OP_res_high);
1353 return proj_res_high;
1359 * Creates an ia32 And.
1361 * @return The created ia32 And node
1363 static ir_node *gen_And(ir_node *node) {
1364 ir_node *op1 = get_And_left(node);
1365 ir_node *op2 = get_And_right(node);
1366 assert(! mode_is_float(get_irn_mode(node)));
1368 /* is it a zero extension? */
/* And with mask 0xFF / 0xFFFF is a zero-extension: emit an I2I Conv
 * (movzx) instead of an And */
1369 if (is_Const(op2)) {
1370 tarval *tv = get_Const_tarval(op2);
1371 long v = get_tarval_long(tv);
1373 if (v == 0xFF || v == 0xFFFF) {
1374 dbg_info *dbgi = get_irn_dbg_info(node);
1375 ir_node *block = get_nodes_block(node);
1382 assert(v == 0xFFFF);
1385 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* generic case: plain ia32 And */
1391 return gen_binop(node, op1, op2, new_rd_ia32_And,
1392 match_commutative | match_mode_neutral | match_am
1399 * Creates an ia32 Or.
1401 * @return The created ia32 Or node
1403 static ir_node *gen_Or(ir_node *node) {
1404 ir_node *op1 = get_Or_left(node);
1405 ir_node *op2 = get_Or_right(node);
/* integer only: float Or has no meaning here */
1407 assert (! mode_is_float(get_irn_mode(node)));
1408 return gen_binop(node, op1, op2, new_rd_ia32_Or, match_commutative
1409 | match_mode_neutral | match_am | match_immediate);
1415 * Creates an ia32 Eor.
1417 * @return The created ia32 Eor node
1419 static ir_node *gen_Eor(ir_node *node) {
1420 ir_node *op1 = get_Eor_left(node);
1421 ir_node *op2 = get_Eor_right(node);
/* firm's Eor (exclusive or) maps directly onto ia32 Xor */
1423 assert(! mode_is_float(get_irn_mode(node)));
1424 return gen_binop(node, op1, op2, new_rd_ia32_Xor, match_commutative
1425 | match_mode_neutral | match_am | match_immediate);
1430 * Creates an ia32 Sub.
1432 * @return The created ia32 Sub node
1434 static ir_node *gen_Sub(ir_node *node) {
1435 ir_node *op1 = get_Sub_left(node);
1436 ir_node *op2 = get_Sub_right(node);
1437 ir_mode *mode = get_irn_mode(node);
/* float subtract: Sub is not commutative, so no match_commutative here */
1439 if (mode_is_float(mode)) {
1440 if (ia32_cg_config.use_sse2)
1441 return gen_binop(node, op1, op2, new_rd_ia32_xSub, match_am);
1443 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfsub,
/* Sub with a constant should have been normalized to Add(x, -C) earlier;
 * warn so the missing normalisation can be tracked down */
1447 if (is_Const(op2)) {
1448 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1452 return gen_binop(node, op1, op2, new_rd_ia32_Sub, match_mode_neutral
1453 | match_am | match_immediate);
1457 * Generates an ia32 DivMod with additional infrastructure for the
1458 * register allocator if needed.
1460 static ir_node *create_Div(ir_node *node)
1462 ir_graph *irg = current_ir_graph;
1463 dbg_info *dbgi = get_irn_dbg_info(node);
1464 ir_node *block = get_nodes_block(node);
1465 ir_node *new_block = be_transform_node(block);
1472 ir_node *sign_extension;
1473 ia32_address_mode_t am;
1474 ia32_address_t *addr = &am.addr;
1476 /* the upper bits have random contents for smaller modes */
/* common lowering for Div, Mod and DivMod: extract operands per opcode */
1477 switch (get_irn_opcode(node)) {
1479 op1 = get_Div_left(node);
1480 op2 = get_Div_right(node);
1481 mem = get_Div_mem(node);
1482 mode = get_Div_resmode(node);
1485 op1 = get_Mod_left(node);
1486 op2 = get_Mod_right(node);
1487 mem = get_Mod_mem(node);
1488 mode = get_Mod_resmode(node);
1491 op1 = get_DivMod_left(node);
1492 op2 = get_DivMod_right(node);
1493 mem = get_DivMod_mem(node);
1494 mode = get_DivMod_resmode(node);
1497 panic("invalid divmod node %+F", node);
1500 match_arguments(&am, block, op1, op2, NULL, match_am);
1502 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1503 is the memory of the consumed address. We can have only the second op as address
1504 in Div nodes, so check only op2. */
1505 if(!is_NoMem(mem) && skip_Proj(mem) != skip_Proj(op2)) {
1506 new_mem = be_transform_node(mem);
1507 if(!is_NoMem(addr->mem)) {
/* two distinct memory predecessors: merge them with a Sync */
1511 new_mem = new_rd_Sync(dbgi, irg, new_block, 2, in);
1514 new_mem = addr->mem;
/* signed: sign-extend eax into edx via Cltd (cdq) and use IDiv;
 * unsigned: zero the high half with a Const 0 and use Div */
1517 if (mode_is_signed(mode)) {
1518 ir_node *produceval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1519 add_irn_dep(produceval, get_irg_frame(irg));
1520 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block, am.new_op1,
1523 new_node = new_rd_ia32_IDiv(dbgi, irg, new_block, addr->base,
1524 addr->index, new_mem, am.new_op2,
1525 am.new_op1, sign_extension);
1527 sign_extension = new_rd_ia32_Const(dbgi, irg, new_block, NULL, 0, 0);
1528 add_irn_dep(sign_extension, get_irg_frame(irg));
1530 new_node = new_rd_ia32_Div(dbgi, irg, new_block, addr->base,
1531 addr->index, new_mem, am.new_op2,
1532 am.new_op1, sign_extension);
/* keep the original pinned state (division can trap) */
1535 set_irn_pinned(new_node, get_irn_pinned(node));
1537 set_am_attributes(new_node, &am);
1538 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1540 new_node = fix_mem_proj(new_node, &am);
/* Mod shares the common DivMod lowering in create_Div(). */
1546 static ir_node *gen_Mod(ir_node *node) {
1547 return create_Div(node);
/* Div shares the common DivMod lowering in create_Div(). */
1550 static ir_node *gen_Div(ir_node *node) {
1551 return create_Div(node);
/* DivMod shares the common lowering in create_Div(). */
1554 static ir_node *gen_DivMod(ir_node *node) {
1555 return create_Div(node);
1561 * Creates an ia32 floating Div.
1563 * @return The created ia32 xDiv node
1565 static ir_node *gen_Quot(ir_node *node)
1567 ir_node *op1 = get_Quot_left(node);
1568 ir_node *op2 = get_Quot_right(node);
/* non-commutative, so only plain source address mode is matched */
1570 if (ia32_cg_config.use_sse2) {
1571 return gen_binop(node, op1, op2, new_rd_ia32_xDiv, match_am);
1573 return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfdiv, match_am);
1579 * Creates an ia32 Shl.
1581 * @return The created ia32 Shl node
1583 static ir_node *gen_Shl(ir_node *node) {
1584 ir_node *left = get_Shl_left(node);
1585 ir_node *right = get_Shl_right(node);
/* left shift doesn't care about the upper bits of the value -> mode neutral */
1587 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
1588 match_mode_neutral | match_immediate);
1592 * Creates an ia32 Shr.
1594 * @return The created ia32 Shr node
1596 static ir_node *gen_Shr(ir_node *node) {
1597 ir_node *left = get_Shr_left(node);
1598 ir_node *right = get_Shr_right(node);
/* logical right shift depends on the upper bits, so no match_mode_neutral */
1600 return gen_shift_binop(node, left, right, new_rd_ia32_Shr, match_immediate);
1606 * Creates an ia32 Sar.
1608 * @return The created ia32 Shrs node
1610 static ir_node *gen_Shrs(ir_node *node) {
1611 ir_node *left = get_Shrs_left(node);
1612 ir_node *right = get_Shrs_right(node);
1613 ir_mode *mode = get_irn_mode(node);
/* Shrs(x, 31) on a 32-bit signed value spreads the sign bit -> use Cltd
 * (cdq), which the register allocator can place better than a Sar */
1615 if(is_Const(right) && mode == mode_Is) {
1616 tarval *tv = get_Const_tarval(right);
1617 long val = get_tarval_long(tv);
1619 /* this is a sign extension */
1620 ir_graph *irg = current_ir_graph;
1621 dbg_info *dbgi = get_irn_dbg_info(node);
1622 ir_node *block = be_transform_node(get_nodes_block(node));
1624 ir_node *new_op = be_transform_node(op);
1625 ir_node *pval = new_rd_ia32_ProduceVal(dbgi, irg, block);
1626 add_irn_dep(pval, get_irg_frame(irg));
1628 return new_rd_ia32_Cltd(dbgi, irg, block, new_op, pval);
1632 /* 8 or 16 bit sign extension? */
/* Shrs(Shl(x, C), C) with C == 16 or 24 is an 8/16-bit sign extension:
 * emit an I2I Conv (movsx) instead of the shift pair */
1633 if(is_Const(right) && is_Shl(left) && mode == mode_Is) {
1634 ir_node *shl_left = get_Shl_left(left);
1635 ir_node *shl_right = get_Shl_right(left);
1636 if(is_Const(shl_right)) {
1637 tarval *tv1 = get_Const_tarval(right);
1638 tarval *tv2 = get_Const_tarval(shl_right);
1639 if(tv1 == tv2 && tarval_is_long(tv1)) {
1640 long val = get_tarval_long(tv1);
1641 if(val == 16 || val == 24) {
1642 dbg_info *dbgi = get_irn_dbg_info(node);
1643 ir_node *block = get_nodes_block(node);
1653 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* generic arithmetic right shift */
1662 return gen_shift_binop(node, left, right, new_rd_ia32_Sar, match_immediate);
1668 * Creates an ia32 RotL.
1670 * @param op1 The first operator
1671 * @param op2 The second operator
1672 * @return The created ia32 RotL node
1674 static ir_node *gen_RotL(ir_node *node, ir_node *op1, ir_node *op2) {
1675 return gen_shift_binop(node, op1, op2, new_rd_ia32_Rol, match_immediate);
1681 * Creates an ia32 RotR.
1682 * NOTE: There is no RotR with immediate because this would always be a RotL
1683 * "imm-mode_size_bits" which can be pre-calculated.
1685 * @param op1 The first operator
1686 * @param op2 The second operator
1687 * @return The created ia32 RotR node
1689 static ir_node *gen_RotR(ir_node *node, ir_node *op1, ir_node *op2) {
1690 return gen_shift_binop(node, op1, op2, new_rd_ia32_Ror, match_immediate);
1696 * Creates an ia32 RotR or RotL (depending on the found pattern).
1698 * @return The created ia32 RotL or RotR node
1700 static ir_node *gen_Rot(ir_node *node) {
1701 ir_node *rotate = NULL;
1702 ir_node *op1 = get_Rot_left(node);
1703 ir_node *op2 = get_Rot_right(node);
1705 /* Firm has only Rot (which is a RotL), so we are looking for a right (op2)
1706 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1707 that means we can create a RotR instead of an Add and a RotL */
/* pattern: Rot(x, Add(Minus(e), bits)) == RotR(x, e) */
1709 if (get_irn_op(op2) == op_Add) {
1711 ir_node *left = get_Add_left(add);
1712 ir_node *right = get_Add_right(add);
1713 if (is_Const(right)) {
1714 tarval *tv = get_Const_tarval(right);
1715 ir_mode *mode = get_irn_mode(node);
1716 long bits = get_mode_size_bits(mode);
1718 if (get_irn_op(left) == op_Minus &&
1719 tarval_is_long(tv) &&
1720 get_tarval_long(tv) == bits &&
1723 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1724 rotate = gen_RotR(node, op1, get_Minus_op(left));
/* fallback: plain left rotate */
1729 if (rotate == NULL) {
1730 rotate = gen_RotL(node, op1, op2);
1739 * Transforms a Minus node.
1741 * @return The created ia32 Minus node
1743 static ir_node *gen_Minus(ir_node *node)
1745 ir_node *op = get_Minus_op(node);
1746 ir_node *block = be_transform_node(get_nodes_block(node));
1747 ir_graph *irg = current_ir_graph;
1748 dbg_info *dbgi = get_irn_dbg_info(node);
1749 ir_mode *mode = get_irn_mode(node);
1754 if (mode_is_float(mode)) {
1755 ir_node *new_op = be_transform_node(op);
1756 if (ia32_cg_config.use_sse2) {
1757 /* TODO: non-optimal... if we have many xXors, then we should
1758 * rather create a load for the const and use that instead of
1759 * several AM nodes... */
/* SSE2: negate by xor-ing with the sign-bit constant (loaded via AM) */
1760 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1761 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1762 ir_node *nomem = new_rd_NoMem(irg);
1764 new_node = new_rd_ia32_xXor(dbgi, irg, block, noreg_gp, noreg_gp,
1765 nomem, new_op, noreg_xmm);
1767 size = get_mode_size_bits(mode);
1768 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1770 set_ia32_am_sc(new_node, ent);
1771 set_ia32_op_type(new_node, ia32_AddrModeS);
1772 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated change-sign instruction */
1774 new_node = new_rd_ia32_vfchs(dbgi, irg, block, new_op);
/* integer: plain Neg */
1777 new_node = gen_unop(node, op, new_rd_ia32_Neg, match_mode_neutral);
1780 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1786 * Transforms a Not node.
1788 * @return The created ia32 Not node
1790 static ir_node *gen_Not(ir_node *node) {
1791 ir_node *op = get_Not_op(node);
1793 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1794 assert (! mode_is_float(get_irn_mode(node)));
1796 return gen_unop(node, op, new_rd_ia32_Not, match_mode_neutral);
1802 * Transforms an Abs node.
1804 * @return The created ia32 Abs node
1806 static ir_node *gen_Abs(ir_node *node)
1808 ir_node *block = get_nodes_block(node);
1809 ir_node *new_block = be_transform_node(block);
1810 ir_node *op = get_Abs_op(node);
1811 ir_graph *irg = current_ir_graph;
1812 dbg_info *dbgi = get_irn_dbg_info(node);
1813 ir_mode *mode = get_irn_mode(node);
1814 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1815 ir_node *nomem = new_NoMem();
1821 if (mode_is_float(mode)) {
1822 new_op = be_transform_node(op);
/* SSE2: clear the sign bit by and-ing with the abs-mask constant */
1824 if (ia32_cg_config.use_sse2) {
1825 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1826 new_node = new_rd_ia32_xAnd(dbgi,irg, new_block, noreg_gp, noreg_gp,
1827 nomem, new_op, noreg_fp);
1829 size = get_mode_size_bits(mode);
1830 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1832 set_ia32_am_sc(new_node, ent);
1834 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
1836 set_ia32_op_type(new_node, ia32_AddrModeS);
1837 set_ia32_ls_mode(new_node, mode);
/* x87: dedicated fabs instruction */
1839 new_node = new_rd_ia32_vfabs(dbgi, irg, new_block, new_op);
1840 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* integer abs: sign = x >> 31 (Cltd/cdq), result = (x xor sign) - sign */
1843 ir_node *xor, *pval, *sign_extension;
1845 if (get_mode_size_bits(mode) == 32) {
1846 new_op = be_transform_node(op);
/* smaller modes are sign-extended to 32 bit first */
1848 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1851 pval = new_rd_ia32_ProduceVal(dbgi, irg, new_block);
1852 sign_extension = new_rd_ia32_Cltd(dbgi, irg, new_block,
1855 add_irn_dep(pval, get_irg_frame(irg));
1856 SET_IA32_ORIG_NODE(sign_extension,ia32_get_old_node_name(env_cg, node));
1858 xor = new_rd_ia32_Xor(dbgi, irg, new_block, noreg_gp, noreg_gp,
1859 nomem, new_op, sign_extension);
1860 SET_IA32_ORIG_NODE(xor, ia32_get_old_node_name(env_cg, node));
1862 new_node = new_rd_ia32_Sub(dbgi, irg, new_block, noreg_gp, noreg_gp,
1863 nomem, xor, sign_extension);
1864 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Produce a flags-mode value for a condition input and report the relation
 * (pn_Cmp) to test in *pnc_out. A Cmp proj is transformed directly;
 * any other mode_b value is compared against 0 with a Test. */
1870 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1872 ir_graph *irg = current_ir_graph;
1880 /* we have a Cmp as input */
1882 ir_node *pred = get_Proj_pred(node);
1884 flags = be_transform_node(pred);
1885 *pnc_out = get_Proj_proj(node);
1890 /* a mode_b value, we have to compare it against 0 */
1891 dbgi = get_irn_dbg_info(node);
1892 new_block = be_transform_node(get_nodes_block(node));
1893 new_op = be_transform_node(node);
1894 noreg = ia32_new_NoReg_gp(env_cg);
1895 nomem = new_NoMem();
/* Test op,op sets flags; "value != 0" is relation Lg */
1896 flags = new_rd_ia32_Test(dbgi, irg, new_block, noreg, noreg, nomem,
1897 new_op, new_op, 0, 0);
1898 *pnc_out = pn_Cmp_Lg;
1903 * Transforms a Load.
1905 * @return the created ia32 Load node
1907 static ir_node *gen_Load(ir_node *node) {
1908 ir_node *old_block = get_nodes_block(node);
1909 ir_node *block = be_transform_node(old_block);
1910 ir_node *ptr = get_Load_ptr(node);
1911 ir_node *mem = get_Load_mem(node);
1912 ir_node *new_mem = be_transform_node(mem);
1915 ir_graph *irg = current_ir_graph;
1916 dbg_info *dbgi = get_irn_dbg_info(node);
1917 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1918 ir_mode *mode = get_Load_mode(node);
1921 ia32_address_t addr;
1923 /* construct load address */
1924 memset(&addr, 0, sizeof(addr));
1925 ia32_create_address_mode(&addr, ptr, /*force=*/0);
1932 base = be_transform_node(base);
1938 index = be_transform_node(index);
/* choose the load flavour: SSE2 xLoad / x87 vfld for floats, otherwise
 * a gp Load (or a zero/sign-extending Conv for sub-32-bit modes) */
1941 if (mode_is_float(mode)) {
1942 if (ia32_cg_config.use_sse2) {
1943 new_node = new_rd_ia32_xLoad(dbgi, irg, block, base, index, new_mem,
1945 res_mode = mode_xmm;
1947 new_node = new_rd_ia32_vfld(dbgi, irg, block, base, index, new_mem,
1949 res_mode = mode_vfp;
1952 assert(mode != mode_b);
1954 /* create a conv node with address mode for smaller modes */
1955 if(get_mode_size_bits(mode) < 32) {
1956 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, block, base, index,
1957 new_mem, noreg, mode);
1959 new_node = new_rd_ia32_Load(dbgi, irg, block, base, index, new_mem);
1964 set_irn_pinned(new_node, get_irn_pinned(node));
1965 set_ia32_op_type(new_node, ia32_AddrModeS);
1966 set_ia32_ls_mode(new_node, mode);
1967 set_address(new_node, &addr);
/* floating (unpinned) loads may be rematerialized by the spiller */
1969 if(get_irn_pinned(node) == op_pin_state_floats) {
1970 add_ia32_flags(new_node, arch_irn_flags_rematerializable);
1973 /* make sure we are scheduled behind the initial IncSP/Barrier
1974 * to avoid spills being placed before it
1976 if (block == get_irg_start_block(irg)) {
1977 add_irn_dep(new_node, get_irg_frame(irg));
1980 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Decide whether a load result (node) feeding a store can be folded into a
 * destination-address-mode operation. Requires: the load result has a single
 * user, load and store are in the same block, the store's memory comes from
 * this load, both use the same pointer, and the other operand does not depend
 * on the load. */
1985 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
1986 ir_node *ptr, ir_node *other)
1993 /* we only use address mode if we're the only user of the load */
1994 if(get_irn_n_edges(node) > 1)
1997 load = get_Proj_pred(node);
2000 if(get_nodes_block(load) != block)
2003 /* Store should be attached to the load */
2004 if(!is_Proj(mem) || get_Proj_pred(mem) != load)
2006 /* store should have the same pointer as the load */
2007 if(get_Load_ptr(load) != ptr)
2010 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2011 if(other != NULL && get_nodes_block(other) == block
2012 && heights_reachable_in_block(heights, other, load))
/* Try to build a binary operation with destination address mode
 * ("op [mem], reg/imm"). Returns the new node, or falls through when the
 * load/store pair cannot be folded. func8bit is used for 8-bit modes. */
2018 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2019 ir_node *mem, ir_node *ptr, ir_mode *mode,
2020 construct_binop_dest_func *func,
2021 construct_binop_dest_func *func8bit,
2022 match_flags_t flags)
2024 ir_node *src_block = get_nodes_block(node);
2026 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2027 ir_graph *irg = current_ir_graph;
2032 ia32_address_mode_t am;
2033 ia32_address_t *addr = &am.addr;
2034 memset(&am, 0, sizeof(am));
2036 assert(flags & match_dest_am);
2037 assert(flags & match_immediate); /* there is no destam node without... */
2038 commutative = (flags & match_commutative) != 0;
/* fold whichever operand is the foldable load; for commutative ops
 * the roles of op1/op2 may be swapped */
2040 if(use_dest_am(src_block, op1, mem, ptr, op2)) {
2041 build_address(&am, op1);
2042 new_op = create_immediate_or_transform(op2, 0);
2043 } else if(commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2044 build_address(&am, op2);
2045 new_op = create_immediate_or_transform(op1, 0);
/* fill unused address inputs with NoReg / NoMem placeholders */
2050 if(addr->base == NULL)
2051 addr->base = noreg_gp;
2052 if(addr->index == NULL)
2053 addr->index = noreg_gp;
2054 if(addr->mem == NULL)
2055 addr->mem = new_NoMem();
2057 dbgi = get_irn_dbg_info(node);
2058 block = be_transform_node(src_block);
2059 if(get_mode_size_bits(mode) == 8) {
2060 new_node = func8bit(dbgi, irg, block, addr->base, addr->index,
2063 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem,
2066 set_address(new_node, addr);
2067 set_ia32_op_type(new_node, ia32_AddrModeD);
2068 set_ia32_ls_mode(new_node, mode);
2069 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to build a unary operation with destination address mode
 * ("op [mem]"). Returns the new node, or falls through when the
 * load/store pair cannot be folded. */
2074 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2075 ir_node *ptr, ir_mode *mode,
2076 construct_unop_dest_func *func)
2078 ir_graph *irg = current_ir_graph;
2079 ir_node *src_block = get_nodes_block(node);
2083 ia32_address_mode_t am;
2084 ia32_address_t *addr = &am.addr;
2085 memset(&am, 0, sizeof(am));
2087 if(!use_dest_am(src_block, op, mem, ptr, NULL))
2090 build_address(&am, op);
2092 dbgi = get_irn_dbg_info(node);
2093 block = be_transform_node(src_block);
2094 new_node = func(dbgi, irg, block, addr->base, addr->index, addr->mem);
2095 set_address(new_node, addr);
2096 set_ia32_op_type(new_node, ia32_AddrModeD);
2097 set_ia32_ls_mode(new_node, mode);
2098 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to turn Store(Psi(cond, 1, 0)) (or the negated 0/1 form) on an 8-bit
 * value into a single SetMem (setcc to memory). Falls through when the
 * pattern does not match. */
2103 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem) {
2104 ir_mode *mode = get_irn_mode(node);
2105 ir_node *psi_true = get_Psi_val(node, 0);
2106 ir_node *psi_default = get_Psi_default(node);
2117 ia32_address_t addr;
/* setcc only writes a byte */
2119 if(get_mode_size_bits(mode) != 8)
/* accept 1/0 directly, or 0/1 with the condition negated */
2122 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2124 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
2130 build_address_ptr(&addr, ptr, mem);
2132 irg = current_ir_graph;
2133 dbgi = get_irn_dbg_info(node);
2134 block = get_nodes_block(node);
2135 new_block = be_transform_node(block);
2136 cond = get_Psi_cond(node, 0);
2137 flags = get_flags_node(cond, &pnc);
2138 new_mem = be_transform_node(mem);
2139 new_node = new_rd_ia32_SetMem(dbgi, irg, new_block, addr.base,
2140 addr.index, addr.mem, flags, pnc, negated);
2141 set_address(new_node, &addr);
2142 set_ia32_op_type(new_node, ia32_AddrModeD);
2143 set_ia32_ls_mode(new_node, mode);
2144 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Try to fold a Store(op(Load(p), x), p) chain into a single
 * destination-address-mode instruction ("op [p], x"). Dispatches on the
 * opcode of the stored value; returns NULL-equivalent fallthrough when no
 * pattern matches. */
2149 static ir_node *try_create_dest_am(ir_node *node) {
2150 ir_node *val = get_Store_value(node);
2151 ir_node *mem = get_Store_mem(node);
2152 ir_node *ptr = get_Store_ptr(node);
2153 ir_mode *mode = get_irn_mode(val);
2154 unsigned bits = get_mode_size_bits(mode);
2159 /* handle only GP modes for now... */
2160 if(!mode_needs_gp_reg(mode))
2164 /* store must be the only user of the val node */
2165 if(get_irn_n_edges(val) > 1)
2167 /* skip pointless convs */
2169 ir_node *conv_op = get_Conv_op(val);
2170 ir_mode *pred_mode = get_irn_mode(conv_op);
2171 if(pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2179 /* value must be in the same block */
2180 if(get_nodes_block(node) != get_nodes_block(val))
2183 switch(get_irn_opcode(val)) {
/* Add +1/-1 become IncMem/DecMem, everything else AddMem */
2185 op1 = get_Add_left(val);
2186 op2 = get_Add_right(val);
2187 if(is_Const_1(op2)) {
2188 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2189 new_rd_ia32_IncMem);
2191 } else if(is_Const_Minus_1(op2)) {
2192 new_node = dest_am_unop(val, op1, mem, ptr, mode,
2193 new_rd_ia32_DecMem);
2196 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2197 new_rd_ia32_AddMem, new_rd_ia32_AddMem8Bit,
2198 match_dest_am | match_commutative |
2202 op1 = get_Sub_left(val);
2203 op2 = get_Sub_right(val);
/* Sub with const should have been normalized to Add(x, -C) */
2205 ir_fprintf(stderr, "Optimisation warning: not-normalize sub ,C"
2208 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2209 new_rd_ia32_SubMem, new_rd_ia32_SubMem8Bit,
2210 match_dest_am | match_immediate |
2214 op1 = get_And_left(val);
2215 op2 = get_And_right(val);
2216 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2217 new_rd_ia32_AndMem, new_rd_ia32_AndMem8Bit,
2218 match_dest_am | match_commutative |
2222 op1 = get_Or_left(val);
2223 op2 = get_Or_right(val);
2224 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2225 new_rd_ia32_OrMem, new_rd_ia32_OrMem8Bit,
2226 match_dest_am | match_commutative |
2230 op1 = get_Eor_left(val);
2231 op2 = get_Eor_right(val);
2232 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2233 new_rd_ia32_XorMem, new_rd_ia32_XorMem8Bit,
2234 match_dest_am | match_commutative |
2238 op1 = get_Shl_left(val);
2239 op2 = get_Shl_right(val);
2240 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2241 new_rd_ia32_ShlMem, new_rd_ia32_ShlMem,
2242 match_dest_am | match_immediate);
2245 op1 = get_Shr_left(val);
2246 op2 = get_Shr_right(val);
2247 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2248 new_rd_ia32_ShrMem, new_rd_ia32_ShrMem,
2249 match_dest_am | match_immediate);
2252 op1 = get_Shrs_left(val);
2253 op2 = get_Shrs_right(val);
2254 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2255 new_rd_ia32_SarMem, new_rd_ia32_SarMem,
2256 match_dest_am | match_immediate);
2259 op1 = get_Rot_left(val);
2260 op2 = get_Rot_right(val);
2261 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2262 new_rd_ia32_RolMem, new_rd_ia32_RolMem,
2263 match_dest_am | match_immediate);
2265 /* TODO: match ROR patterns... */
2267 new_node = try_create_SetMem(val, ptr, mem);
2270 op1 = get_Minus_op(val);
2271 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NegMem);
2274 /* should be lowered already */
2275 assert(mode != mode_b);
2276 op1 = get_Not_op(val);
2277 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_rd_ia32_NotMem);
/* inherit the store's pinned state if the folded node is looser */
2283 if(new_node != NULL) {
2284 if(get_irn_pinned(new_node) != op_pin_state_pinned &&
2285 get_irn_pinned(node) == op_pin_state_pinned) {
2286 set_irn_pinned(new_node, op_pin_state_pinned);
/* Returns non-zero if node is a Conv from a float mode to a 32-bit
 * gp-register mode (candidate for an fist store). */
2293 static int is_float_to_int32_conv(const ir_node *node)
2295 ir_mode *mode = get_irn_mode(node);
2299 if(get_mode_size_bits(mode) != 32 || !mode_needs_gp_reg(mode))
2304 conv_op = get_Conv_op(node);
2305 conv_mode = get_irn_mode(conv_op);
2307 if(!mode_is_float(conv_mode))
2314 * Transform a Store(floatConst).
2316 * @return the created ia32 Store node
2318 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns) {
2319 ir_mode *mode = get_irn_mode(cns);
2320 int size = get_mode_size_bits(mode);
2321 tarval *tv = get_Const_tarval(cns);
2322 ir_node *block = get_nodes_block(node);
2323 ir_node *new_block = be_transform_node(block);
2324 ir_node *ptr = get_Store_ptr(node);
2325 ir_node *mem = get_Store_mem(node);
2326 ir_graph *irg = current_ir_graph;
2327 dbg_info *dbgi = get_irn_dbg_info(node);
2328 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2331 ia32_address_t addr;
/* assemble the low 32 bits of the constant from its bytes (little endian) */
2333 unsigned val = get_tarval_sub_bits(tv, 0) |
2334 (get_tarval_sub_bits(tv, 1) << 8) |
2335 (get_tarval_sub_bits(tv, 2) << 16) |
2336 (get_tarval_sub_bits(tv, 3) << 24);
2337 ir_node *imm = create_Immediate(NULL, 0, val);
2339 /* construct store address */
2340 memset(&addr, 0, sizeof(addr));
2341 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2343 if (addr.base == NULL) {
2346 addr.base = be_transform_node(addr.base);
2349 if (addr.index == NULL) {
2352 addr.index = be_transform_node(addr.index);
2354 addr.mem = be_transform_node(mem);
/* first 32-bit gp store of the constant bits */
2356 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2357 addr.index, addr.mem, imm);
2359 set_irn_pinned(new_node, get_irn_pinned(node));
2360 set_ia32_op_type(new_node, ia32_AddrModeD);
2361 set_ia32_ls_mode(new_node, mode_Iu);
2363 set_address(new_node, &addr);
2365 /** add more stores if needed */
/* constants wider than 32 bit are stored in further 32-bit chunks,
 * chained through the previous store's memory */
2367 unsigned val = get_tarval_sub_bits(tv, ofs) |
2368 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2369 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2370 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2371 ir_node *imm = create_Immediate(NULL, 0, val);
2374 addr.mem = new_node;
2376 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2377 addr.index, addr.mem, imm);
2379 set_irn_pinned(new_node, get_irn_pinned(node));
2380 set_ia32_op_type(new_node, ia32_AddrModeD);
2381 set_ia32_ls_mode(new_node, mode_Iu);
2383 set_address(new_node, &addr);
2388 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2393 * Transforms a normal Store.
2395 * @return the created ia32 Store node
2397 static ir_node *gen_normal_Store(ir_node *node)
2399 ir_node *val = get_Store_value(node);
2400 ir_mode *mode = get_irn_mode(val);
2401 ir_node *block = get_nodes_block(node);
2402 ir_node *new_block = be_transform_node(block);
2403 ir_node *ptr = get_Store_ptr(node);
2404 ir_node *mem = get_Store_mem(node);
2405 ir_graph *irg = current_ir_graph;
2406 dbg_info *dbgi = get_irn_dbg_info(node);
2407 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2410 ia32_address_t addr;
2412 /* check for destination address mode */
2413 new_node = try_create_dest_am(node);
2414 if (new_node != NULL)
2417 /* construct store address */
2418 memset(&addr, 0, sizeof(addr));
2419 ia32_create_address_mode(&addr, ptr, /*force=*/0);
2421 if (addr.base == NULL) {
2424 addr.base = be_transform_node(addr.base);
2427 if (addr.index == NULL) {
2430 addr.index = be_transform_node(addr.index);
2432 addr.mem = be_transform_node(mem);
/* float store: SSE2 xStore or x87 vfst */
2434 if (mode_is_float(mode)) {
2435 /* convs (and strict-convs) before stores are unnecessary if the mode
2437 while (is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2438 val = get_Conv_op(val);
2440 new_val = be_transform_node(val);
2441 if (ia32_cg_config.use_sse2) {
2442 new_node = new_rd_ia32_xStore(dbgi, irg, new_block, addr.base,
2443 addr.index, addr.mem, new_val);
2445 new_node = new_rd_ia32_vfst(dbgi, irg, new_block, addr.base,
2446 addr.index, addr.mem, new_val, mode);
/* Store(float->int32 Conv): use vfist (fistp) to convert+store in one go */
2448 } else if (is_float_to_int32_conv(val)) {
2449 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2450 val = get_Conv_op(val);
2452 /* convs (and strict-convs) before stores are unnecessary if the mode
2454 while(is_Conv(val) && mode == get_irn_mode(get_Conv_op(val))) {
2455 val = get_Conv_op(val);
2457 new_val = be_transform_node(val);
2459 new_node = new_rd_ia32_vfist(dbgi, irg, new_block, addr.base,
2460 addr.index, addr.mem, new_val, trunc_mode);
/* plain gp store; dedicated 8-bit variant for byte stores */
2462 new_val = create_immediate_or_transform(val, 0);
2463 assert(mode != mode_b);
2465 if (get_mode_size_bits(mode) == 8) {
2466 new_node = new_rd_ia32_Store8Bit(dbgi, irg, new_block, addr.base,
2467 addr.index, addr.mem, new_val);
2469 new_node = new_rd_ia32_Store(dbgi, irg, new_block, addr.base,
2470 addr.index, addr.mem, new_val);
2474 set_irn_pinned(new_node, get_irn_pinned(node));
2475 set_ia32_op_type(new_node, ia32_AddrModeD);
2476 set_ia32_ls_mode(new_node, mode);
2478 set_address(new_node, &addr);
2479 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2485 * Transforms a Store.
2487 * @return the created ia32 Store node
2489 static ir_node *gen_Store(ir_node *node)
2491 ir_node *val = get_Store_value(node);
2492 ir_mode *mode = get_irn_mode(val);
/* ia32 has no floating point immediates: a Store of a float Const is
 * routed to gen_float_const_Store unless the constant is "simple"
 * (is_simple_sse_Const / is_simple_x87_Const -- presumably a constant
 * the FPU/SSE unit can materialize directly; TODO confirm), in which
 * case the normal store path is used. */
2494 if (mode_is_float(mode) && is_Const(val)) {
2497 /* we are storing a floating point constant */
2498 if (ia32_cg_config.use_sse2) {
2499 transform = !is_simple_sse_Const(val);
2501 transform = !is_simple_x87_Const(val);
2504 return gen_float_const_Store(node, val);
/* all non-float (and simple-const float) stores */
2506 return gen_normal_Store(node);
2510 * Transforms a Switch.
2512 * @return the created ia32 SwitchJmp node
/* Note: the argument is a Cond node whose selector is a 32-bit integer
 * (see the get_Cond_* accessors and the mode-size assert below). */
2514 static ir_node *create_Switch(ir_node *node)
2516 ir_graph *irg = current_ir_graph;
2517 dbg_info *dbgi = get_irn_dbg_info(node);
2518 ir_node *block = be_transform_node(get_nodes_block(node));
2519 ir_node *sel = get_Cond_selector(node);
2520 ir_node *new_sel = be_transform_node(sel);
2521 int switch_min = INT_MAX;
2522 int switch_max = INT_MIN;
2523 long default_pn = get_Cond_defaultProj(node);
2525 const ir_edge_t *edge;
2527 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2529 /* determine the smallest switch case value */
2530 foreach_out_edge(node, edge) {
2531 ir_node *proj = get_edge_src_irn(edge);
2532 long pn = get_Proj_proj(proj);
/* the default proj does not contribute to the case-value range */
2533 if(pn == default_pn)
/* reject switches whose case-value span would create an oversized
 * jump table (range computed from the min/max gathered above) */
2542 if((unsigned) (switch_max - switch_min) > 256000) {
2543 panic("Size of switch %+F bigger than 256000", node);
2546 if (switch_min != 0) {
2547 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2549 /* if smallest switch case is not 0 we need an additional sub */
/* bias the selector with a Lea carrying a negative offset so the
 * smallest case maps to table index 0 */
2550 new_sel = new_rd_ia32_Lea(dbgi, irg, block, new_sel, noreg);
2551 add_ia32_am_offs_int(new_sel, -switch_min);
2552 set_ia32_op_type(new_sel, ia32_AddrModeS);
2554 SET_IA32_ORIG_NODE(new_sel, ia32_get_old_node_name(env_cg, node));
2557 new_node = new_rd_ia32_SwitchJmp(dbgi, irg, block, new_sel, default_pn);
2558 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2564 * Transform a Cond node.
/* A Cond with a non-boolean selector is a switch and is handled by
 * create_Switch; a mode_b selector comes from a compare, whose eflags
 * value and condition code (pnc) feed a conditional jump (Jcc). */
2566 static ir_node *gen_Cond(ir_node *node) {
2567 ir_node *block = get_nodes_block(node);
2568 ir_node *new_block = be_transform_node(block);
2569 ir_graph *irg = current_ir_graph;
2570 dbg_info *dbgi = get_irn_dbg_info(node);
2571 ir_node *sel = get_Cond_selector(node);
2572 ir_mode *sel_mode = get_irn_mode(sel);
2573 ir_node *flags = NULL;
2577 if (sel_mode != mode_b) {
2578 return create_Switch(node);
2581 /* we get flags from a cmp */
2582 flags = get_flags_node(sel, &pnc);
2584 new_node = new_rd_ia32_Jcc(dbgi, irg, new_block, flags, pnc);
2585 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2591 * Transforms a CopyB node.
2593 * @return The transformed node.
2595 static ir_node *gen_CopyB(ir_node *node) {
2596 ir_node *block = be_transform_node(get_nodes_block(node));
2597 ir_node *src = get_CopyB_src(node);
2598 ir_node *new_src = be_transform_node(src);
2599 ir_node *dst = get_CopyB_dst(node);
2600 ir_node *new_dst = be_transform_node(dst);
2601 ir_node *mem = get_CopyB_mem(node);
2602 ir_node *new_mem = be_transform_node(mem);
2603 ir_node *res = NULL;
2604 ir_graph *irg = current_ir_graph;
2605 dbg_info *dbgi = get_irn_dbg_info(node);
2606 int size = get_type_size_bytes(get_CopyB_type(node));
2609 /* If we have to copy more than 32 bytes, we use REP MOVSx and */
2610 /* then we need the size explicitly in ECX. */
/* NOTE(review): the comment above says "32 bytes" but the code tests
 * 32 * 4 = 128 bytes -- comment and code disagree, confirm intent */
2611 if (size >= 32 * 4) {
2612 rem = size & 0x3; /* size % 4 */
/* Const supplies the copy count for the REP variant (lines scaling
 * size before this Const are elided here -- presumably size >> 2 to
 * count dwords; TODO confirm); rem carries the leftover 0..3 bytes */
2615 res = new_rd_ia32_Const(dbgi, irg, block, NULL, 0, size);
2616 add_irn_dep(res, get_irg_frame(irg));
2618 res = new_rd_ia32_CopyB(dbgi, irg, block, new_dst, new_src, res, new_mem, rem);
2621 ir_fprintf(stderr, "Optimisation warning copyb %+F with size <4\n",
/* smaller copies use the immediate-size variant, no count register */
2624 res = new_rd_ia32_CopyB_i(dbgi, irg, block, new_dst, new_src, new_mem, size);
2627 SET_IA32_ORIG_NODE(res, ia32_get_old_node_name(env_cg, node));
/* Duplicate a be_Copy node, normalising every general-purpose mode to
 * mode_Iu so the backend only ever sees one gp register mode. */
2632 static ir_node *gen_be_Copy(ir_node *node)
2634 ir_node *new_node = be_duplicate_node(node);
2635 ir_mode *mode = get_irn_mode(new_node);
2637 if (mode_needs_gp_reg(mode)) {
2638 set_irn_mode(new_node, mode_Iu);
/* Create an x87 floating point compare for a Cmp node.
 * With fucomi support the compare writes eflags directly; otherwise a
 * fnstsw-based compare is built and the FPU status word is moved into
 * eflags via Sahf.  When comparing against the constant 0 and ftst is
 * available, the cheaper single-operand vFtstFnstsw is used. */
2644 static ir_node *create_Fucom(ir_node *node)
2646 ir_graph *irg = current_ir_graph;
2647 dbg_info *dbgi = get_irn_dbg_info(node);
2648 ir_node *block = get_nodes_block(node);
2649 ir_node *new_block = be_transform_node(block);
2650 ir_node *left = get_Cmp_left(node);
2651 ir_node *new_left = be_transform_node(left);
2652 ir_node *right = get_Cmp_right(node);
2656 if(ia32_cg_config.use_fucomi) {
2657 new_right = be_transform_node(right);
2658 new_node = new_rd_ia32_vFucomi(dbgi, irg, new_block, new_left,
2660 set_ia32_commutative(new_node);
2661 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2663 if(ia32_cg_config.use_ftst && is_Const_0(right)) {
/* ftst implicitly compares against 0, the right operand is dropped */
2664 new_node = new_rd_ia32_vFtstFnstsw(dbgi, irg, new_block, new_left,
2667 new_right = be_transform_node(right);
2668 new_node = new_rd_ia32_vFucomFnstsw(dbgi, irg, new_block, new_left,
2672 set_ia32_commutative(new_node);
2674 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* copy the fnstsw result (via AH) into eflags */
2676 new_node = new_rd_ia32_Sahf(dbgi, irg, new_block, new_node);
2677 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* Create an SSE floating point compare (Ucomi) for a Cmp node.
 * match_arguments may fold one operand into a memory address mode
 * (match_am) and may swap operands (match_commutative, recorded in
 * am.ins_permuted). */
2683 static ir_node *create_Ucomi(ir_node *node)
2685 ir_graph *irg = current_ir_graph;
2686 dbg_info *dbgi = get_irn_dbg_info(node);
2687 ir_node *src_block = get_nodes_block(node);
2688 ir_node *new_block = be_transform_node(src_block);
2689 ir_node *left = get_Cmp_left(node);
2690 ir_node *right = get_Cmp_right(node);
2692 ia32_address_mode_t am;
2693 ia32_address_t *addr = &am.addr;
2695 match_arguments(&am, src_block, left, right, NULL,
2696 match_commutative | match_am);
2698 new_node = new_rd_ia32_Ucomi(dbgi, irg, new_block, addr->base, addr->index,
2699 addr->mem, am.new_op1, am.new_op2,
2701 set_am_attributes(new_node, &am);
2703 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/* reroute a possible memory Proj when source AM was used */
2705 new_node = fix_mem_proj(new_node, &am);
2711 * helper function: checks wether all Cmp projs are Lg or Eq which is needed
2712 * to fold an and into a test node
/* (presumably because only ==/!= against 0 can be answered from the
 * flags a TEST sets without keeping the And result -- TODO confirm) */
2714 static int can_fold_test_and(ir_node *node)
2716 const ir_edge_t *edge;
2718 /** we can only have eq and lg projs */
2719 foreach_out_edge(node, edge) {
2720 ir_node *proj = get_edge_src_irn(edge);
2721 pn_Cmp pnc = get_Proj_proj(proj);
2722 if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
/* Transform a Cmp node.
 * Float compares are dispatched to the SSE (Ucomi) or x87 (Fucom)
 * helpers.  Integer compares against 0 are turned into TEST where
 * possible; otherwise a regular CMP is built, in 8-bit or full-width
 * form depending on the compared mode. */
2729 static ir_node *gen_Cmp(ir_node *node)
2731 ir_graph *irg = current_ir_graph;
2732 dbg_info *dbgi = get_irn_dbg_info(node);
2733 ir_node *block = get_nodes_block(node);
2734 ir_node *new_block = be_transform_node(block);
2735 ir_node *left = get_Cmp_left(node);
2736 ir_node *right = get_Cmp_right(node);
2737 ir_mode *cmp_mode = get_irn_mode(left);
2739 ia32_address_mode_t am;
2740 ia32_address_t *addr = &am.addr;
2743 if(mode_is_float(cmp_mode)) {
2744 if (ia32_cg_config.use_sse2) {
2745 return create_Ucomi(node);
2747 return create_Fucom(node);
2751 assert(mode_needs_gp_reg(cmp_mode));
2753 /* we prefer the Test instruction where possible except cases where
2754 * we can use SourceAM */
2755 cmp_unsigned = !mode_is_signed(cmp_mode);
2756 if (is_Const_0(right)) {
/* And(a,b) == 0 with the And used only here and only by Eq/Lg projs
 * folds into Test(and_left, and_right) */
2758 get_irn_n_edges(left) == 1 &&
2759 can_fold_test_and(node)) {
2760 /* Test(and_left, and_right) */
2761 ir_node *and_left = get_And_left(left);
2762 ir_node *and_right = get_And_right(left);
2763 ir_mode *mode = get_irn_mode(and_left);
2765 match_arguments(&am, block, and_left, and_right, NULL,
2767 match_am | match_8bit_am | match_16bit_am |
2768 match_am_and_immediates | match_immediate |
2769 match_8bit | match_16bit);
2770 if (get_mode_size_bits(mode) == 8) {
2771 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2772 addr->index, addr->mem, am.new_op1,
2773 am.new_op2, am.ins_permuted,
2776 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2777 addr->index, addr->mem, am.new_op1,
2778 am.new_op2, am.ins_permuted, cmp_unsigned);
/* no foldable And: try to match the left operand alone (possibly
 * as a memory operand) */
2781 match_arguments(&am, block, NULL, left, NULL,
2782 match_am | match_8bit_am | match_16bit_am |
2783 match_8bit | match_16bit);
2784 if (am.op_type == ia32_AddrModeS) {
/* left came from memory: TEST cannot take two identical memory
 * operands, so compare against an immediate 0 instead */
2786 ir_node *imm_zero = try_create_Immediate(right, 0);
2787 if (get_mode_size_bits(cmp_mode) == 8) {
2788 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2789 addr->index, addr->mem, am.new_op2,
2790 imm_zero, am.ins_permuted,
2793 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2794 addr->index, addr->mem, am.new_op2,
2795 imm_zero, am.ins_permuted, cmp_unsigned);
2798 /* Test(left, left) */
2799 if (get_mode_size_bits(cmp_mode) == 8) {
2800 new_node = new_rd_ia32_Test8Bit(dbgi, irg, new_block, addr->base,
2801 addr->index, addr->mem, am.new_op2,
2802 am.new_op2, am.ins_permuted,
2805 new_node = new_rd_ia32_Test(dbgi, irg, new_block, addr->base,
2806 addr->index, addr->mem, am.new_op2,
2807 am.new_op2, am.ins_permuted,
2813 /* Cmp(left, right) */
2814 match_arguments(&am, block, left, right, NULL,
2815 match_commutative | match_am | match_8bit_am |
2816 match_16bit_am | match_am_and_immediates |
2817 match_immediate | match_8bit | match_16bit);
2818 if (get_mode_size_bits(cmp_mode) == 8) {
2819 new_node = new_rd_ia32_Cmp8Bit(dbgi, irg, new_block, addr->base,
2820 addr->index, addr->mem, am.new_op1,
2821 am.new_op2, am.ins_permuted,
2824 new_node = new_rd_ia32_Cmp(dbgi, irg, new_block, addr->base,
2825 addr->index, addr->mem, am.new_op1,
2826 am.new_op2, am.ins_permuted, cmp_unsigned);
2829 set_am_attributes(new_node, &am);
2830 assert(cmp_mode != NULL);
2831 set_ia32_ls_mode(new_node, cmp_mode);
2833 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2835 new_node = fix_mem_proj(new_node, &am);
/* Create a CMov for a Psi node: selects between the Psi's true and
 * default value based on flags/pnc.  Only valid when the target
 * supports cmov (asserted) and the value lives in a gp register. */
2840 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2843 ir_graph *irg = current_ir_graph;
2844 dbg_info *dbgi = get_irn_dbg_info(node);
2845 ir_node *block = get_nodes_block(node);
2846 ir_node *new_block = be_transform_node(block);
2847 ir_node *val_true = get_Psi_val(node, 0);
2848 ir_node *val_false = get_Psi_default(node);
2850 match_flags_t match_flags;
2851 ia32_address_mode_t am;
2852 ia32_address_t *addr;
2854 assert(ia32_cg_config.use_cmov);
2855 assert(mode_needs_gp_reg(get_irn_mode(val_true)));
2859 match_flags = match_commutative | match_am | match_16bit_am |
/* note the operand order: (val_false, val_true) -- cmov keeps op1 and
 * replaces it by op2 when the condition holds */
2862 match_arguments(&am, block, val_false, val_true, flags, match_flags);
2864 new_node = new_rd_ia32_CMov(dbgi, irg, new_block, addr->base, addr->index,
2865 addr->mem, am.new_op1, am.new_op2, new_flags,
2866 am.ins_permuted, pnc);
2867 set_am_attributes(new_node, &am);
2869 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
2871 new_node = fix_mem_proj(new_node, &am);
/* Create a Set (setcc) producing 0/1 from flags + condition code.
 * setcc only writes an 8-bit register, so the result is widened with a
 * zero-extending Conv (mode_Bu) whenever the original node's mode is
 * wider than 8 bits. */
2878 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2879 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2882 ir_graph *irg = current_ir_graph;
2883 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2884 ir_node *nomem = new_NoMem();
2885 ir_mode *mode = get_irn_mode(orig_node);
2888 new_node = new_rd_ia32_Set(dbgi, irg, new_block, flags, pnc, ins_permuted);
2889 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2891 /* we might need to conv the result up */
2892 if(get_mode_size_bits(mode) > 8) {
2893 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, noreg, noreg,
2894 nomem, new_node, mode_Bu);
2895 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, orig_node));
2902 * Transforms a Psi node into CMov.
2904 * @return The transformed node.
/* Special cases: Psi(c, 1, 0) becomes a plain Set, Psi(c, 0, 1) a Set
 * with permuted (negated) condition; everything else becomes a CMov.
 * Only single-condition, gp-register Psis are handled (asserted). */
2906 static ir_node *gen_Psi(ir_node *node)
2908 dbg_info *dbgi = get_irn_dbg_info(node);
2909 ir_node *block = get_nodes_block(node);
2910 ir_node *new_block = be_transform_node(block);
2911 ir_node *psi_true = get_Psi_val(node, 0);
2912 ir_node *psi_default = get_Psi_default(node);
2913 ir_node *cond = get_Psi_cond(node, 0);
2914 ir_node *flags = NULL;
2918 assert(get_Psi_n_conds(node) == 1);
2919 assert(get_irn_mode(cond) == mode_b);
2920 assert(mode_needs_gp_reg(get_irn_mode(node)));
2922 flags = get_flags_node(cond, &pnc);
2924 if(is_Const_1(psi_true) && is_Const_0(psi_default)) {
2925 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 0);
2926 } else if(is_Const_0(psi_true) && is_Const_1(psi_default)) {
/* inverted constants: same Set with ins_permuted = 1 */
2927 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, 1);
2929 new_node = create_CMov(node, cond, flags, pnc);
2936 * Create a conversion from x87 state register to general purpose.
/* Implemented as a spill round-trip: a vfist stores the value to the
 * frame with a truncating FPU control word, then an integer Load reads
 * it back.  Only modes up to 32 bits are supported (asserted). */
2938 static ir_node *gen_x87_fp_to_gp(ir_node *node) {
2939 ir_node *block = be_transform_node(get_nodes_block(node));
2940 ir_node *op = get_Conv_op(node);
2941 ir_node *new_op = be_transform_node(op);
2942 ia32_code_gen_t *cg = env_cg;
2943 ir_graph *irg = current_ir_graph;
2944 dbg_info *dbgi = get_irn_dbg_info(node);
2945 ir_node *noreg = ia32_new_NoReg_gp(cg);
2946 ir_node *trunc_mode = ia32_new_Fpu_truncate(cg);
2947 ir_mode *mode = get_irn_mode(node);
2948 ir_node *fist, *load;
2951 fist = new_rd_ia32_vfist(dbgi, irg, block, get_irg_frame(irg), noreg,
2952 new_NoMem(), new_op, trunc_mode);
2954 set_irn_pinned(fist, op_pin_state_floats);
2955 set_ia32_use_frame(fist);
2956 set_ia32_op_type(fist, ia32_AddrModeD);
2958 assert(get_mode_size_bits(mode) <= 32);
2959 /* exception we can only store signed 32 bit integers, so for unsigned
2960 we store a 64bit (signed) integer and load the lower bits */
2961 if(get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
2962 set_ia32_ls_mode(fist, mode_Ls);
2964 set_ia32_ls_mode(fist, mode_Is);
2966 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(cg, node));
/* reload the (lower 32 bits of the) stored integer from the frame;
 * the Load's memory input is the fist, serialising the two accesses */
2969 load = new_rd_ia32_Load(dbgi, irg, block, get_irg_frame(irg), noreg, fist);
2971 set_irn_pinned(load, op_pin_state_floats);
2972 set_ia32_use_frame(load);
2973 set_ia32_op_type(load, ia32_AddrModeS);
2974 set_ia32_ls_mode(load, mode_Is);
/* tell the stack-entity allocator how big the spill slot must be */
2975 if(get_ia32_ls_mode(fist) == mode_Ls) {
2976 ia32_attr_t *attr = get_ia32_attr(load);
2977 attr->data.need_64bit_stackent = 1;
2979 ia32_attr_t *attr = get_ia32_attr(load);
2980 attr->data.need_32bit_stackent = 1;
2982 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(cg, node));
2984 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
2988 * Creates a x87 strict Conv by placing a Sore and a Load
/* (sic: "Store and a Load")  The x87 stack always computes in extended
 * precision; a strict conversion forces rounding to tgt_mode by
 * storing the value to the frame and loading it back. */
2990 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
2992 ir_node *block = get_nodes_block(node);
2993 ir_graph *irg = current_ir_graph;
2994 dbg_info *dbgi = get_irn_dbg_info(node);
2995 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2996 ir_node *nomem = new_NoMem();
2997 ir_node *frame = get_irg_frame(irg);
2998 ir_node *store, *load;
3001 store = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, nomem, node,
3003 set_ia32_use_frame(store);
3004 set_ia32_op_type(store, ia32_AddrModeD);
3005 SET_IA32_ORIG_NODE(store, ia32_get_old_node_name(env_cg, node));
/* the load depends on the store through its memory input */
3007 load = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, store,
3009 set_ia32_use_frame(load);
3010 set_ia32_op_type(load, ia32_AddrModeS);
3011 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
3013 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3018 * Create a conversion from general purpose to x87 register
/* Implemented with fild: the integer is (if necessary) widened to 32
 * bits, stored to the frame and loaded into the x87 stack.  A signed
 * 32-bit source may instead be read directly from memory via source
 * address mode.  Unsigned 32-bit values do not fit a signed 32-bit
 * fild, so a 64-bit slot with a zeroed upper half is used. */
3020 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode) {
3021 ir_node *src_block = get_nodes_block(node);
3022 ir_node *block = be_transform_node(src_block);
3023 ir_graph *irg = current_ir_graph;
3024 dbg_info *dbgi = get_irn_dbg_info(node);
3025 ir_node *op = get_Conv_op(node);
3026 ir_node *new_op = NULL;
3030 ir_mode *store_mode;
3036 /* fild can use source AM if the operand is a signed 32bit integer */
3037 if (src_mode == mode_Is) {
3038 ia32_address_mode_t am;
3040 match_arguments(&am, src_block, NULL, op, NULL,
3041 match_am | match_try_am);
3042 if (am.op_type == ia32_AddrModeS) {
3043 ia32_address_t *addr = &am.addr;
3045 fild = new_rd_ia32_vfild(dbgi, irg, block, addr->base,
3046 addr->index, addr->mem);
3047 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3050 set_am_attributes(fild, &am);
3051 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
3053 fix_mem_proj(fild, &am);
/* fall through to the store+fild path */
3058 if(new_op == NULL) {
3059 new_op = be_transform_node(op);
3062 noreg = ia32_new_NoReg_gp(env_cg);
3063 nomem = new_NoMem();
3064 mode = get_irn_mode(op);
3066 /* first convert to 32 bit signed if necessary */
3067 src_bits = get_mode_size_bits(src_mode);
3068 if (src_bits == 8) {
3069 new_op = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, block, noreg, noreg, nomem,
3071 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3073 } else if (src_bits < 32) {
3074 new_op = new_rd_ia32_Conv_I2I(dbgi, irg, block, noreg, noreg, nomem,
3076 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
3080 assert(get_mode_size_bits(mode) == 32);
/* spill the integer into the frame so fild can read it */
3083 store = new_rd_ia32_Store(dbgi, irg, block, get_irg_frame(irg), noreg, nomem,
3086 set_ia32_use_frame(store);
3087 set_ia32_op_type(store, ia32_AddrModeD);
3088 set_ia32_ls_mode(store, mode_Iu);
3090 /* exception for 32bit unsigned, do a 64bit spill+load */
3091 if(!mode_is_signed(mode)) {
/* store a zero into the upper 4 bytes of the 64-bit slot so the
 * value reads back as a non-negative 64-bit signed integer */
3094 ir_node *zero_const = create_Immediate(NULL, 0, 0);
3096 ir_node *zero_store = new_rd_ia32_Store(dbgi, irg, block,
3097 get_irg_frame(irg), noreg, nomem,
3100 set_ia32_use_frame(zero_store);
3101 set_ia32_op_type(zero_store, ia32_AddrModeD);
3102 add_ia32_am_offs_int(zero_store, 4);
3103 set_ia32_ls_mode(zero_store, mode_Iu);
/* join both stores with a Sync so fild sees them both */
3108 store = new_rd_Sync(dbgi, irg, block, 2, in);
3109 store_mode = mode_Ls;
3111 store_mode = mode_Is;
3115 fild = new_rd_ia32_vfild(dbgi, irg, block, get_irg_frame(irg), noreg, store);
3117 set_ia32_use_frame(fild);
3118 set_ia32_op_type(fild, ia32_AddrModeS);
3119 set_ia32_ls_mode(fild, store_mode);
3121 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3127 * Create a conversion from one integer mode into another one
/* On ia32 only the narrowing/widening through the smaller of the two
 * modes matters, so the Conv is emitted with the smaller mode as its
 * load/store mode (movsx/movzx semantics come from that mode's sign). */
3129 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3130 dbg_info *dbgi, ir_node *block, ir_node *op,
3133 ir_graph *irg = current_ir_graph;
3134 int src_bits = get_mode_size_bits(src_mode);
3135 int tgt_bits = get_mode_size_bits(tgt_mode);
3136 ir_node *new_block = be_transform_node(block);
3138 ir_mode *smaller_mode;
3140 ia32_address_mode_t am;
3141 ia32_address_t *addr = &am.addr;
3144 if (src_bits < tgt_bits) {
3145 smaller_mode = src_mode;
3146 smaller_bits = src_bits;
3148 smaller_mode = tgt_mode;
3149 smaller_bits = tgt_bits;
3152 #ifdef DEBUG_libfirm
/* a Conv whose operand is a constant should have been folded before */
3154 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3159 match_arguments(&am, block, NULL, op, NULL,
3160 match_8bit | match_16bit |
3161 match_am | match_8bit_am | match_16bit_am);
3162 if (smaller_bits == 8) {
3163 new_node = new_rd_ia32_Conv_I2I8Bit(dbgi, irg, new_block, addr->base,
3164 addr->index, addr->mem, am.new_op2,
3167 new_node = new_rd_ia32_Conv_I2I(dbgi, irg, new_block, addr->base,
3168 addr->index, addr->mem, am.new_op2,
3171 set_am_attributes(new_node, &am);
3172 /* match_arguments assume that out-mode = in-mode, this isn't true here
3174 set_ia32_ls_mode(new_node, smaller_mode);
3175 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3176 new_node = fix_mem_proj(new_node, &am);
3181 * Transforms a Conv node.
3183 * @return The created ia32 Conv node
/* Dispatches on the source/target mode combination:
 *   mode_b -> int:        no-op (bools are already 0/1 ints)
 *   same mode:            no-op, except strict x87 convs
 *   float -> float:       SSE Conv_FP2FP or x87 store/load round-trip
 *   float -> int:         SSE Conv_FP2I or x87 fist path
 *   int -> float:         SSE Conv_I2FP or x87 fild path
 *   int -> int:           create_I2I_Conv (only if sizes differ) */
3185 static ir_node *gen_Conv(ir_node *node) {
3186 ir_node *block = get_nodes_block(node);
3187 ir_node *new_block = be_transform_node(block);
3188 ir_node *op = get_Conv_op(node);
3189 ir_node *new_op = NULL;
3190 ir_graph *irg = current_ir_graph;
3191 dbg_info *dbgi = get_irn_dbg_info(node);
3192 ir_mode *src_mode = get_irn_mode(op);
3193 ir_mode *tgt_mode = get_irn_mode(node);
3194 int src_bits = get_mode_size_bits(src_mode);
3195 int tgt_bits = get_mode_size_bits(tgt_mode);
3196 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3197 ir_node *nomem = new_rd_NoMem(irg);
3198 ir_node *res = NULL;
3200 if (src_mode == mode_b) {
3201 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3202 /* nothing to do, we already model bools as 0/1 ints */
3203 return be_transform_node(op);
3206 if (src_mode == tgt_mode) {
3207 if (get_Conv_strict(node)) {
3208 if (ia32_cg_config.use_sse2) {
3209 /* when we are in SSE mode, we can kill all strict no-op conversion */
3210 return be_transform_node(op);
3213 /* this should be optimized already, but who knows... */
3214 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3215 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3216 return be_transform_node(op);
3220 if (mode_is_float(src_mode)) {
3221 new_op = be_transform_node(op);
3222 /* we convert from float ... */
3223 if (mode_is_float(tgt_mode)) {
/* non-strict E->D narrowing needs no code on x87 */
3224 if(src_mode == mode_E && tgt_mode == mode_D
3225 && !get_Conv_strict(node)) {
3226 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3231 if (ia32_cg_config.use_sse2) {
3232 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3233 res = new_rd_ia32_Conv_FP2FP(dbgi, irg, new_block, noreg, noreg,
3235 set_ia32_ls_mode(res, tgt_mode);
3237 if(get_Conv_strict(node)) {
/* force rounding via a store/load round-trip */
3238 res = gen_x87_strict_conv(tgt_mode, new_op);
3239 SET_IA32_ORIG_NODE(get_Proj_pred(res), ia32_get_old_node_name(env_cg, node));
3242 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3247 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3248 if (ia32_cg_config.use_sse2) {
3249 res = new_rd_ia32_Conv_FP2I(dbgi, irg, new_block, noreg, noreg,
3251 set_ia32_ls_mode(res, src_mode);
3253 return gen_x87_fp_to_gp(node);
3257 /* we convert from int ... */
3258 if (mode_is_float(tgt_mode)) {
3260 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3261 if (ia32_cg_config.use_sse2) {
3262 new_op = be_transform_node(op);
3263 res = new_rd_ia32_Conv_I2FP(dbgi, irg, new_block, noreg, noreg,
3265 set_ia32_ls_mode(res, tgt_mode);
3267 res = gen_x87_gp_to_fp(node, src_mode);
3268 if(get_Conv_strict(node)) {
3269 res = gen_x87_strict_conv(tgt_mode, res);
3270 SET_IA32_ORIG_NODE(get_Proj_pred(res),
3271 ia32_get_old_node_name(env_cg, node));
3275 } else if(tgt_mode == mode_b) {
3276 /* mode_b lowering already took care that we only have 0/1 values */
3277 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3278 src_mode, tgt_mode));
3279 return be_transform_node(op);
3282 if (src_bits == tgt_bits) {
/* same width: reinterpretation only, no instruction needed */
3283 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3284 src_mode, tgt_mode));
3285 return be_transform_node(op);
3288 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
/* Check whether an integer value satisfies a gcc inline-assembly
 * immediate constraint letter.  The visible ranges (0..32, 0..63,
 * -128..127, 0xff/0xffff, 0..3, 0..255, 0..127) match gcc's x86
 * machine constraints 'I'..'O' -- the case labels are elided in this
 * view; TODO confirm against the full file. */
3296 static int check_immediate_constraint(long val, char immediate_constraint_type)
3298 switch (immediate_constraint_type) {
3302 return val >= 0 && val <= 32;
3304 return val >= 0 && val <= 63;
3306 return val >= -128 && val <= 127;
3308 return val == 0xff || val == 0xffff;
3310 return val >= 0 && val <= 3;
3312 return val >= 0 && val <= 255;
3314 return val >= 0 && val <= 127;
3318 panic("Invalid immediate constraint found");
/* Try to express an integer/reference node as an ia32 Immediate.
 * Recognised shapes: Const, SymConst, Add/Sub of (Const, SymConst),
 * each optionally wrapped in a Minus.  Returns the Immediate node, or
 * (in elided branches) NULL when the value cannot be encoded or does
 * not satisfy the given constraint letter. */
3322 static ir_node *try_create_Immediate(ir_node *node,
3323 char immediate_constraint_type)
3326 tarval *offset = NULL;
3327 int offset_sign = 0;
3329 ir_entity *symconst_ent = NULL;
3330 int symconst_sign = 0;
3332 ir_node *cnst = NULL;
3333 ir_node *symconst = NULL;
3336 mode = get_irn_mode(node);
3337 if(!mode_is_int(mode) && !mode_is_reference(mode)) {
/* strip an outer Minus, remembering the sign for both parts */
3341 if(is_Minus(node)) {
3343 node = get_Minus_op(node);
3346 if(is_Const(node)) {
3349 offset_sign = minus;
3350 } else if(is_SymConst(node)) {
3353 symconst_sign = minus;
3354 } else if(is_Add(node)) {
3355 ir_node *left = get_Add_left(node);
3356 ir_node *right = get_Add_right(node);
3357 if(is_Const(left) && is_SymConst(right)) {
3360 symconst_sign = minus;
3361 offset_sign = minus;
3362 } else if(is_SymConst(left) && is_Const(right)) {
3365 symconst_sign = minus;
3366 offset_sign = minus;
3368 } else if(is_Sub(node)) {
3369 ir_node *left = get_Sub_left(node);
3370 ir_node *right = get_Sub_right(node);
/* subtraction negates the sign of the subtrahend only */
3371 if(is_Const(left) && is_SymConst(right)) {
3374 symconst_sign = !minus;
3375 offset_sign = minus;
3376 } else if(is_SymConst(left) && is_Const(right)) {
3379 symconst_sign = minus;
3380 offset_sign = !minus;
3387 offset = get_Const_tarval(cnst);
3388 if(tarval_is_long(offset)) {
3389 val = get_tarval_long(offset);
/* tarval does not fit a long: cannot be encoded as an immediate */
3391 ir_fprintf(stderr, "Optimisation Warning: tarval from %+F is not a "
3396 if(!check_immediate_constraint(val, immediate_constraint_type))
3399 if(symconst != NULL) {
3400 if(immediate_constraint_type != 0) {
3401 /* we need full 32bits for symconsts */
3405 /* unfortunately the assembler/linker doesn't support -symconst */
3409 if(get_SymConst_kind(symconst) != symconst_addr_ent)
3411 symconst_ent = get_SymConst_entity(symconst);
3413 if(cnst == NULL && symconst == NULL)
3416 if(offset_sign && offset != NULL) {
3417 offset = tarval_neg(offset);
3420 new_node = create_Immediate(symconst_ent, symconst_sign, val);
/* Convenience wrapper: produce an Immediate when the node qualifies,
 * otherwise fall back to the normal node transformation. */
3425 static ir_node *create_immediate_or_transform(ir_node *node,
3426 char immediate_constraint_type)
3428 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3429 if (new_node == NULL) {
3430 new_node = be_transform_node(node);
/* Shared "no register needed" requirement, used for asm memory
 * constraints that are satisfied through the memory edge alone. */
3435 static const arch_register_req_t no_register_req = {
3436 arch_register_req_type_none,
3437 NULL, /* regclass */
3438 NULL, /* limit bitset */
3440 0 /* different pos */
3444 * An assembler constraint.
/* Parsing state + result for a single gcc inline-asm constraint:
 * the produced register requirement, whether an immediate operand is
 * acceptable, and which immediate constraint letter applies. */
3446 typedef struct constraint_t constraint_t;
3447 struct constraint_t {
3450 const arch_register_req_t **out_reqs;
3452 const arch_register_req_t *req;
3453 unsigned immediate_possible;
3454 char immediate_type;
/* Parse one gcc inline-asm constraint string into a register
 * requirement stored in *constraint.
 * Handled (case labels partly elided in this view): memory ('m'-like,
 * no requirement), the single-register letters a/b/c/d/D/S, the class
 * letters q/A/(general), gp/x87/SSE register classes, immediates
 * (i/n/I..O/g), "same as operand N" digits, and a set of explicitly
 * unsupported letters that panic.  pos is the operand index, used for
 * the other_same bitmask of same-as constraints. */
3457 static void parse_asm_constraint(int pos, constraint_t *constraint, const char *c)
3459 int immediate_possible = 0;
3460 char immediate_type = 0;
3461 unsigned limited = 0;
3462 const arch_register_class_t *cls = NULL;
3463 ir_graph *irg = current_ir_graph;
3464 struct obstack *obst = get_irg_obstack(irg);
3465 arch_register_req_t *req;
3466 unsigned *limited_ptr = NULL;
3470 /* TODO: replace all the asserts with nice error messages */
3473 /* a memory constraint: no need to do anything in backend about it
3474 * (the dependencies are already respected by the memory edge of
3476 constraint->req = &no_register_req;
/* single-register constraints: accumulate a "limited" bitmask over
 * the gp class; mixing with a non-gp class is rejected by assert */
3488 assert(cls == NULL ||
3489 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3490 cls = &ia32_reg_classes[CLASS_ia32_gp];
3491 limited |= 1 << REG_EAX;
3494 assert(cls == NULL ||
3495 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3496 cls = &ia32_reg_classes[CLASS_ia32_gp];
3497 limited |= 1 << REG_EBX;
3500 assert(cls == NULL ||
3501 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3502 cls = &ia32_reg_classes[CLASS_ia32_gp];
3503 limited |= 1 << REG_ECX;
3506 assert(cls == NULL ||
3507 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3508 cls = &ia32_reg_classes[CLASS_ia32_gp];
3509 limited |= 1 << REG_EDX;
3512 assert(cls == NULL ||
3513 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3514 cls = &ia32_reg_classes[CLASS_ia32_gp];
3515 limited |= 1 << REG_EDI;
3518 assert(cls == NULL ||
3519 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3520 cls = &ia32_reg_classes[CLASS_ia32_gp];
3521 limited |= 1 << REG_ESI;
3524 case 'q': /* q means lower part of the regs only, this makes no
3525 * difference to Q for us (we only assigne whole registers) */
3526 assert(cls == NULL ||
3527 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3528 cls = &ia32_reg_classes[CLASS_ia32_gp];
3529 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3533 assert(cls == NULL ||
3534 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3535 cls = &ia32_reg_classes[CLASS_ia32_gp];
3536 limited |= 1 << REG_EAX | 1 << REG_EDX;
3539 assert(cls == NULL ||
3540 (cls == &ia32_reg_classes[CLASS_ia32_gp] && limited != 0));
3541 cls = &ia32_reg_classes[CLASS_ia32_gp];
3542 limited |= 1 << REG_EAX | 1 << REG_EBX | 1 << REG_ECX |
3543 1 << REG_EDX | 1 << REG_ESI | 1 << REG_EDI |
/* whole-class constraints: any gp / x87 / SSE register */
3550 assert(cls == NULL);
3551 cls = &ia32_reg_classes[CLASS_ia32_gp];
3557 /* TODO: mark values so the x87 simulator knows about t and u */
3558 assert(cls == NULL);
3559 cls = &ia32_reg_classes[CLASS_ia32_vfp];
3564 assert(cls == NULL);
3565 /* TODO: check that sse2 is supported */
3566 cls = &ia32_reg_classes[CLASS_ia32_xmm];
/* immediate constraints: remember the letter for later range checks */
3576 assert(!immediate_possible);
3577 immediate_possible = 1;
3578 immediate_type = *c;
3582 assert(!immediate_possible);
3583 immediate_possible = 1;
3587 assert(!immediate_possible && cls == NULL);
3588 immediate_possible = 1;
3589 cls = &ia32_reg_classes[CLASS_ia32_gp];
/* digit: "same as output operand N" (input constraints only) */
3602 assert(constraint->is_in && "can only specify same constraint "
3605 sscanf(c, "%d%n", &same_as, &p);
3613 /* memory constraint no need to do anything in backend about it
3614 * (the dependencies are already respected by the memory edge of
3616 constraint->req = &no_register_req;
3619 case 'E': /* no float consts yet */
3620 case 'F': /* no float consts yet */
3621 case 's': /* makes no sense on x86 */
3622 case 'X': /* we can't support that in firm */
3625 case '<': /* no autodecrement on x86 */
3626 case '>': /* no autoincrement on x86 */
3627 case 'C': /* sse constant not supported yet */
3628 case 'G': /* 80387 constant not supported yet */
3629 case 'y': /* we don't support mmx registers yet */
3630 case 'Z': /* not available in 32 bit mode */
3631 case 'e': /* not available in 32 bit mode */
3632 panic("unsupported asm constraint '%c' found in (%+F)",
3633 *c, current_ir_graph);
3636 panic("unknown asm constraint '%c' found in (%+F)", *c,
3644 const arch_register_req_t *other_constr;
3646 assert(cls == NULL && "same as and register constraint not supported");
3647 assert(!immediate_possible && "same as and immediate constraint not "
3649 assert(same_as < constraint->n_outs && "wrong constraint number in "
3650 "same_as constraint");
3652 other_constr = constraint->out_reqs[same_as];
3654 req = obstack_alloc(obst, sizeof(req[0]));
3655 req->cls = other_constr->cls;
3656 req->type = arch_register_req_type_should_be_same;
3657 req->limited = NULL;
3658 req->other_same = 1U << pos;
3659 req->other_different = 0;
3661 /* switch constraints. This is because in firm we have same_as
3662 * constraints on the output constraints while in the gcc asm syntax
3663 * they are specified on the input constraints */
3664 constraint->req = other_constr;
3665 constraint->out_reqs[same_as] = req;
3666 constraint->immediate_possible = 0;
3670 if(immediate_possible && cls == NULL) {
3671 cls = &ia32_reg_classes[CLASS_ia32_gp];
3673 assert(!immediate_possible || cls == &ia32_reg_classes[CLASS_ia32_gp]);
3674 assert(cls != NULL);
3676 if(immediate_possible) {
3677 assert(constraint->is_in
3678 && "immediate make no sense for output constraints");
3680 /* todo: check types (no float input on 'r' constrained in and such... */
/* allocate room for the limited bitset right behind the req */
3683 req = obstack_alloc(obst, sizeof(req[0]) + sizeof(unsigned));
3684 limited_ptr = (unsigned*) (req+1);
3686 req = obstack_alloc(obst, sizeof(req[0]));
3688 memset(req, 0, sizeof(req[0]));
3691 req->type = arch_register_req_type_limited;
3692 *limited_ptr = limited;
3693 req->limited = limited_ptr;
3695 req->type = arch_register_req_type_normal;
3699 constraint->req = req;
3700 constraint->immediate_possible = immediate_possible;
3701 constraint->immediate_type = immediate_type;
/* Turn one asm clobber name into a register requirement limited to
 * exactly that register.  The register is found by linear search over
 * all classes; for gp registers the name is also matched with its
 * first character skipped (so e.g. "ax" matches "eax"). */
3704 static void parse_clobber(ir_node *node, int pos, constraint_t *constraint,
3705 const char *clobber)
3707 ir_graph *irg = get_irn_irg(node);
3708 struct obstack *obst = get_irg_obstack(irg);
3709 const arch_register_t *reg = NULL;
3712 arch_register_req_t *req;
3713 const arch_register_class_t *cls;
3718 /* TODO: construct a hashmap instead of doing linear search for clobber
3720 for(c = 0; c < N_CLASSES; ++c) {
3721 cls = & ia32_reg_classes[c];
3722 for(r = 0; r < cls->n_regs; ++r) {
3723 const arch_register_t *temp_reg = arch_register_for_index(cls, r);
3724 if(strcmp(temp_reg->name, clobber) == 0
3725 || (c == CLASS_ia32_gp && strcmp(temp_reg->name+1, clobber) == 0)) {
3734 panic("Register '%s' mentioned in asm clobber is unknown\n", clobber);
/* the limited bitset is one word wide, so the index must fit */
3738 assert(reg->index < 32);
3740 limited = obstack_alloc(obst, sizeof(limited[0]));
3741 *limited = 1 << reg->index;
3743 req = obstack_alloc(obst, sizeof(req[0]));
3744 memset(req, 0, sizeof(req[0]));
3745 req->type = arch_register_req_type_limited;
3747 req->limited = limited;
3749 constraint->req = req;
3750 constraint->immediate_possible = 0;
3751 constraint->immediate_type = 0;
/**
 * Returns non-zero if the given asm constraint denotes a memory operand.
 * NOTE(review): the loop body and return statements are elided in this
 * listing — presumably it scans for a memory constraint letter (e.g. 'm');
 * confirm against the full source.
 */
3754 static int is_memory_op(const ir_asm_constraint *constraint)
3756 ident *id = constraint->constraint;
3757 const char *str = get_id_str(id);
3760 for(c = str; *c != '\0'; ++c) {
3769 * generates code for an ASM node
/**
 * Transform a firm ASM node into an ia32_Asm node: parse output constraints,
 * clobbers and input constraints into register requirements, build the
 * position -> operand register_map, transform the inputs (using immediates
 * where the constraint allows it) and construct the ia32_Asm.
 * NOTE(review): several lines of the body are elided in this listing
 * (declarations, loop/branch closing braces, the immediate-substitution code
 * after 3858, and the final return) — notes below flag spots to re-check
 * against the full source.
 */
3771 static ir_node *gen_ASM(ir_node *node)
3774 ir_graph *irg = current_ir_graph;
3775 ir_node *block = get_nodes_block(node);
3776 ir_node *new_block = be_transform_node(block);
3777 dbg_info *dbgi = get_irn_dbg_info(node);
3781 int n_out_constraints;
3783 const arch_register_req_t **out_reg_reqs;
3784 const arch_register_req_t **in_reg_reqs;
3785 ia32_asm_reg_t *register_map;
3786 unsigned reg_map_size = 0;
3787 struct obstack *obst;
3788 const ir_asm_constraint *in_constraints;
3789 const ir_asm_constraint *out_constraints;
3791 constraint_t parsed_constraint;
3793 arity = get_irn_arity(node);
3794 in = alloca(arity * sizeof(in[0]));
3795 memset(in, 0, arity * sizeof(in[0]));
3797 n_out_constraints = get_ASM_n_output_constraints(node);
3798 n_clobbers = get_ASM_n_clobbers(node);
3799 out_arity = n_out_constraints + n_clobbers;
3800 /* hack to keep space for mem proj */
3804 in_constraints = get_ASM_input_constraints(node);
3805 out_constraints = get_ASM_output_constraints(node);
3806 clobbers = get_ASM_clobbers(node);
3808 /* construct output constraints */
3809 obst = get_irg_obstack(irg);
3810 out_reg_reqs = obstack_alloc(obst, out_arity * sizeof(out_reg_reqs[0]));
3811 parsed_constraint.out_reqs = out_reg_reqs;
3812 parsed_constraint.n_outs = n_out_constraints;
3813 parsed_constraint.is_in = 0;
3815 for(i = 0; i < out_arity; ++i) {
3818 if(i < n_out_constraints) {
3819 const ir_asm_constraint *constraint = &out_constraints[i];
3820 c = get_id_str(constraint->constraint);
3821 parse_asm_constraint(i, &parsed_constraint, c);
/* track the highest operand position to size the register_map below.
 * NOTE(review): the asserts below require pos < reg_map_size, so an elided
 * line presumably bumps reg_map_size past the maximum pos — confirm. */
3823 if(constraint->pos > reg_map_size)
3824 reg_map_size = constraint->pos;
3826 out_reg_reqs[i] = parsed_constraint.req;
/* clobbers follow the real outputs; requirements are written at i+1 to
 * leave the slot at n_out_constraints free for the mem proj (see the
 * "hack" above and the &no_register_req store after the loop).
 * NOTE(review): the i < out_arity - 1 bound skips the last clobber —
 * verify against the full source that no clobber requirement is lost. */
3827 } else if(i < out_arity - 1) {
3828 ident *glob_id = clobbers [i - n_out_constraints];
3829 assert(glob_id != NULL);
3830 c = get_id_str(glob_id);
3831 parse_clobber(node, i, &parsed_constraint, c);
3833 out_reg_reqs[i+1] = parsed_constraint.req;
3837 out_reg_reqs[n_out_constraints] = &no_register_req;
3839 /* construct input constraints */
3840 in_reg_reqs = obstack_alloc(obst, arity * sizeof(in_reg_reqs[0]));
3841 parsed_constraint.is_in = 1;
3842 for(i = 0; i < arity; ++i) {
3843 const ir_asm_constraint *constraint = &in_constraints[i];
3844 ident *constr_id = constraint->constraint;
3845 const char *c = get_id_str(constr_id);
3847 parse_asm_constraint(i, &parsed_constraint, c);
3848 in_reg_reqs[i] = parsed_constraint.req;
3850 if(constraint->pos > reg_map_size)
3851 reg_map_size = constraint->pos;
/* try to fold the operand into an immediate if the constraint allows it */
3853 if(parsed_constraint.immediate_possible) {
3854 ir_node *pred = get_irn_n(node, i);
3855 char imm_type = parsed_constraint.immediate_type;
3856 ir_node *immediate = try_create_Immediate(pred, imm_type);
3858 if(immediate != NULL) {
3865 register_map = NEW_ARR_D(ia32_asm_reg_t, obst, reg_map_size);
3866 memset(register_map, 0, reg_map_size * sizeof(register_map[0]));
/* fill the map entries for the outputs (%0, %1, ... positions) */
3868 for(i = 0; i < n_out_constraints; ++i) {
3869 const ir_asm_constraint *constraint = &out_constraints[i];
3870 unsigned pos = constraint->pos;
3872 assert(pos < reg_map_size);
3873 register_map[pos].use_input = 0;
3874 register_map[pos].valid = 1;
3875 register_map[pos].memory = is_memory_op(constraint);
3876 register_map[pos].inout_pos = i;
3877 register_map[pos].mode = constraint->mode;
3880 /* transform inputs */
3881 for(i = 0; i < arity; ++i) {
3882 const ir_asm_constraint *constraint = &in_constraints[i];
3883 unsigned pos = constraint->pos;
3884 ir_node *pred = get_irn_n(node, i);
3885 ir_node *transformed;
3887 assert(pos < reg_map_size);
3888 register_map[pos].use_input = 1;
3889 register_map[pos].valid = 1;
3890 register_map[pos].memory = is_memory_op(constraint);
3891 register_map[pos].inout_pos = i;
3892 register_map[pos].mode = constraint->mode;
3897 transformed = be_transform_node(pred);
3898 in[i] = transformed;
3901 new_node = new_rd_ia32_Asm(dbgi, irg, new_block, arity, in, out_arity,
3902 get_ASM_text(node), register_map);
3904 set_ia32_out_req_all(new_node, out_reg_reqs);
3905 set_ia32_in_req_all(new_node, in_reg_reqs);
3907 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3913 * Transforms a FrameAddr into an ia32 Add.
/**
 * Transform a be_FrameAddr into an ia32 Lea on the (transformed) frame
 * pointer; the frame entity is resolved later via set_ia32_frame_ent /
 * set_ia32_use_frame. (Return of new_node elided in this listing.)
 */
3915 static ir_node *gen_be_FrameAddr(ir_node *node) {
3916 ir_node *block = be_transform_node(get_nodes_block(node));
3917 ir_node *op = be_get_FrameAddr_frame(node);
3918 ir_node *new_op = be_transform_node(op);
3919 ir_graph *irg = current_ir_graph;
3920 dbg_info *dbgi = get_irn_dbg_info(node);
3921 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3924 new_node = new_rd_ia32_Lea(dbgi, irg, block, new_op, noreg);
3925 set_ia32_frame_ent(new_node, arch_get_frame_entity(env_cg->arch_env, node));
3926 set_ia32_use_frame(new_node);
3928 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
3934 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transform a be_Return. For SSE2 code returning a single primitive float
 * value, the value sits in an xmm register but the calling convention returns
 * floats on the x87 stack: spill xmm0 to the frame (xStoreSimple), reload it
 * as an x87 value (vfld), splice the new value/memory into a rebuilt Barrier,
 * then let be_duplicate_node build the Return on top of it.
 * All other returns are just duplicated unchanged.
 */
3936 static ir_node *gen_be_Return(ir_node *node) {
3937 ir_graph *irg = current_ir_graph;
3938 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3939 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3940 ir_entity *ent = get_irg_entity(irg);
3941 ir_type *tp = get_entity_type(ent);
3946 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3947 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3950 int pn_ret_val, pn_ret_mem, arity, i;
3952 assert(ret_val != NULL);
/* fast path: no result or no SSE2 -> nothing special to do */
3953 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3954 return be_duplicate_node(node);
3957 res_type = get_method_res_type(tp, 0);
3959 if (! is_Primitive_type(res_type)) {
3960 return be_duplicate_node(node);
3963 mode = get_type_mode(res_type);
3964 if (! mode_is_float(mode)) {
3965 return be_duplicate_node(node);
3968 assert(get_method_n_ress(tp) == 1);
3970 pn_ret_val = get_Proj_proj(ret_val);
3971 pn_ret_mem = get_Proj_proj(ret_mem);
3973 /* get the Barrier */
3974 barrier = get_Proj_pred(ret_val);
3976 /* get result input of the Barrier */
3977 ret_val = get_irn_n(barrier, pn_ret_val);
3978 new_ret_val = be_transform_node(ret_val);
3980 /* get memory input of the Barrier */
3981 ret_mem = get_irn_n(barrier, pn_ret_mem);
3982 new_ret_mem = be_transform_node(ret_mem);
3984 frame = get_irg_frame(irg);
3986 dbgi = get_irn_dbg_info(barrier);
3987 block = be_transform_node(get_nodes_block(barrier));
3989 noreg = ia32_new_NoReg_gp(env_cg);
3991 /* store xmm0 onto stack */
3992 sse_store = new_rd_ia32_xStoreSimple(dbgi, irg, block, frame, noreg,
3993 new_ret_mem, new_ret_val);
3994 set_ia32_ls_mode(sse_store, mode);
3995 set_ia32_op_type(sse_store, ia32_AddrModeD);
3996 set_ia32_use_frame(sse_store);
3998 /* load into x87 register */
3999 fld = new_rd_ia32_vfld(dbgi, irg, block, frame, noreg, sse_store, mode);
4000 set_ia32_op_type(fld, ia32_AddrModeS);
4001 set_ia32_use_frame(fld);
4003 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
4004 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
4006 /* create a new barrier */
4007 arity = get_irn_arity(barrier);
4008 in = alloca(arity * sizeof(in[0]));
/* rewire: value input -> fld result, memory input -> vfld mem proj,
 * everything else transformed normally (some lines elided in listing) */
4009 for (i = 0; i < arity; ++i) {
4012 if (i == pn_ret_val) {
4014 } else if (i == pn_ret_mem) {
4017 ir_node *in = get_irn_n(barrier, i);
4018 new_in = be_transform_node(in);
4023 new_barrier = new_ir_node(dbgi, irg, block,
4024 get_irn_op(barrier), get_irn_mode(barrier),
4026 copy_node_attr(barrier, new_barrier);
4027 be_duplicate_deps(barrier, new_barrier);
/* register the replacement so be_duplicate_node below picks it up */
4028 be_set_transformed_node(barrier, new_barrier);
4029 mark_irn_visited(barrier);
4031 /* transform normally */
4032 return be_duplicate_node(node);
4036 * Transform a be_AddSP into an ia32_SubSP.
/**
 * Transform a be_AddSP into an ia32_SubSP: reserving stack space means
 * subtracting from esp since the stack grows downwards.
 */
4038 static ir_node *gen_be_AddSP(ir_node *node)
4040 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
4041 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
4043 return gen_binop(node, sp, sz, new_rd_ia32_SubSP, match_am);
4047 * Transform a be_SubSP into an ia32_AddSP
/**
 * Transform a be_SubSP into an ia32_AddSP: releasing stack space means
 * adding to esp (mirror of gen_be_AddSP).
 */
4049 static ir_node *gen_be_SubSP(ir_node *node)
4051 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
4052 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
4054 return gen_binop(node, sp, sz, new_rd_ia32_AddSP, match_am);
4058 * This function just sets the register for the Unknown node
4059 * as this is not done during register allocation because Unknown
4060 * is an "ignore" node.
/**
 * Transform an Unknown node: xmm-Unknown for SSE2 floats, a vfldz (load zero)
 * for x87 floats (the x87 simulator mishandles real Unknowns), gp-Unknown for
 * integer modes; any other mode is a hard error.
 */
4062 static ir_node *gen_Unknown(ir_node *node) {
4063 ir_mode *mode = get_irn_mode(node);
4065 if (mode_is_float(mode)) {
4066 if (ia32_cg_config.use_sse2) {
4067 return ia32_new_Unknown_xmm(env_cg);
4069 /* Unknown nodes are buggy in x87 simulator, use zero for now... */
4070 ir_graph *irg = current_ir_graph;
4071 dbg_info *dbgi = get_irn_dbg_info(node);
4072 ir_node *block = get_irg_start_block(irg);
4073 ir_node *ret = new_rd_ia32_vfldz(dbgi, irg, block);
4075 /* Const Nodes before the initial IncSP are a bad idea, because
4076 * they could be spilled and we have no SP ready at that point yet.
4077 * So add a dependency to the initial frame pointer calculation to
4078 * avoid that situation.
4080 add_irn_dep(ret, get_irg_frame(irg));
4083 } else if (mode_needs_gp_reg(mode)) {
4084 return ia32_new_Unknown_gp(env_cg);
4086 panic("unsupported Unknown-Mode");
4092 * Change some phi modes
/**
 * Transform a Phi: keep the node but switch its mode to the backend register
 * class mode (gp / sse / vfp — the mode assignments themselves are elided in
 * this listing). Predecessors are kept untransformed for now because Phis may
 * close loops; they are fixed up later via be_enqueue_preds.
 */
4094 static ir_node *gen_Phi(ir_node *node) {
4095 ir_node *block = be_transform_node(get_nodes_block(node));
4096 ir_graph *irg = current_ir_graph;
4097 dbg_info *dbgi = get_irn_dbg_info(node);
4098 ir_mode *mode = get_irn_mode(node);
4101 if(mode_needs_gp_reg(mode)) {
4102 /* we shouldn't have any 64bit stuff around anymore */
4103 assert(get_mode_size_bits(mode) <= 32);
4104 /* all integer operations are on 32bit registers now */
4106 } else if(mode_is_float(mode)) {
4107 if (ia32_cg_config.use_sse2) {
4114 /* phi nodes allow loops, so we use the old arguments for now
4115 * and fix this later */
4116 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
4117 get_irn_in(node) + 1)
4118 copy_node_attr(node, phi);
4119 be_duplicate_deps(node, phi);
4121 be_set_transformed_node(node, phi);
4122 be_enqueue_preds(node);
/**
 * Transform an IJmp (computed/indirect jump). The target may be matched as an
 * address-mode operand or an immediate; fix_mem_proj repairs memory projs if
 * address mode consumed a load. (Final return elided in this listing.)
 */
4130 static ir_node *gen_IJmp(ir_node *node)
4132 ir_node *block = get_nodes_block(node);
4133 ir_node *new_block = be_transform_node(block);
4134 ir_graph *irg = current_ir_graph;
4135 dbg_info *dbgi = get_irn_dbg_info(node);
4136 ir_node *op = get_IJmp_target(node);
4138 ia32_address_mode_t am;
4139 ia32_address_t *addr = &am.addr;
/* jump targets are pointers */
4141 assert(get_irn_mode(op) == mode_P);
4143 match_arguments(&am, block, NULL, op, NULL,
4144 match_am | match_8bit_am | match_16bit_am |
4145 match_immediate | match_8bit | match_16bit);
4147 new_node = new_rd_ia32_IJmp(dbgi, irg, new_block, addr->base, addr->index,
4148 addr->mem, am.new_op2);
4149 set_am_attributes(new_node, &am);
4150 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
4152 new_node = fix_mem_proj(new_node, &am);
/* Constructor signatures shared by the gen_lowered_Load/Store helpers below,
 * matching the generated new_rd_ia32_* constructor functions. */
4157 typedef ir_node *construct_load_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4160 typedef ir_node *construct_store_func(dbg_info *db, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index, \
4161 ir_node *val, ir_node *mem);
4164 * Transforms a lowered Load into a "real" one.
/**
 * Transform a lowered Load (ia32_l_*) into a "real" ia32 load built by
 * @p func, copying over all address-mode attributes (offset, scale, symconst,
 * sign, ls mode, frame entity). Inputs: ptr at 0, mem at 1.
 * (Final return elided in this listing.)
 */
4166 static ir_node *gen_lowered_Load(ir_node *node, construct_load_func func)
4168 ir_node *block = be_transform_node(get_nodes_block(node));
4169 ir_node *ptr = get_irn_n(node, 0);
4170 ir_node *new_ptr = be_transform_node(ptr);
4171 ir_node *mem = get_irn_n(node, 1);
4172 ir_node *new_mem = be_transform_node(mem);
4173 ir_graph *irg = current_ir_graph;
4174 dbg_info *dbgi = get_irn_dbg_info(node);
4175 ir_mode *mode = get_ia32_ls_mode(node);
4176 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4179 new_op = func(dbgi, irg, block, new_ptr, noreg, new_mem);
/* copy the source address-mode attributes onto the new node */
4181 set_ia32_op_type(new_op, ia32_AddrModeS);
4182 set_ia32_am_offs_int(new_op, get_ia32_am_offs_int(node));
4183 set_ia32_am_scale(new_op, get_ia32_am_scale(node));
4184 set_ia32_am_sc(new_op, get_ia32_am_sc(node));
4185 if (is_ia32_am_sc_sign(node))
4186 set_ia32_am_sc_sign(new_op);
4187 set_ia32_ls_mode(new_op, mode);
4188 if (is_ia32_use_frame(node)) {
4189 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4190 set_ia32_use_frame(new_op);
4193 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4199 * Transforms a lowered Store into a "real" one.
/**
 * Transform a lowered Store (ia32_l_*) into a "real" ia32 store built by
 * @p func. Inputs: ptr at 0, val at 1, mem at 2; destination address mode,
 * ls mode and frame entity are carried over. (Final return elided.)
 */
4201 static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
4203 ir_node *block = be_transform_node(get_nodes_block(node));
4204 ir_node *ptr = get_irn_n(node, 0);
4205 ir_node *new_ptr = be_transform_node(ptr);
4206 ir_node *val = get_irn_n(node, 1);
4207 ir_node *new_val = be_transform_node(val);
4208 ir_node *mem = get_irn_n(node, 2);
4209 ir_node *new_mem = be_transform_node(mem);
4210 ir_graph *irg = current_ir_graph;
4211 dbg_info *dbgi = get_irn_dbg_info(node);
4212 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4213 ir_mode *mode = get_ia32_ls_mode(node);
4217 new_op = func(dbgi, irg, block, new_ptr, noreg, new_val, new_mem);
4219 am_offs = get_ia32_am_offs_int(node);
4220 add_ia32_am_offs_int(new_op, am_offs);
4222 set_ia32_op_type(new_op, ia32_AddrModeD);
4223 set_ia32_ls_mode(new_op, mode);
4224 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4225 set_ia32_use_frame(new_op);
4227 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
/** Lowered ShlDep (shift-left with extra dependency) -> ia32 Shl. */
4232 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
4234 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
4235 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
4237 return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
4238 match_immediate | match_mode_neutral);
/** Lowered ShrDep (logical shift-right with dependency) -> ia32 Shr.
 * (match flags argument continues on an elided line.) */
4241 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
4243 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
4244 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
4245 return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
/** Lowered SarDep (arithmetic shift-right with dependency) -> ia32 Sar.
 * (match flags argument continues on an elided line.) */
4249 static ir_node *gen_ia32_l_SarDep(ir_node *node)
4251 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
4252 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
4253 return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
/**
 * Lowered Add -> ia32 Add. The result may come back as a Proj when address
 * mode was matched; unwrap to the Add itself and force mode_T so the flags
 * result stays available for the paired Adc of the 64-bit lowering.
 */
4257 static ir_node *gen_ia32_l_Add(ir_node *node) {
4258 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
4259 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
4260 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Add,
4261 match_commutative | match_am | match_immediate |
4262 match_mode_neutral);
4264 if(is_Proj(lowered)) {
4265 lowered = get_Proj_pred(lowered);
4267 assert(is_ia32_Add(lowered));
4268 set_irn_mode(lowered, mode_T);
/** Lowered Adc (add with carry) -> ia32 Adc, consuming the flags input. */
4274 static ir_node *gen_ia32_l_Adc(ir_node *node)
4276 return gen_binop_flags(node, new_rd_ia32_Adc,
4277 match_commutative | match_am | match_immediate |
4278 match_mode_neutral);
4282 * Transforms an ia32_l_vfild into a "real" ia32_vfild node
4284 * @param node The node to transform
4285 * @return the created ia32 vfild node
/** Lowered vfild -> real ia32 vfild (delegates to gen_lowered_Load). */
4287 static ir_node *gen_ia32_l_vfild(ir_node *node) {
4288 return gen_lowered_Load(node, new_rd_ia32_vfild);
4292 * Transforms an ia32_l_Load into a "real" ia32_Load node
4294 * @param node The node to transform
4295 * @return the created ia32 Load node
/** Lowered Load -> real ia32 Load (delegates to gen_lowered_Load). */
4297 static ir_node *gen_ia32_l_Load(ir_node *node) {
4298 return gen_lowered_Load(node, new_rd_ia32_Load);
4302 * Transforms an ia32_l_Store into a "real" ia32_Store node
4304 * @param node The node to transform
4305 * @return the created ia32 Store node
/** Lowered Store -> real ia32 Store (delegates to gen_lowered_Store). */
4307 static ir_node *gen_ia32_l_Store(ir_node *node) {
4308 return gen_lowered_Store(node, new_rd_ia32_Store);
4312 * Transforms a l_vfist into a "real" vfist node.
4314 * @param node The node to transform
4315 * @return the created ia32 vfist node
/**
 * Lowered vfist -> real ia32 vfist. Unlike the generic gen_lowered_Store this
 * needs the extra trunc_mode input (fpu control word for truncating rounding).
 * Inputs: ptr at 0, val at 1, mem at 2. (Final return elided in this listing.)
 */
4317 static ir_node *gen_ia32_l_vfist(ir_node *node) {
4318 ir_node *block = be_transform_node(get_nodes_block(node));
4319 ir_node *ptr = get_irn_n(node, 0);
4320 ir_node *new_ptr = be_transform_node(ptr);
4321 ir_node *val = get_irn_n(node, 1);
4322 ir_node *new_val = be_transform_node(val);
4323 ir_node *mem = get_irn_n(node, 2);
4324 ir_node *new_mem = be_transform_node(mem);
4325 ir_graph *irg = current_ir_graph;
4326 dbg_info *dbgi = get_irn_dbg_info(node);
4327 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4328 ir_mode *mode = get_ia32_ls_mode(node);
4329 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4333 new_op = new_rd_ia32_vfist(dbgi, irg, block, new_ptr, noreg, new_mem,
4334 new_val, trunc_mode);
4336 am_offs = get_ia32_am_offs_int(node);
4337 add_ia32_am_offs_int(new_op, am_offs);
4339 set_ia32_op_type(new_op, ia32_AddrModeD);
4340 set_ia32_ls_mode(new_op, mode);
4341 set_ia32_frame_ent(new_op, get_ia32_frame_ent(node));
4342 set_ia32_use_frame(new_op);
4344 SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env_cg, node));
4350 * Transforms a l_MulS into a "real" MulS node.
4352 * @return the created ia32 Mul node
/** Lowered unsigned widening Mul -> ia32 Mul (edx:eax result). */
4354 static ir_node *gen_ia32_l_Mul(ir_node *node) {
4355 ir_node *left = get_binop_left(node);
4356 ir_node *right = get_binop_right(node);
4358 return gen_binop(node, left, right, new_rd_ia32_Mul,
4359 match_commutative | match_am | match_mode_neutral);
4363 * Transforms a l_IMulS into a "real" IMul1OPS node.
4365 * @return the created ia32 IMul1OP node
/** Lowered signed widening IMul -> ia32 IMul1OP (one-operand form). */
4367 static ir_node *gen_ia32_l_IMul(ir_node *node) {
4368 ir_node *left = get_binop_left(node);
4369 ir_node *right = get_binop_right(node);
4371 return gen_binop(node, left, right, new_rd_ia32_IMul1OP,
4372 match_commutative | match_am | match_mode_neutral);
/**
 * Lowered Sub -> ia32 Sub; like gen_ia32_l_Add the node is forced to mode_T
 * so its flags output remains usable by the paired Sbb. Note: no
 * match_commutative here — subtraction is not commutative.
 */
4375 static ir_node *gen_ia32_l_Sub(ir_node *node) {
4376 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
4377 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
4378 ir_node *lowered = gen_binop(node, left, right, new_rd_ia32_Sub,
4379 match_am | match_immediate | match_mode_neutral);
4381 if(is_Proj(lowered)) {
4382 lowered = get_Proj_pred(lowered);
4384 assert(is_ia32_Sub(lowered));
4385 set_irn_mode(lowered, mode_T);
/** Lowered Sbb (subtract with borrow) -> ia32 Sbb, consuming flags. */
4391 static ir_node *gen_ia32_l_Sbb(ir_node *node) {
4392 return gen_binop_flags(node, new_rd_ia32_Sbb,
4393 match_am | match_immediate | match_mode_neutral);
4397 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
4398 * op1 - target to be shifted
4399 * op2 - contains bits to be shifted into target
4401 * Only op3 can be an immediate.
/**
 * Transform lowered l_ShlD/l_ShrD (64-bit double-word shifts) into ia32
 * ShlD/ShrD. The shift count may be an immediate; Convs on the count are
 * stripped since only the low 5 bits matter on ia32.
 * (Final return elided in this listing.)
 */
4403 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
4404 ir_node *low, ir_node *count)
4406 ir_node *block = get_nodes_block(node);
4407 ir_node *new_block = be_transform_node(block);
4408 ir_graph *irg = current_ir_graph;
4409 dbg_info *dbgi = get_irn_dbg_info(node);
4410 ir_node *new_high = be_transform_node(high);
4411 ir_node *new_low = be_transform_node(low);
4415 /* the shift amount can be any mode that is bigger than 5 bits, since all
4416 * other bits are ignored anyway */
4417 while (is_Conv(count) && get_irn_n_edges(count) == 1) {
4418 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
4419 count = get_Conv_op(count);
4421 new_count = create_immediate_or_transform(count, 0);
4423 if (is_ia32_l_ShlD(node)) {
4424 new_node = new_rd_ia32_ShlD(dbgi, irg, new_block, new_high, new_low,
4427 new_node = new_rd_ia32_ShrD(dbgi, irg, new_block, new_high, new_low,
4430 SET_IA32_ORIG_NODE(new_node, ia32_get_old_node_name(env_cg, node));
/** Lowered ShlD -> ia32 ShlD via gen_lowered_64bit_shifts. */
4435 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4437 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4438 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4439 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4440 return gen_lowered_64bit_shifts(node, high, low, count);
/** Lowered ShrD -> ia32 ShrD via gen_lowered_64bit_shifts. */
4443 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4445 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4446 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4447 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4448 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Lowered long-long -> float conversion: store the two 32-bit halves to a
 * 64-bit frame slot (low word unsigned at offset 0, high word signed at
 * offset 4 — little-endian layout), Sync the two stores, then fild the 64-bit
 * integer into an x87 register. Only the signed case is implemented.
 */
4451 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node) {
4452 ir_node *src_block = get_nodes_block(node);
4453 ir_node *block = be_transform_node(src_block);
4454 ir_graph *irg = current_ir_graph;
4455 dbg_info *dbgi = get_irn_dbg_info(node);
4456 ir_node *frame = get_irg_frame(irg);
4457 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4458 ir_node *nomem = new_NoMem();
4459 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4460 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4461 ir_node *new_val_low = be_transform_node(val_low);
4462 ir_node *new_val_high = be_transform_node(val_high);
4467 ir_node *store_high;
4469 if(!mode_is_signed(get_irn_mode(val_high))) {
4470 panic("unsigned long long -> float not supported yet (%+F)", node);
4474 store_low = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4476 store_high = new_rd_ia32_Store(dbgi, irg, block, frame, noreg, nomem,
4478 SET_IA32_ORIG_NODE(store_low, ia32_get_old_node_name(env_cg, node));
4479 SET_IA32_ORIG_NODE(store_high, ia32_get_old_node_name(env_cg, node));
4481 set_ia32_use_frame(store_low);
4482 set_ia32_use_frame(store_high);
4483 set_ia32_op_type(store_low, ia32_AddrModeD);
4484 set_ia32_op_type(store_high, ia32_AddrModeD);
4485 set_ia32_ls_mode(store_low, mode_Iu);
4486 set_ia32_ls_mode(store_high, mode_Is);
/* high half lives 4 bytes above the low half in the 64-bit slot */
4487 add_ia32_am_offs_int(store_high, 4);
4491 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4494 fild = new_rd_ia32_vfild(dbgi, irg, block, frame, noreg, sync);
4496 set_ia32_use_frame(fild);
4497 set_ia32_op_type(fild, ia32_AddrModeS);
4498 set_ia32_ls_mode(fild, mode_Ls);
4500 SET_IA32_ORIG_NODE(fild, ia32_get_old_node_name(env_cg, node));
4502 return new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
/**
 * Lowered float -> long-long conversion: fist the x87 value into a 64-bit
 * frame slot with truncating rounding; the two 32-bit halves are read back by
 * gen_Proj_l_FloattoLL. (trunc_mode argument line and the final return are
 * elided in this listing.)
 */
4505 static ir_node *gen_ia32_l_FloattoLL(ir_node *node) {
4506 ir_node *src_block = get_nodes_block(node);
4507 ir_node *block = be_transform_node(src_block);
4508 ir_graph *irg = current_ir_graph;
4509 dbg_info *dbgi = get_irn_dbg_info(node);
4510 ir_node *frame = get_irg_frame(irg);
4511 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4512 ir_node *nomem = new_NoMem();
4513 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4514 ir_node *new_val = be_transform_node(val);
4515 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
4520 fist = new_rd_ia32_vfist(dbgi, irg, block, frame, noreg, nomem, new_val,
4522 SET_IA32_ORIG_NODE(fist, ia32_get_old_node_name(env_cg, node));
4523 set_ia32_use_frame(fist);
4524 set_ia32_op_type(fist, ia32_AddrModeD);
4525 set_ia32_ls_mode(fist, mode_Ls);
4531 * the BAD transformer.
/** Fallback transformer: aborts for node kinds without a transform function. */
4533 static ir_node *bad_transform(ir_node *node) {
4534 panic("No transform function for %+F available.\n", node);
/**
 * Transform a Proj of l_FloattoLL: load one 32-bit half back from the 64-bit
 * frame slot the fist wrote — offset 4 for the high half, 0 for the low half.
 * The stack entity must still be allocated as 64 bits even though each load
 * reads only 32 (hence need_64bit_stackent). (Final return elided.)
 */
4538 static ir_node *gen_Proj_l_FloattoLL(ir_node *node) {
4539 ir_graph *irg = current_ir_graph;
4540 ir_node *block = be_transform_node(get_nodes_block(node));
4541 ir_node *pred = get_Proj_pred(node);
4542 ir_node *new_pred = be_transform_node(pred);
4543 ir_node *frame = get_irg_frame(irg);
4544 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4545 dbg_info *dbgi = get_irn_dbg_info(node);
4546 long pn = get_Proj_proj(node);
4551 load = new_rd_ia32_Load(dbgi, irg, block, frame, noreg, new_pred);
4552 SET_IA32_ORIG_NODE(load, ia32_get_old_node_name(env_cg, node));
4553 set_ia32_use_frame(load);
4554 set_ia32_op_type(load, ia32_AddrModeS);
4555 set_ia32_ls_mode(load, mode_Iu);
4556 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4557 * 32 bit from it with this particular load */
4558 attr = get_ia32_attr(load);
4559 attr->data.need_64bit_stackent = 1;
4561 if (pn == pn_ia32_l_FloattoLL_res_high) {
4562 add_ia32_am_offs_int(load, 4);
4564 assert(pn == pn_ia32_l_FloattoLL_res_low);
4567 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
4573 * Transform the Projs of an AddSP.
/**
 * Transform the Projs of a be_AddSP. Since be_AddSP became ia32_SubSP (stack
 * grows downwards), the proj numbers are mapped onto SubSP outputs; the new
 * sp proj is pinned to esp.
 */
4575 static ir_node *gen_Proj_be_AddSP(ir_node *node) {
4576 ir_node *block = be_transform_node(get_nodes_block(node));
4577 ir_node *pred = get_Proj_pred(node);
4578 ir_node *new_pred = be_transform_node(pred);
4579 ir_graph *irg = current_ir_graph;
4580 dbg_info *dbgi = get_irn_dbg_info(node);
4581 long proj = get_Proj_proj(node);
4583 if (proj == pn_be_AddSP_sp) {
4584 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4585 pn_ia32_SubSP_stack);
4586 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4588 } else if(proj == pn_be_AddSP_res) {
4589 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4590 pn_ia32_SubSP_addr);
4591 } else if (proj == pn_be_AddSP_M) {
4592 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
/* unexpected proj number — should be unreachable */
4596 return new_rd_Unknown(irg, get_irn_mode(node));
4600 * Transform the Projs of a SubSP.
/**
 * Transform the Projs of a be_SubSP onto the ia32_AddSP outputs
 * (mirror of gen_Proj_be_AddSP); the new sp proj is pinned to esp.
 */
4602 static ir_node *gen_Proj_be_SubSP(ir_node *node) {
4603 ir_node *block = be_transform_node(get_nodes_block(node));
4604 ir_node *pred = get_Proj_pred(node);
4605 ir_node *new_pred = be_transform_node(pred);
4606 ir_graph *irg = current_ir_graph;
4607 dbg_info *dbgi = get_irn_dbg_info(node);
4608 long proj = get_Proj_proj(node);
4610 if (proj == pn_be_SubSP_sp) {
4611 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4612 pn_ia32_AddSP_stack);
4613 arch_set_irn_register(env_cg->arch_env, res, &ia32_gp_regs[REG_ESP]);
4615 } else if (proj == pn_be_SubSP_M) {
4616 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
/* unexpected proj number — should be unreachable */
4620 return new_rd_Unknown(irg, get_irn_mode(node));
4624 * Transform and renumber the Projs from a Load.
/**
 * Transform and renumber the Projs of a Load onto the outputs of the
 * transformed ia32 node (Load / Conv_I2I / xLoad / vfld). Memory projs of
 * multi-user loads are deferred because the load may still be folded into a
 * source address mode; projs reaching a node consumed by address mode may
 * only be memory projs.
 *
 * Fix vs. original: the vfld branch's pn_Load_X_except case returned
 * pn_ia32_xLoad_X_exc (copy-paste from the xLoad branch); it must use the
 * vfld's own exception proj number pn_ia32_vfld_X_exc.
 */
4626 static ir_node *gen_Proj_Load(ir_node *node) {
4628 ir_node *block = be_transform_node(get_nodes_block(node));
4629 ir_node *pred = get_Proj_pred(node);
4630 ir_graph *irg = current_ir_graph;
4631 dbg_info *dbgi = get_irn_dbg_info(node);
4632 long proj = get_Proj_proj(node);
4635 /* loads might be part of source address mode matches, so we don't
4636 transform the ProjMs yet (with the exception of loads whose result is
4639 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4642 assert(pn_ia32_Load_M == 1); /* convention: mem-result of Source-AM
4644 /* this is needed, because sometimes we have loops that are only
4645 reachable through the ProjM */
4646 be_enqueue_preds(node);
4647 /* do it in 2 steps, to silence firm verifier */
4648 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4649 set_Proj_proj(res, pn_ia32_Load_M);
4653 /* renumber the proj */
4654 new_pred = be_transform_node(pred);
4655 if (is_ia32_Load(new_pred)) {
4658 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4660 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4661 case pn_Load_X_regular:
4662 return new_rd_Jmp(dbgi, irg, block);
4663 case pn_Load_X_except:
4664 /* This Load might raise an exception. Mark it. */
4665 set_ia32_exc_label(new_pred, 1);
4666 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4670 } else if (is_ia32_Conv_I2I(new_pred) ||
4671 is_ia32_Conv_I2I8Bit(new_pred)) {
/* the load was folded into a conv: give it a mem output on demand */
4672 set_irn_mode(new_pred, mode_T);
4673 if (proj == pn_Load_res) {
4674 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4675 } else if (proj == pn_Load_M) {
4676 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4678 } else if (is_ia32_xLoad(new_pred)) {
4681 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4683 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4684 case pn_Load_X_regular:
4685 return new_rd_Jmp(dbgi, irg, block);
4686 case pn_Load_X_except:
4687 /* This Load might raise an exception. Mark it. */
4688 set_ia32_exc_label(new_pred, 1);
4689 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4693 } else if (is_ia32_vfld(new_pred)) {
4696 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4698 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4699 case pn_Load_X_regular:
4700 return new_rd_Jmp(dbgi, irg, block);
4701 case pn_Load_X_except:
4702 /* This Load might raise an exception. Mark it. */
4703 set_ia32_exc_label(new_pred, 1);
4704 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_vfld_X_exc);
4709 /* can happen for ProJMs when source address mode happened for the
4712 /* however it should not be the result proj, as that would mean the
4713 load had multiple users and should not have been used for
4715 if (proj != pn_Load_M) {
4716 panic("internal error: transformed node not a Load");
4718 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
/* unexpected proj number — should be unreachable */
4722 return new_rd_Unknown(irg, get_irn_mode(node));
4726 * Transform and renumber the Projs from a DivMod like instruction.
/**
 * Transform and renumber the Projs of a Div/Mod/DivMod. All three map onto a
 * single ia32 Div/IDiv which has separate div_res and mod_res outputs, so the
 * outer switch dispatches on the original opcode and the inner switches
 * (heads elided in this listing) map the proj numbers.
 */
4728 static ir_node *gen_Proj_DivMod(ir_node *node) {
4729 ir_node *block = be_transform_node(get_nodes_block(node));
4730 ir_node *pred = get_Proj_pred(node);
4731 ir_node *new_pred = be_transform_node(pred);
4732 ir_graph *irg = current_ir_graph;
4733 dbg_info *dbgi = get_irn_dbg_info(node);
4734 ir_mode *mode = get_irn_mode(node);
4735 long proj = get_Proj_proj(node);
4737 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
4739 switch (get_irn_opcode(pred)) {
4743 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4745 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4746 case pn_Div_X_regular:
4747 return new_rd_Jmp(dbgi, irg, block);
4748 case pn_Div_X_except:
4749 set_ia32_exc_label(new_pred, 1);
4750 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4758 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4760 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4761 case pn_Mod_X_except:
4762 set_ia32_exc_label(new_pred, 1);
4763 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4771 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4772 case pn_DivMod_res_div:
4773 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4774 case pn_DivMod_res_mod:
4775 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4776 case pn_DivMod_X_regular:
4777 return new_rd_Jmp(dbgi, irg, block);
4778 case pn_DivMod_X_except:
4779 set_ia32_exc_label(new_pred, 1);
4780 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
/* unexpected proj number — should be unreachable */
4790 return new_rd_Unknown(irg, mode);
4794 * Transform and renumber the Projs from a CopyB.
/**
 * Transform and renumber the Projs of a CopyB onto the ia32 CopyB /
 * CopyB_i (constant-size variant) memory output.
 */
4796 static ir_node *gen_Proj_CopyB(ir_node *node) {
4797 ir_node *block = be_transform_node(get_nodes_block(node));
4798 ir_node *pred = get_Proj_pred(node);
4799 ir_node *new_pred = be_transform_node(pred);
4800 ir_graph *irg = current_ir_graph;
4801 dbg_info *dbgi = get_irn_dbg_info(node);
4802 ir_mode *mode = get_irn_mode(node);
4803 long proj = get_Proj_proj(node);
4806 case pn_CopyB_M_regular:
4807 if (is_ia32_CopyB_i(new_pred)) {
4808 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4809 } else if (is_ia32_CopyB(new_pred)) {
4810 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
/* unexpected proj number — should be unreachable */
4818 return new_rd_Unknown(irg, mode);
4822 * Transform and renumber the Projs from a Quot.
/**
 * Transform and renumber the Projs of a Quot (float division) onto the
 * outputs of the transformed xDiv (SSE) or vfdiv (x87) node.
 */
4824 static ir_node *gen_Proj_Quot(ir_node *node) {
4825 ir_node *block = be_transform_node(get_nodes_block(node));
4826 ir_node *pred = get_Proj_pred(node);
4827 ir_node *new_pred = be_transform_node(pred);
4828 ir_graph *irg = current_ir_graph;
4829 dbg_info *dbgi = get_irn_dbg_info(node);
4830 ir_mode *mode = get_irn_mode(node);
4831 long proj = get_Proj_proj(node);
4835 if (is_ia32_xDiv(new_pred)) {
4836 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4837 } else if (is_ia32_vfdiv(new_pred)) {
4838 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4842 if (is_ia32_xDiv(new_pred)) {
4843 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4844 } else if (is_ia32_vfdiv(new_pred)) {
4845 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4848 case pn_Quot_X_regular:
4849 case pn_Quot_X_except:
/* unexpected proj number — should be unreachable */
4855 return new_rd_Unknown(irg, mode);
4859 * Transform the Thread Local Storage Proj.
/* Replaces the TLS Proj by an ia32 LdTls node which materializes the TLS
 * base address in a gp register (mode_Iu).
 * NOTE(review): the return statement and closing brace are elided in this
 * excerpt. */
4861 static ir_node *gen_Proj_tls(ir_node *node) {
4862 ir_node *block = be_transform_node(get_nodes_block(node));
4863 ir_graph *irg = current_ir_graph;
/* deliberately no debug info: the LdTls has no direct source counterpart */
4864 dbg_info *dbgi = NULL;
4865 ir_node *res = new_rd_ia32_LdTls(dbgi, irg, block, mode_Iu);
/* Duplicates a be_Call and marks it as flag-modifying — presumably because a
 * call clobbers the eflags register on ia32 (TODO confirm).
 * NOTE(review): the return statement and closing brace are elided in this
 * excerpt. */
4870 static ir_node *gen_be_Call(ir_node *node) {
4871 ir_node *res = be_duplicate_node(node);
4872 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
/* Duplicates a be_IncSP and marks it as flag-modifying (the stack-pointer
 * adjustment is done with an add/sub, which sets eflags — TODO confirm).
 * NOTE(review): the return statement and closing brace are elided in this
 * excerpt. */
4877 static ir_node *gen_be_IncSP(ir_node *node) {
4878 ir_node *res = be_duplicate_node(node);
4879 be_node_add_flags(res, -1, arch_irn_flags_modify_flags);
4885 * Transform the Projs from a be_Call.
/* With SSE2 enabled, a float call result lives in st(0) (the x87 return
 * register per the ia32 ABI — TODO confirm) and has to be moved into an xmm
 * register: a vfst (store st(0) to the frame) followed by an xLoad is built
 * after the call, and both the memory Proj and the result Proj must be
 * rerouted through this pair of nodes.
 * NOTE(review): numerous lines (several closing braces, the vfst/xLoad
 * argument tails, the final mode handling) are elided in this excerpt. */
4887 static ir_node *gen_Proj_be_Call(ir_node *node) {
4888 ir_node *block = be_transform_node(get_nodes_block(node));
4889 ir_node *call = get_Proj_pred(node);
4890 ir_node *new_call = be_transform_node(call);
4891 ir_graph *irg = current_ir_graph;
4892 dbg_info *dbgi = get_irn_dbg_info(node);
4893 ir_type *method_type = be_Call_get_type(call);
4894 int n_res = get_method_n_ress(method_type);
4895 long proj = get_Proj_proj(node);
4896 ir_mode *mode = get_irn_mode(node);
4898 const arch_register_class_t *cls;
4900 /* The following is kinda tricky: If we're using SSE, then we have to
4901 * move the result value of the call in floating point registers to an
4902 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4903 * after the call, we have to make sure to correctly make the
4904 * MemProj and the result Proj use these 2 nodes
4906 if (proj == pn_be_Call_M_regular) {
4907 // get new node for result, are we doing the sse load/store hack?
4908 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4909 ir_node *call_res_new;
4910 ir_node *call_res_pred = NULL;
4912 if (call_res != NULL) {
4913 call_res_new = be_transform_node(call_res);
4914 call_res_pred = get_Proj_pred(call_res_new);
/* no sse hack in effect: the memory Proj stays attached to the call */
4917 if (call_res_pred == NULL || be_is_Call(call_res_pred)) {
4918 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4919 pn_be_Call_M_regular);
/* sse hack in effect: route the memory Proj through the inserted xLoad */
4921 assert(is_ia32_xLoad(call_res_pred));
4922 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
/* float result Proj under SSE2: build the st(0) -> frame -> xmm sequence */
4926 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4927 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4929 ir_node *frame = get_irg_frame(irg);
4930 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4932 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4935 /* in case there is no memory output: create one to serialize the copy
4937 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4938 pn_be_Call_M_regular);
4939 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4940 pn_be_Call_first_res);
4942 /* store st(0) onto stack */
4943 fstp = new_rd_ia32_vfst(dbgi, irg, block, frame, noreg, call_mem,
4945 set_ia32_op_type(fstp, ia32_AddrModeD);
4946 set_ia32_use_frame(fstp);
4948 /* load into SSE register */
4949 sse_load = new_rd_ia32_xLoad(dbgi, irg, block, frame, noreg, fstp,
4951 set_ia32_op_type(sse_load, ia32_AddrModeS);
4952 set_ia32_use_frame(sse_load);
4954 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4960 /* transform call modes */
4961 if (mode_is_data(mode)) {
4962 cls = arch_get_irn_reg_class(env_cg->arch_env, node, -1);
/* default: keep the Proj, just attach it to the transformed call */
4966 return new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4970 * Transform the Projs from a Cmp.
/* Proj(Cmp) must never reach the backend transformer: the mode_b lowering
 * pass is expected to have replaced all such nodes earlier, so hitting this
 * is a hard error.
 * NOTE(review): the surrounding braces are elided in this excerpt. */
4972 static ir_node *gen_Proj_Cmp(ir_node *node)
4974 /* this probably means not all mode_b nodes were lowered... */
4975 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
4980 * Transform and potentially renumber Proj nodes.
/* Central Proj dispatcher: inspects the predecessor's opcode and delegates to
 * the specialised gen_Proj_* transformer; generic backend fallbacks handle
 * everything else.
 * NOTE(review): several lines (some else branches, closing braces and a
 * declaration for 'jump') are elided in this excerpt. */
4982 static ir_node *gen_Proj(ir_node *node) {
4983 ir_node *pred = get_Proj_pred(node);
4984 if (is_Store(pred)) {
4985 long proj = get_Proj_proj(node);
/* a Store's M-Proj is folded into the transformed store itself */
4986 if (proj == pn_Store_M) {
4987 return be_transform_node(pred);
/* any other Proj of a Store is meaningless -> Bad */
4990 return new_r_Bad(current_ir_graph);
4992 } else if (is_Load(pred)) {
4993 return gen_Proj_Load(node);
4994 } else if (is_Div(pred) || is_Mod(pred) || is_DivMod(pred)) {
4995 return gen_Proj_DivMod(node);
4996 } else if (is_CopyB(pred)) {
4997 return gen_Proj_CopyB(node);
4998 } else if (is_Quot(pred)) {
4999 return gen_Proj_Quot(node);
5000 } else if (be_is_SubSP(pred)) {
5001 return gen_Proj_be_SubSP(node);
5002 } else if (be_is_AddSP(pred)) {
5003 return gen_Proj_be_AddSP(node);
5004 } else if (be_is_Call(pred)) {
5005 return gen_Proj_be_Call(node);
5006 } else if (is_Cmp(pred)) {
/* always panics — mode_b lowering should have removed these */
5007 return gen_Proj_Cmp(node);
5008 } else if (get_irn_op(pred) == op_Start) {
5009 long proj = get_Proj_proj(node);
5010 if (proj == pn_Start_X_initial_exec) {
5011 ir_node *block = get_nodes_block(pred);
5012 dbg_info *dbgi = get_irn_dbg_info(node);
5015 /* we exchange the ProjX with a jump */
5016 block = be_transform_node(block);
5017 jump = new_rd_Jmp(dbgi, current_ir_graph, block);
/* the TLS Proj is recognised via the (pre-transform) anchor */
5020 if (node == be_get_old_anchor(anchor_tls)) {
5021 return gen_Proj_tls(node);
5023 } else if (is_ia32_l_FloattoLL(pred)) {
5024 return gen_Proj_l_FloattoLL(node);
5026 } else if(!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
5030 ir_mode *mode = get_irn_mode(node);
/* gp-register Projs are renumbered to mode_Iu on the transformed pred */
5031 if (mode_needs_gp_reg(mode)) {
5032 ir_node *new_pred = be_transform_node(pred);
5033 ir_node *block = be_transform_node(get_nodes_block(node));
5034 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
5035 mode_Iu, get_Proj_proj(node));
/* keep the original node number to ease debugging */
5036 #ifdef DEBUG_libfirm
5037 new_proj->node_nr = node->node_nr;
/* default: copy the Proj unchanged */
5043 return be_duplicate_node(node);
5047 * Enters all transform functions into the generic pointer
/* Installs the per-opcode transform callbacks: clears every op's generic
 * function pointer, then fills in gen_* handlers via GEN() and marks ops that
 * must never appear via BAD().
 * NOTE(review): the bulk of the GEN/BAD registration list is elided in this
 * excerpt. */
5049 static void register_transformers(void)
5053 /* first clear the generic function pointer for all ops */
5054 clear_irp_opcodes_generic_func();
/* GEN(a) registers gen_a as op_a's transformer; BAD(a) registers a handler
 * that aborts, for ops that should have been lowered away */
5056 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
5057 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
5095 /* transform ops from intrinsic lowering */
5111 GEN(ia32_l_LLtoFloat);
5112 GEN(ia32_l_FloattoLL);
5118 /* we should never see these nodes */
5133 /* handle generic backend nodes */
/* Mulh is an optional op — fetch it at runtime, it may not exist */
5142 op_Mulh = get_op_Mulh();
5151 * Pre-transform all unknown and noreg nodes.
/* be_transform_graph callback: eagerly transforms the code generator's
 * cached Unknown/NoReg placeholder nodes (gp, x87 vfp and SSE xmm register
 * classes) so they exist before the main transformation walk.
 * NOTE(review): the closing brace is elided in this excerpt. */
5153 static void ia32_pretransform_node(void *arch_cg) {
5154 ia32_code_gen_t *cg = arch_cg;
5156 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
5157 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
5158 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
5159 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
5160 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
5161 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
5166 * Walker, checks if all ia32 nodes producing more than one result have
5167 * its Projs, other wise creates new projs and keep them using a be_Keep node.
/* Walker: for every multi-output ia32 node, records which output Projs exist
 * in a bitmask and creates a Proj + be_Keep for each unused register output,
 * so the register allocator does not consider those values dead too early.
 * NOTE(review): several lines (declarations, early returns, the class lookup
 * and closing braces) are elided in this excerpt. */
5169 static void add_missing_keep_walker(ir_node *node, void *data)
5172 unsigned found_projs = 0;
5173 const ir_edge_t *edge;
5174 ir_mode *mode = get_irn_mode(node);
/* only ia32 nodes are of interest */
5179 if(!is_ia32_irn(node))
5182 n_outs = get_ia32_n_res(node);
/* SwitchJmp outputs are control flow, not register values — skip */
5185 if(is_ia32_SwitchJmp(node))
/* found_projs is a bitmask, so output count must fit in an unsigned */
5188 assert(n_outs < (int) sizeof(unsigned) * 8);
5189 foreach_out_edge(node, edge) {
5190 ir_node *proj = get_edge_src_irn(edge);
5191 int pn = get_Proj_proj(proj);
/* memory Projs are exempt from the output-number bound */
5193 assert(get_irn_mode(proj) == mode_M || pn < n_outs);
5194 found_projs |= 1 << pn;
5198 /* are keeps missing? */
5200 for(i = 0; i < n_outs; ++i) {
5203 const arch_register_req_t *req;
5204 const arch_register_class_t *class;
/* this output already has a Proj — nothing to do */
5206 if(found_projs & (1 << i)) {
5210 req = get_ia32_out_req(node, i);
/* flags outputs need no keep */
5215 if(class == &ia32_reg_classes[CLASS_ia32_flags]) {
5219 block = get_nodes_block(node);
5220 in[0] = new_r_Proj(current_ir_graph, block, node,
5221 arch_register_class_mode(class), i);
/* reuse one Keep per node where possible */
5222 if(last_keep != NULL) {
5223 be_Keep_add_node(last_keep, class, in[0]);
5225 last_keep = be_new_Keep(class, current_ir_graph, block, 1, in);
/* keep the schedule valid if the node is already scheduled */
5226 if(sched_is_scheduled(node)) {
5227 sched_add_after(node, last_keep);
5234 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
/* Public entry point: walks the whole graph with add_missing_keep_walker.
 * NOTE(review): the closing brace is elided in this excerpt. */
5237 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
5239 ir_graph *irg = be_get_birg_irg(cg->birg);
5240 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
5243 /* do the transformation */
/* Driver for the firm -> ia32 transformation: registers the per-op
 * transformers, precomputes the heights and non-address-mode analyses,
 * temporarily disables CSE, runs be_transform_graph, then restores CSE and
 * frees the analyses.
 * NOTE(review): a few lines (declarations, set_opt_cse(0) and the closing
 * brace) are elided in this excerpt. */
5244 void ia32_transform_graph(ia32_code_gen_t *cg) {
5246 ir_graph *irg = cg->irg;
5248 register_transformers();
/* reset the fpcw cache — it is built lazily during transformation */
5250 initial_fpcw = NULL;
5252 BE_TIMER_PUSH(t_heights);
5253 heights = heights_new(irg);
5254 BE_TIMER_POP(t_heights);
/* analysis used to decide which nodes may be folded into address modes */
5255 ia32_calculate_non_address_mode_nodes(cg->birg);
5257 /* the transform phase is not safe for CSE (yet) because several nodes get
5258 * attributes set after their creation */
5259 cse_last = get_opt_cse();
5262 be_transform_graph(cg->birg, ia32_pretransform_node, cg);
/* restore the caller's CSE setting */
5264 set_opt_cse(cse_last);
5266 ia32_free_non_address_mode_nodes();
5267 heights_free(heights);
5271 void ia32_init_transform(void)
5273 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");