/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE AND NON-INFRINGEMENT.
 */

/**
 * @file
 * @brief   This file implements the IR transformation from firm into
 *          ia32-Firm.
 * @author  Christian Wuerdig, Matthias Braun
 */
34 #include "irgraph_t.h"
39 #include "iredges_t.h"
51 #include "../benode_t.h"
52 #include "../besched.h"
54 #include "../beutil.h"
55 #include "../beirg_t.h"
56 #include "../betranshlp.h"
59 #include "bearch_ia32_t.h"
60 #include "ia32_common_transform.h"
61 #include "ia32_nodes_attr.h"
62 #include "ia32_transform.h"
63 #include "ia32_new_nodes.h"
64 #include "ia32_map_regs.h"
65 #include "ia32_dbg_stat.h"
66 #include "ia32_optimize.h"
67 #include "ia32_util.h"
68 #include "ia32_address_mode.h"
69 #include "ia32_architecture.h"
71 #include "gen_ia32_regalloc_if.h"
/* Bit patterns / decimal strings used to implement float Neg/Abs and
 * unsigned-long-long -> float conversion (sign masks, abs masks, bias). */
#define SFP_SIGN   "0x80000000"
#define DFP_SIGN   "0x8000000000000000"
#define SFP_ABS    "0x7FFFFFFF"
#define DFP_ABS    "0x7FFFFFFFFFFFFFFF"
#define DFP_INTMAX "9223372036854775807"
#define ULL_BIAS   "18446744073709551616"

/* Linker-local names of the entities holding the constants above. */
#define ENT_SFP_SIGN ".LC_ia32_sfp_sign"
#define ENT_DFP_SIGN ".LC_ia32_dfp_sign"
#define ENT_SFP_ABS  ".LC_ia32_sfp_abs"
#define ENT_DFP_ABS  ".LC_ia32_dfp_abs"
#define ENT_ULL_BIAS ".LC_ia32_ull_bias"

/* Shorthands for the modes of the vfp (x87) and xmm (SSE) register classes. */
#define mode_vfp (ia32_reg_classes[CLASS_ia32_vfp].mode)
#define mode_xmm (ia32_reg_classes[CLASS_ia32_xmm].mode)
89 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
91 static ir_node *initial_fpcw = NULL;
93 extern ir_op *get_op_Mulh(void);
95 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
96 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
99 typedef ir_node *construct_binop_flags_func(dbg_info *db, ir_node *block,
100 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
103 typedef ir_node *construct_shift_func(dbg_info *db, ir_node *block,
104 ir_node *op1, ir_node *op2);
106 typedef ir_node *construct_binop_dest_func(dbg_info *db, ir_node *block,
107 ir_node *base, ir_node *index, ir_node *mem, ir_node *op);
109 typedef ir_node *construct_unop_dest_func(dbg_info *db, ir_node *block,
110 ir_node *base, ir_node *index, ir_node *mem);
112 typedef ir_node *construct_binop_float_func(dbg_info *db, ir_node *block,
113 ir_node *base, ir_node *index, ir_node *mem, ir_node *op1, ir_node *op2,
116 typedef ir_node *construct_unop_func(dbg_info *db, ir_node *block, ir_node *op);
118 static ir_node *create_immediate_or_transform(ir_node *node,
119 char immediate_constraint_type);
121 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
122 dbg_info *dbgi, ir_node *block,
123 ir_node *op, ir_node *orig_node);
125 /** Return non-zero is a node represents the 0 constant. */
126 static bool is_Const_0(ir_node *node)
128 return is_Const(node) && is_Const_null(node);
131 /** Return non-zero is a node represents the 1 constant. */
132 static bool is_Const_1(ir_node *node)
134 return is_Const(node) && is_Const_one(node);
137 /** Return non-zero is a node represents the -1 constant. */
138 static bool is_Const_Minus_1(ir_node *node)
140 return is_Const(node) && is_Const_all_one(node);
144 * returns true if constant can be created with a simple float command
146 static bool is_simple_x87_Const(ir_node *node)
148 tarval *tv = get_Const_tarval(node);
149 if (tarval_is_null(tv) || tarval_is_one(tv))
152 /* TODO: match all the other float constants */
157 * returns true if constant can be created with a simple float command
/* Purpose: decide whether an SSE float Const can be materialized without a
 * constant-pool load (zero/one, or a double whose low 32 bits are zero).
 * NOTE(review): extraction artifact — braces, an early check and the return
 * statements are missing from this view; code kept byte-identical. */
159 static bool is_simple_sse_Const(ir_node *node)
161 tarval *tv = get_Const_tarval(node);
162 ir_mode *mode = get_tarval_mode(tv);
167 if (tarval_is_null(tv) || tarval_is_one(tv))
170 if (mode == mode_D) {
/* assemble the low 32 bits of the double value, little-endian byte order */
171 unsigned val = get_tarval_sub_bits(tv, 0) |
172 (get_tarval_sub_bits(tv, 1) << 8) |
173 (get_tarval_sub_bits(tv, 2) << 16) |
174 (get_tarval_sub_bits(tv, 3) << 24);
176 /* lower 32bit are zero, really a 32bit constant */
180 /* TODO: match all the other float constants */
185 * Transforms a Const.
/* Purpose: transform a firm Const into an ia32 node.  Float constants become
 * either cheap SSE idioms (xZero/xAllOnes+shifts/movd), x87 fldz/fld1, or a
 * load from a constant-pool entity; integer constants become ia32_Const.
 * NOTE(review): extraction artifact — braces, several assignments (e.g. the
 * `res = ...` for some branches) and returns are missing; kept byte-identical. */
187 static ir_node *gen_Const(ir_node *node)
189 ir_node *old_block = get_nodes_block(node);
190 ir_node *block = be_transform_node(old_block);
191 dbg_info *dbgi = get_irn_dbg_info(node);
192 ir_mode *mode = get_irn_mode(node);
194 assert(is_Const(node));
196 if (mode_is_float(mode)) {
198 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
199 ir_node *nomem = new_NoMem();
203 if (ia32_cg_config.use_sse2) {
204 tarval *tv = get_Const_tarval(node);
/* 0.0: cheapest form, a register zeroing idiom */
205 if (tarval_is_null(tv)) {
206 load = new_bd_ia32_xZero(dbgi, block);
207 set_ia32_ls_mode(load, mode);
/* 1.0: build the bit pattern via all-ones then shift left/right */
209 } else if (tarval_is_one(tv)) {
210 int cnst = mode == mode_F ? 26 : 55;
211 ir_node *imm1 = create_Immediate(NULL, 0, cnst);
212 ir_node *imm2 = create_Immediate(NULL, 0, 2);
213 ir_node *pslld, *psrld;
215 load = new_bd_ia32_xAllOnes(dbgi, block);
216 set_ia32_ls_mode(load, mode);
217 pslld = new_bd_ia32_xPslld(dbgi, block, load, imm1);
218 set_ia32_ls_mode(pslld, mode);
219 psrld = new_bd_ia32_xPsrld(dbgi, block, pslld, imm2);
220 set_ia32_ls_mode(psrld, mode);
222 } else if (mode == mode_F) {
223 /* we can place any 32bit constant by using a movd gp, sse */
224 unsigned val = get_tarval_sub_bits(tv, 0) |
225 (get_tarval_sub_bits(tv, 1) << 8) |
226 (get_tarval_sub_bits(tv, 2) << 16) |
227 (get_tarval_sub_bits(tv, 3) << 24);
228 ir_node *cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
229 load = new_bd_ia32_xMovd(dbgi, block, cnst);
230 set_ia32_ls_mode(load, mode);
233 if (mode == mode_D) {
/* low 32 bits of the double, little-endian */
234 unsigned val = get_tarval_sub_bits(tv, 0) |
235 (get_tarval_sub_bits(tv, 1) << 8) |
236 (get_tarval_sub_bits(tv, 2) << 16) |
237 (get_tarval_sub_bits(tv, 3) << 24);
239 ir_node *imm32 = create_Immediate(NULL, 0, 32);
240 ir_node *cnst, *psllq;
242 /* fine, lower 32bit are zero, produce 32bit value */
243 val = get_tarval_sub_bits(tv, 4) |
244 (get_tarval_sub_bits(tv, 5) << 8) |
245 (get_tarval_sub_bits(tv, 6) << 16) |
246 (get_tarval_sub_bits(tv, 7) << 24);
247 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
248 load = new_bd_ia32_xMovd(dbgi, block, cnst);
249 set_ia32_ls_mode(load, mode);
/* shift the high half into place */
250 psllq = new_bd_ia32_xPsllq(dbgi, block, load, imm32);
251 set_ia32_ls_mode(psllq, mode);
/* general case: load from a constant-pool entity (rematerializable) */
256 floatent = create_float_const_entity(node);
258 load = new_bd_ia32_xLoad(dbgi, block, noreg, noreg, nomem,
260 set_ia32_op_type(load, ia32_AddrModeS);
261 set_ia32_am_sc(load, floatent);
262 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
263 res = new_r_Proj(current_ir_graph, block, load, mode_xmm, pn_ia32_xLoad_res);
/* x87 path: fldz / fld1 for 0.0 and 1.0, otherwise constant-pool vfld */
266 if (is_Const_null(node)) {
267 load = new_bd_ia32_vfldz(dbgi, block);
269 set_ia32_ls_mode(load, mode);
270 } else if (is_Const_one(node)) {
271 load = new_bd_ia32_vfld1(dbgi, block);
273 set_ia32_ls_mode(load, mode);
277 floatent = create_float_const_entity(node);
278 /* create_float_const_ent is smart and sometimes creates
280 ls_mode = get_type_mode(get_entity_type(floatent));
282 load = new_bd_ia32_vfld(dbgi, block, noreg, noreg, nomem,
284 set_ia32_op_type(load, ia32_AddrModeS);
285 set_ia32_am_sc(load, floatent);
286 arch_irn_add_flags(load, arch_irn_flags_rematerializable);
287 res = new_r_Proj(current_ir_graph, block, load, mode_vfp, pn_ia32_vfld_res);
291 SET_IA32_ORIG_NODE(load, node);
293 be_dep_on_frame(load);
295 } else { /* non-float mode */
297 tarval *tv = get_Const_tarval(node);
300 tv = tarval_convert_to(tv, mode_Iu);
302 if (tv == get_tarval_bad() || tv == get_tarval_undefined() ||
304 panic("couldn't convert constant tarval (%+F)", node);
306 val = get_tarval_long(tv);
308 cnst = new_bd_ia32_Const(dbgi, block, NULL, 0, val);
309 SET_IA32_ORIG_NODE(cnst, node);
311 be_dep_on_frame(cnst);
317 * Transforms a SymConst.
/* Purpose: transform a firm SymConst into an ia32 node.  Float-mode
 * SymConsts become an xLoad/vfld of the entity; only symconst_addr_ent is
 * supported for integer mode, which becomes an ia32_Const carrying the
 * entity.  NOTE(review): braces/else/returns missing in this extraction. */
319 static ir_node *gen_SymConst(ir_node *node)
321 ir_node *old_block = get_nodes_block(node);
322 ir_node *block = be_transform_node(old_block);
323 dbg_info *dbgi = get_irn_dbg_info(node);
324 ir_mode *mode = get_irn_mode(node);
327 if (mode_is_float(mode)) {
328 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
329 ir_node *nomem = new_NoMem();
331 if (ia32_cg_config.use_sse2)
332 cnst = new_bd_ia32_xLoad(dbgi, block, noreg, noreg, nomem, mode_E)
334 cnst = new_bd_ia32_vfld(dbgi, block, noreg, noreg, nomem, mode_E);
335 set_ia32_am_sc(cnst, get_SymConst_entity(node));
336 set_ia32_use_frame(cnst);
340 if (get_SymConst_kind(node) != symconst_addr_ent) {
341 panic("backend only support symconst_addr_ent (at %+F)", node);
343 entity = get_SymConst_entity(node);
344 cnst = new_bd_ia32_Const(dbgi, block, entity, 0, 0);
347 SET_IA32_ORIG_NODE(cnst, node);
349 be_dep_on_frame(cnst);
354 * Create a float type for the given mode and cache it.
356 * @param mode the mode for the float type (might be integer mode for SSE2 types)
357 * @param align alignment
/* One static cache array per supported mode, indexed by alignment (0..15);
 * each entry is created lazily on first request.
 * NOTE(review): local declarations (buf, tp), braces and the final else-arm
 * header are missing in this extraction; code kept byte-identical. */
359 static ir_type *ia32_create_float_type(ir_mode *mode, unsigned align) {
365 if (mode == mode_Iu) {
366 static ir_type *int_Iu[16] = {NULL, };
368 if (int_Iu[align] == NULL) {
369 snprintf(buf, sizeof(buf), "int_Iu_%u", align);
370 int_Iu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
371 /* set the specified alignment */
372 set_type_alignment_bytes(tp, align);
374 return int_Iu[align];
375 } else if (mode == mode_Lu) {
376 static ir_type *int_Lu[16] = {NULL, };
378 if (int_Lu[align] == NULL) {
379 snprintf(buf, sizeof(buf), "int_Lu_%u", align);
380 int_Lu[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
381 /* set the specified alignment */
382 set_type_alignment_bytes(tp, align);
384 return int_Lu[align];
385 } else if (mode == mode_F) {
386 static ir_type *float_F[16] = {NULL, };
388 if (float_F[align] == NULL) {
389 snprintf(buf, sizeof(buf), "float_F_%u", align);
390 float_F[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
391 /* set the specified alignment */
392 set_type_alignment_bytes(tp, align);
394 return float_F[align];
395 } else if (mode == mode_D) {
396 static ir_type *float_D[16] = {NULL, };
398 if (float_D[align] == NULL) {
399 snprintf(buf, sizeof(buf), "float_D_%u", align);
400 float_D[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
401 /* set the specified alignment */
402 set_type_alignment_bytes(tp, align);
404 return float_D[align];
/* fallback: extended precision (mode_E) */
406 static ir_type *float_E[16] = {NULL, };
408 if (float_E[align] == NULL) {
409 snprintf(buf, sizeof(buf), "float_E_%u", align);
410 float_E[align] = tp = new_type_primitive(new_id_from_str(buf), mode);
411 /* set the specified alignment */
412 set_type_alignment_bytes(tp, align);
414 return float_E[align];
419 * Create a float[2] array type for the given atomic type.
421 * @param tp the atomic type
/* Purpose: build (and cache per element-mode/alignment) a fixed-layout array
 * type holding two elements of @p tp — used for the ULL bias constant pair.
 * NOTE(review): local declarations, braces and the return are missing in
 * this extraction; code kept byte-identical. */
423 static ir_type *ia32_create_float_array(ir_type *tp) {
425 ir_mode *mode = get_type_mode(tp);
426 unsigned align = get_type_alignment_bytes(tp);
431 if (mode == mode_F) {
432 static ir_type *float_F[16] = {NULL, };
434 if (float_F[align] != NULL)
435 return float_F[align];
436 snprintf(buf, sizeof(buf), "arr_float_F_%u", align);
437 arr = float_F[align] = new_type_array(new_id_from_str(buf), 1, tp);
438 } else if (mode == mode_D) {
439 static ir_type *float_D[16] = {NULL, };
441 if (float_D[align] != NULL)
442 return float_D[align];
443 snprintf(buf, sizeof(buf), "arr_float_D_%u", align);
444 arr = float_D[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* fallback cache for extended precision element types */
446 static ir_type *float_E[16] = {NULL, };
448 if (float_E[align] != NULL)
449 return float_E[align];
450 snprintf(buf, sizeof(buf), "arr_float_E_%u", align);
451 arr = float_E[align] = new_type_array(new_id_from_str(buf), 1, tp);
/* the array holds exactly 2 elements with a fixed layout */
453 set_type_alignment_bytes(arr, align);
454 set_type_size_bytes(arr, 2 * get_type_size_bytes(tp));
455 set_type_state(arr, layout_fixed);
459 /* Generates an entity for a known FP const (used for FP Neg + Abs) */
/* Purpose: lazily create and cache one global constant entity per
 * ia32_known_const_t (sign masks, abs masks, ULL bias).  The ULL bias is
 * emitted as a two-element array {0, bias}.
 * NOTE(review): two struct fields (mode/align selectors) and several local
 * declarations are missing in this extraction; code kept byte-identical. */
460 ir_entity *ia32_gen_fp_known_const(ia32_known_const_t kct)
462 static const struct {
463 const char *ent_name;
464 const char *cnst_str;
467 } names [ia32_known_const_max] = {
468 { ENT_SFP_SIGN, SFP_SIGN, 0, 16 }, /* ia32_SSIGN */
469 { ENT_DFP_SIGN, DFP_SIGN, 1, 16 }, /* ia32_DSIGN */
470 { ENT_SFP_ABS, SFP_ABS, 0, 16 }, /* ia32_SABS */
471 { ENT_DFP_ABS, DFP_ABS, 1, 16 }, /* ia32_DABS */
472 { ENT_ULL_BIAS, ULL_BIAS, 2, 4 } /* ia32_ULLBIAS */
474 static ir_entity *ent_cache[ia32_known_const_max];
476 const char *ent_name, *cnst_str;
482 ent_name = names[kct].ent_name;
483 if (! ent_cache[kct]) {
484 cnst_str = names[kct].cnst_str;
/* third struct column selects the tarval mode */
486 switch (names[kct].mode) {
487 case 0: mode = mode_Iu; break;
488 case 1: mode = mode_Lu; break;
489 default: mode = mode_F; break;
491 tv = new_tarval_from_str(cnst_str, strlen(cnst_str), mode);
492 tp = ia32_create_float_type(mode, names[kct].align);
494 if (kct == ia32_ULLBIAS)
495 tp = ia32_create_float_array(tp);
496 ent = new_entity(get_glob_type(), new_id_from_str(ent_name), tp);
498 set_entity_ld_ident(ent, get_entity_ident(ent));
499 set_entity_visibility(ent, visibility_local);
500 set_entity_variability(ent, variability_constant);
501 set_entity_allocation(ent, allocation_static);
503 if (kct == ia32_ULLBIAS) {
/* pair {0, bias}: only the upper half carries the bias value */
504 ir_initializer_t *initializer = create_initializer_compound(2);
506 set_initializer_compound_value(initializer, 0,
507 create_initializer_tarval(get_tarval_null(mode)));
508 set_initializer_compound_value(initializer, 1,
509 create_initializer_tarval(tv));
511 set_entity_initializer(ent, initializer);
513 set_entity_initializer(ent, create_initializer_tarval(tv));
516 /* cache the entry */
517 ent_cache[kct] = ent;
520 return ent_cache[kct];
524 * return true if the node is a Proj(Load) and could be used in source address
525 * mode for another node. Will return only true if the @p other node is not
526 * dependent on the memory of the Load (for binary operations use the other
527 * input here, for unary operations use NULL).
/* NOTE(review): the return statements of the individual checks are missing
 * in this extraction; code kept byte-identical. */
529 static int ia32_use_source_address_mode(ir_node *block, ir_node *node,
530 ir_node *other, ir_node *other2, match_flags_t flags)
535 /* float constants are always available */
536 if (is_Const(node)) {
537 ir_mode *mode = get_irn_mode(node);
538 if (mode_is_float(mode)) {
539 if (ia32_cg_config.use_sse2) {
540 if (is_simple_sse_Const(node))
543 if (is_simple_x87_Const(node))
546 if (get_irn_n_edges(node) > 1)
/* from here on @p node must be a Proj of a Load in the same block */
554 load = get_Proj_pred(node);
555 pn = get_Proj_proj(node);
556 if (!is_Load(load) || pn != pn_Load_res)
558 if (get_nodes_block(load) != block)
560 /* we only use address mode if we're the only user of the load */
561 if (get_irn_n_edges(node) != (flags & match_two_users ? 2 : 1))
563 /* in some edge cases with address mode we might reach the load normally
564 * and through some AM sequence, if it is already materialized then we
565 * can't create an AM node from it */
566 if (be_is_transformed(node))
569 /* don't do AM if other node inputs depend on the load (via mem-proj) */
570 if (other != NULL && prevents_AM(block, load, other))
573 if (other2 != NULL && prevents_AM(block, load, other2))
579 typedef struct ia32_address_mode_t ia32_address_mode_t;
/* Bundles everything match_arguments() computes about an operation's
 * operands: operand kind (op_type), matched operands and per-operation flags.
 * NOTE(review): several fields (addr, ls_mode, new_op1/new_op2, pinned,
 * mem_proj) are missing from this extraction. */
580 struct ia32_address_mode_t {
585 ia32_op_type_t op_type;
589 unsigned commutative : 1;
/* set when the two operands were swapped during matching */
590 unsigned ins_permuted : 1;
593 static void build_address_ptr(ia32_address_t *addr, ir_node *ptr, ir_node *mem)
597 /* construct load address */
598 memset(addr, 0, sizeof(addr[0]));
599 ia32_create_address_mode(addr, ptr, 0);
601 noreg_gp = ia32_new_NoReg_gp(env_cg);
602 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
603 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
604 addr->mem = be_transform_node(mem);
/* Purpose: fill am->addr (and ls_mode/pinned/mem_proj) for source address
 * mode.  A float Const is addressed through its constant-pool entity;
 * otherwise @p node is expected to be a Proj(Load) whose pointer is matched.
 * NOTE(review): local declarations, braces and the else-arm header are
 * missing in this extraction; code kept byte-identical. */
607 static void build_address(ia32_address_mode_t *am, ir_node *node,
608 ia32_create_am_flags_t flags)
610 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
611 ia32_address_t *addr = &am->addr;
617 if (is_Const(node)) {
618 ir_entity *entity = create_float_const_entity(node);
619 addr->base = noreg_gp;
620 addr->index = noreg_gp;
621 addr->mem = new_NoMem();
622 addr->symconst_ent = entity;
/* the entity's type mode may differ from the node's (see create_float_const_entity) */
624 am->ls_mode = get_type_mode(get_entity_type(entity));
625 am->pinned = op_pin_state_floats;
/* Proj(Load) case: take address parts from the Load */
629 load = get_Proj_pred(node);
630 ptr = get_Load_ptr(load);
631 mem = get_Load_mem(load);
632 new_mem = be_transform_node(mem);
633 am->pinned = get_irn_pinned(load);
634 am->ls_mode = get_Load_mode(load);
635 am->mem_proj = be_get_Proj_for_pn(load, pn_Load_M);
638 /* construct load address */
639 ia32_create_address_mode(addr, ptr, flags);
641 addr->base = addr->base ? be_transform_node(addr->base) : noreg_gp;
642 addr->index = addr->index ? be_transform_node(addr->index) : noreg_gp;
646 static void set_address(ir_node *node, const ia32_address_t *addr)
648 set_ia32_am_scale(node, addr->scale);
649 set_ia32_am_sc(node, addr->symconst_ent);
650 set_ia32_am_offs_int(node, addr->offset);
651 if (addr->symconst_sign)
652 set_ia32_am_sc_sign(node);
654 set_ia32_use_frame(node);
655 set_ia32_frame_ent(node, addr->frame_entity);
659 * Apply attributes of a given address mode to a node.
661 static void set_am_attributes(ir_node *node, const ia32_address_mode_t *am)
663 set_address(node, &am->addr);
665 set_ia32_op_type(node, am->op_type);
666 set_ia32_ls_mode(node, am->ls_mode);
667 if (am->pinned == op_pin_state_pinned) {
668 /* beware: some nodes are already pinned and did not allow to change the state */
669 if (get_irn_pinned(node) != op_pin_state_pinned)
670 set_irn_pinned(node, op_pin_state_pinned);
673 set_ia32_commutative(node);
677 * Check, if a given node is a Down-Conv, ie. a integer Conv
678 * from a mode with a mode with more bits to a mode with lesser bits.
679 * Moreover, we return only true if the node has not more than 1 user.
681 * @param node the node
682 * @return non-zero if node is a Down-Conv
684 static int is_downconv(const ir_node *node)
692 /* we only want to skip the conv when we're the only user
693 * (not optimal but for now...)
695 if (get_irn_n_edges(node) > 1)
698 src_mode = get_irn_mode(get_Conv_op(node));
699 dest_mode = get_irn_mode(node);
701 ia32_mode_needs_gp_reg(src_mode) &&
702 ia32_mode_needs_gp_reg(dest_mode) &&
703 get_mode_size_bits(dest_mode) <= get_mode_size_bits(src_mode);
706 /* Skip all Down-Conv's on a given node and return the resulting node. */
707 ir_node *ia32_skip_downconv(ir_node *node)
709 while (is_downconv(node))
710 node = get_Conv_op(node);
/* Purpose: widen a value of a sub-32-bit integer mode to a 32-bit mode via an
 * I2I Conv, choosing the target mode by signedness of the source mode.
 * NOTE(review): the tgt_mode assignments and an early-out appear to be
 * missing in this extraction; code kept byte-identical. */
715 static ir_node *create_upconv(ir_node *node, ir_node *orig_node)
717 ir_mode *mode = get_irn_mode(node);
722 if (mode_is_signed(mode)) {
727 block = get_nodes_block(node);
728 dbgi = get_irn_dbg_info(node);
730 return create_I2I_Conv(mode, tgt_mode, dbgi, block, node, orig_node);
734 * matches operands of a node into ia32 addressing/operand modes. This covers
735 * usage of source address mode, immediates, operations with non 32-bit modes,
737 * The resulting data is filled into the @p am struct. block is the block
738 * of the node whose arguments are matched. op1, op2 are the first and second
739 * input that are matched (op1 may be NULL). other_op is another unrelated
740 * input that is not matched! but which is needed sometimes to check if AM
741 * for op1/op2 is legal.
742 * @p flags describes the supported modes of the operation in detail.
/* NOTE(review): braces and several branch bodies are missing in this
 * extraction; code kept byte-identical.  The visible flow is: normalize
 * operands (skip downconvs for mode-neutral ops), try an immediate for op2,
 * then try source AM for op2, then (if commutative) for op1, else fall back
 * to ia32_Normal with both operands transformed. */
744 static void match_arguments(ia32_address_mode_t *am, ir_node *block,
745 ir_node *op1, ir_node *op2, ir_node *other_op,
748 ia32_address_t *addr = &am->addr;
749 ir_mode *mode = get_irn_mode(op2);
750 int mode_bits = get_mode_size_bits(mode);
751 ir_node *noreg_gp, *new_op1, *new_op2;
753 unsigned commutative;
754 int use_am_and_immediates;
757 memset(am, 0, sizeof(am[0]));
759 commutative = (flags & match_commutative) != 0;
760 use_am_and_immediates = (flags & match_am_and_immediates) != 0;
761 use_am = (flags & match_am) != 0;
762 use_immediate = (flags & match_immediate) != 0;
763 assert(!use_am_and_immediates || use_immediate);
766 assert(!commutative || op1 != NULL);
767 assert(use_am || !(flags & match_8bit_am));
768 assert(use_am || !(flags & match_16bit_am));
/* narrow modes without explicit 8/16-bit AM support disable AM */
770 if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
771 (mode_bits == 16 && !(flags & match_16bit_am))) {
775 /* we can simply skip downconvs for mode neutral nodes: the upper bits
776 * can be random for these operations */
777 if (flags & match_mode_neutral) {
778 op2 = ia32_skip_downconv(op2);
780 op1 = ia32_skip_downconv(op1);
784 /* match immediates. firm nodes are normalized: constants are always on the
787 if (!(flags & match_try_am) && use_immediate) {
788 new_op2 = try_create_Immediate(op2, 0);
791 noreg_gp = ia32_new_NoReg_gp(env_cg);
/* try source address mode for op2 first */
792 if (new_op2 == NULL &&
793 use_am && ia32_use_source_address_mode(block, op2, op1, other_op, flags)) {
794 build_address(am, op2, 0);
795 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
796 if (mode_is_float(mode)) {
797 new_op2 = ia32_new_NoReg_vfp(env_cg);
801 am->op_type = ia32_AddrModeS;
/* commutative: try source address mode for op1, swapping the operands */
802 } else if (commutative && (new_op2 == NULL || use_am_and_immediates) &&
804 ia32_use_source_address_mode(block, op1, op2, other_op, flags)) {
806 build_address(am, op1, 0);
808 if (mode_is_float(mode)) {
809 noreg = ia32_new_NoReg_vfp(env_cg);
814 if (new_op2 != NULL) {
817 new_op1 = be_transform_node(op2);
819 am->ins_permuted = 1;
821 am->op_type = ia32_AddrModeS;
/* fallback: no AM, plain register operands */
823 am->op_type = ia32_Normal;
825 if (flags & match_try_am) {
831 new_op1 = (op1 == NULL ? NULL : be_transform_node(op1));
833 new_op2 = be_transform_node(op2);
835 (flags & match_mode_neutral ? mode_Iu : get_irn_mode(op2));
/* fill unused address parts with NoReg/NoMem */
837 if (addr->base == NULL)
838 addr->base = noreg_gp;
839 if (addr->index == NULL)
840 addr->index = noreg_gp;
841 if (addr->mem == NULL)
842 addr->mem = new_NoMem();
844 am->new_op1 = new_op1;
845 am->new_op2 = new_op2;
846 am->commutative = commutative;
/* Purpose: when a node absorbed a Load (source AM), rewire the old Load's
 * memory Proj to the new node; turn the node into mode_T and return the
 * result Proj if needed.  NOTE(review): declarations, braces and early/late
 * returns are missing in this extraction; code kept byte-identical. */
849 static ir_node *fix_mem_proj(ir_node *node, ia32_address_mode_t *am)
854 if (am->mem_proj == NULL)
857 /* we have to create a mode_T so the old MemProj can attach to us */
858 mode = get_irn_mode(node);
859 load = get_Proj_pred(am->mem_proj);
860 be_set_transformed_node(load, node);
863 if (mode != mode_T) {
864 set_irn_mode(node, mode_T);
865 return new_rd_Proj(NULL, current_ir_graph, get_nodes_block(node), node, mode, pn_ia32_res);
872 * Construct a standard binary operation, set AM and immediate if required.
874 * @param node The original node for which the binop is created
875 * @param op1 The first operand
876 * @param op2 The second operand
877 * @param func The node constructor function
878 * @return The constructed ia32 node.
880 static ir_node *gen_binop(ir_node *node, ir_node *op1, ir_node *op2,
881 construct_binop_func *func, match_flags_t flags)
884 ir_node *block, *new_block, *new_node;
885 ia32_address_mode_t am;
886 ia32_address_t *addr = &am.addr;
888 block = get_nodes_block(node);
889 match_arguments(&am, block, op1, op2, NULL, flags);
891 dbgi = get_irn_dbg_info(node);
892 new_block = be_transform_node(block);
893 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
894 am.new_op1, am.new_op2);
895 set_am_attributes(new_node, &am);
896 /* we can't use source address mode anymore when using immediates */
897 if (!(flags & match_am_and_immediates) &&
898 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
899 set_ia32_am_support(new_node, ia32_am_none);
900 SET_IA32_ORIG_NODE(new_node, node);
902 new_node = fix_mem_proj(new_node, &am);
909 n_ia32_l_binop_right,
910 n_ia32_l_binop_eflags
912 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Adc_left, n_Adc_left)
913 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Adc_right, n_Adc_right)
914 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Adc_eflags, n_Adc_eflags)
915 COMPILETIME_ASSERT(n_ia32_l_binop_left == n_ia32_l_Sbb_minuend, n_Sbb_minuend)
916 COMPILETIME_ASSERT(n_ia32_l_binop_right == n_ia32_l_Sbb_subtrahend, n_Sbb_subtrahend)
917 COMPILETIME_ASSERT(n_ia32_l_binop_eflags == n_ia32_l_Sbb_eflags, n_Sbb_eflags)
920 * Construct a binary operation which also consumes the eflags.
922 * @param node The node to transform
923 * @param func The node constructor function
924 * @param flags The match flags
925 * @return The constructor ia32 node
927 static ir_node *gen_binop_flags(ir_node *node, construct_binop_flags_func *func,
930 ir_node *src_block = get_nodes_block(node);
931 ir_node *op1 = get_irn_n(node, n_ia32_l_binop_left);
932 ir_node *op2 = get_irn_n(node, n_ia32_l_binop_right);
933 ir_node *eflags = get_irn_n(node, n_ia32_l_binop_eflags);
935 ir_node *block, *new_node, *new_eflags;
936 ia32_address_mode_t am;
937 ia32_address_t *addr = &am.addr;
939 match_arguments(&am, src_block, op1, op2, eflags, flags);
941 dbgi = get_irn_dbg_info(node);
942 block = be_transform_node(src_block);
943 new_eflags = be_transform_node(eflags);
944 new_node = func(dbgi, block, addr->base, addr->index, addr->mem,
945 am.new_op1, am.new_op2, new_eflags);
946 set_am_attributes(new_node, &am);
947 /* we can't use source address mode anymore when using immediates */
948 if (!(flags & match_am_and_immediates) &&
949 (is_ia32_Immediate(am.new_op1) || is_ia32_Immediate(am.new_op2)))
950 set_ia32_am_support(new_node, ia32_am_none);
951 SET_IA32_ORIG_NODE(new_node, node);
953 new_node = fix_mem_proj(new_node, &am);
958 static ir_node *get_fpcw(void)
961 if (initial_fpcw != NULL)
964 fpcw = be_abi_get_ignore_irn(env_cg->birg->abi,
965 &ia32_fp_cw_regs[REG_FPCW]);
966 initial_fpcw = be_transform_node(fpcw);
972 * Construct a standard binary operation, set AM and immediate if required.
974 * @param op1 The first operand
975 * @param op2 The second operand
976 * @param func The node constructor function
977 * @return The constructed ia32 node.
979 static ir_node *gen_binop_x87_float(ir_node *node, ir_node *op1, ir_node *op2,
980 construct_binop_float_func *func)
982 ir_mode *mode = get_irn_mode(node);
984 ir_node *block, *new_block, *new_node;
985 ia32_address_mode_t am;
986 ia32_address_t *addr = &am.addr;
987 ia32_x87_attr_t *attr;
988 /* All operations are considered commutative, because there are reverse
990 match_flags_t flags = match_commutative;
992 /* cannot use address mode with long double on x87 */
993 if (get_mode_size_bits(mode) <= 64)
996 block = get_nodes_block(node);
997 match_arguments(&am, block, op1, op2, NULL, flags);
999 dbgi = get_irn_dbg_info(node);
1000 new_block = be_transform_node(block);
1001 new_node = func(dbgi, new_block, addr->base, addr->index, addr->mem,
1002 am.new_op1, am.new_op2, get_fpcw());
1003 set_am_attributes(new_node, &am);
1005 attr = get_ia32_x87_attr(new_node);
1006 attr->attr.data.ins_permuted = am.ins_permuted;
1008 SET_IA32_ORIG_NODE(new_node, node);
1010 new_node = fix_mem_proj(new_node, &am);
1016 * Construct a shift/rotate binary operation, sets AM and immediate if required.
1018 * @param op1 The first operand
1019 * @param op2 The second operand
1020 * @param func The node constructor function
1021 * @return The constructed ia32 node.
/* NOTE(review): braces, an else-arm, the Conv-skip loop body tail and the
 * final return are missing in this extraction; code kept byte-identical. */
1023 static ir_node *gen_shift_binop(ir_node *node, ir_node *op1, ir_node *op2,
1024 construct_shift_func *func,
1025 match_flags_t flags)
1028 ir_node *block, *new_block, *new_op1, *new_op2, *new_node;
1030 assert(! mode_is_float(get_irn_mode(node)));
1031 assert(flags & match_immediate);
1032 assert((flags & ~(match_mode_neutral | match_immediate)) == 0);
/* widen/keep the shifted value depending on mode-neutrality and width */
1034 if (flags & match_mode_neutral) {
1035 op1 = ia32_skip_downconv(op1);
1036 new_op1 = be_transform_node(op1);
1037 } else if (get_mode_size_bits(get_irn_mode(node)) != 32) {
1038 new_op1 = create_upconv(op1, node);
1040 new_op1 = be_transform_node(op1);
1043 /* the shift amount can be any mode that is bigger than 5 bits, since all
1044 * other bits are ignored anyway */
1045 while (is_Conv(op2) && get_irn_n_edges(op2) == 1) {
1046 ir_node *const op = get_Conv_op(op2);
1047 if (mode_is_float(get_irn_mode(op)))
1050 assert(get_mode_size_bits(get_irn_mode(op2)) >= 5);
1052 new_op2 = create_immediate_or_transform(op2, 0);
1054 dbgi = get_irn_dbg_info(node);
1055 block = get_nodes_block(node);
1056 new_block = be_transform_node(block);
1057 new_node = func(dbgi, new_block, new_op1, new_op2);
1058 SET_IA32_ORIG_NODE(new_node, node);
1060 /* lowered shift instruction may have a dependency operand, handle it here */
1061 if (get_irn_arity(node) == 3) {
1062 /* we have a dependency */
1063 ir_node *new_dep = be_transform_node(get_irn_n(node, 2));
1064 add_irn_dep(new_node, new_dep);
1072 * Construct a standard unary operation, set AM and immediate if required.
1074 * @param op The operand
1075 * @param func The node constructor function
1076 * @return The constructed ia32 node.
1078 static ir_node *gen_unop(ir_node *node, ir_node *op, construct_unop_func *func,
1079 match_flags_t flags)
1082 ir_node *block, *new_block, *new_op, *new_node;
1084 assert(flags == 0 || flags == match_mode_neutral);
1085 if (flags & match_mode_neutral) {
1086 op = ia32_skip_downconv(op);
1089 new_op = be_transform_node(op);
1090 dbgi = get_irn_dbg_info(node);
1091 block = get_nodes_block(node);
1092 new_block = be_transform_node(block);
1093 new_node = func(dbgi, new_block, new_op);
1095 SET_IA32_ORIG_NODE(new_node, node);
1100 static ir_node *create_lea_from_address(dbg_info *dbgi, ir_node *block,
1101 ia32_address_t *addr)
1103 ir_node *base, *index, *res;
1107 base = ia32_new_NoReg_gp(env_cg);
1109 base = be_transform_node(base);
1112 index = addr->index;
1113 if (index == NULL) {
1114 index = ia32_new_NoReg_gp(env_cg);
1116 index = be_transform_node(index);
1119 res = new_bd_ia32_Lea(dbgi, block, base, index);
1120 set_address(res, addr);
1126 * Returns non-zero if a given address mode has a symbolic or
1127 * numerical offset != 0.
1129 static int am_has_immediates(const ia32_address_t *addr)
1131 return addr->offset != 0 || addr->symconst_ent != NULL
1132 || addr->frame_entity || addr->use_frame;
1136 * Creates an ia32 Add.
1138 * @return the created ia32 Add node
/* Purpose: transform a firm Add.  Floats go through the SSE/x87 binop
 * helpers; integer Adds prefer (0) constant folding into an ia32_Const,
 * (1) Lea for immediate-only forms, (2) Add with source AM, (3) plain Lea.
 * NOTE(review): braces and some returns are missing in this extraction;
 * code kept byte-identical. */
1140 static ir_node *gen_Add(ir_node *node)
1142 ir_mode *mode = get_irn_mode(node);
1143 ir_node *op1 = get_Add_left(node);
1144 ir_node *op2 = get_Add_right(node);
1146 ir_node *block, *new_block, *new_node, *add_immediate_op;
1147 ia32_address_t addr;
1148 ia32_address_mode_t am;
1150 if (mode_is_float(mode)) {
1151 if (ia32_cg_config.use_sse2)
1152 return gen_binop(node, op1, op2, new_bd_ia32_xAdd,
1153 match_commutative | match_am);
1155 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfadd);
1158 ia32_mark_non_am(node);
1160 op2 = ia32_skip_downconv(op2);
1161 op1 = ia32_skip_downconv(op1);
1165 * 0. Immediate Trees (example Add(Symconst, Const) -> Const)
1166 * 1. Add with immediate -> Lea
1167 * 2. Add with possible source address mode -> Add
1168 * 3. Otherwise -> Lea
1170 memset(&addr, 0, sizeof(addr));
1171 ia32_create_address_mode(&addr, node, ia32_create_am_force)
1172 add_immediate_op = NULL;
1174 dbgi = get_irn_dbg_info(node);
1175 block = get_nodes_block(node);
1176 new_block = be_transform_node(block);
/* case 0: whole Add folded into symconst/offset -> plain Const */
1179 if (addr.base == NULL && addr.index == NULL) {
1180 new_node = new_bd_ia32_Const(dbgi, new_block, addr.symconst_ent,
1181 addr.symconst_sign, addr.offset);
1182 be_dep_on_frame(new_node);
1183 SET_IA32_ORIG_NODE(new_node, node);
1186 /* add with immediate? */
1187 if (addr.index == NULL) {
1188 add_immediate_op = addr.base;
1189 } else if (addr.base == NULL && addr.scale == 0) {
1190 add_immediate_op = addr.index;
1193 if (add_immediate_op != NULL) {
/* Add x,0: nothing to add, just transform the operand */
1194 if (!am_has_immediates(&addr)) {
1195 #ifdef DEBUG_libfirm
1196 ir_fprintf(stderr, "Optimisation warning Add x,0 (%+F) found\n",
1199 return be_transform_node(add_immediate_op);
1202 new_node = create_lea_from_address(dbgi, new_block, &addr);
1203 SET_IA32_ORIG_NODE(new_node, node);
1207 /* test if we can use source address mode */
1208 match_arguments(&am, block, op1, op2, NULL, match_commutative
1209 | match_mode_neutral | match_am | match_immediate | match_try_am);
1211 /* construct an Add with source address mode */
1212 if (am.op_type == ia32_AddrModeS) {
1213 ia32_address_t *am_addr = &am.addr;
1214 new_node = new_bd_ia32_Add(dbgi, new_block, am_addr->base,
1215 am_addr->index, am_addr->mem, am.new_op1,
1217 set_am_attributes(new_node, &am);
1218 SET_IA32_ORIG_NODE(new_node, node);
1220 new_node = fix_mem_proj(new_node, &am);
1225 /* otherwise construct a lea */
1226 new_node = create_lea_from_address(dbgi, new_block, &addr);
1227 SET_IA32_ORIG_NODE(new_node, node);
1232 * Creates an ia32 Mul.
1234 * @return the created ia32 Mul node
1236 static ir_node *gen_Mul(ir_node *node)
1238 ir_node *op1 = get_Mul_left(node);
1239 ir_node *op2 = get_Mul_right(node);
1240 ir_mode *mode = get_irn_mode(node);
1242 if (mode_is_float(mode)) {
1243 if (ia32_cg_config.use_sse2)
1244 return gen_binop(node, op1, op2, new_bd_ia32_xMul,
1245 match_commutative | match_am);
1247 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfmul);
1249 return gen_binop(node, op1, op2, new_bd_ia32_IMul,
1250 match_commutative | match_am | match_mode_neutral |
1251 match_immediate | match_am_and_immediates);
1255 * Creates an ia32 Mulh.
1256 * Note: Mul produces a 64Bit result and Mulh returns the upper 32 bit of
1257 * this result while Mul returns the lower 32 bit.
1259 * @return the created ia32 Mulh node
1261 static ir_node *gen_Mulh(ir_node *node)
1263 ir_node *block = get_nodes_block(node);
1264 ir_node *new_block = be_transform_node(block);
1265 dbg_info *dbgi = get_irn_dbg_info(node);
1266 ir_node *op1 = get_Mulh_left(node);
1267 ir_node *op2 = get_Mulh_right(node);
1268 ir_mode *mode = get_irn_mode(node);
1270 ir_node *proj_res_high;
1272 if (mode_is_signed(mode)) {
1273 new_node = gen_binop(node, op1, op2, new_bd_ia32_IMul1OP, match_commutative | match_am);
1274 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1275 mode_Iu, pn_ia32_IMul1OP_res_high);
1277 new_node = gen_binop(node, op1, op2, new_bd_ia32_Mul, match_commutative | match_am);
1278 proj_res_high = new_rd_Proj(dbgi, current_ir_graph, new_block, new_node,
1279 mode_Iu, pn_ia32_Mul_res_high);
1281 return proj_res_high;
1285 * Creates an ia32 And.
1287 * @return The created ia32 And node
1289 static ir_node *gen_And(ir_node *node)
1291 ir_node *op1 = get_And_left(node);
1292 ir_node *op2 = get_And_right(node);
1293 assert(! mode_is_float(get_irn_mode(node)));
/* And with constant 0xFF/0xFFFF is a zero extension: emit an I2I Conv
 * instead of a real And.  (NOTE(review): sampled listing -- the 0xFF
 * branch and the src_mode setup are not visible here.) */
1295 /* is it a zero extension? */
1296 if (is_Const(op2)) {
1297 tarval *tv = get_Const_tarval(op2);
1298 long v = get_tarval_long(tv);
1300 if (v == 0xFF || v == 0xFFFF) {
1301 dbg_info *dbgi = get_irn_dbg_info(node);
1302 ir_node *block = get_nodes_block(node);
1309 assert(v == 0xFFFF);
1312 res = create_I2I_Conv(src_mode, mode_Iu, dbgi, block, op1, node);
/* Generic case: And supports commutativity, source AM and immediates. */
1317 return gen_binop(node, op1, op2, new_bd_ia32_And,
1318 match_commutative | match_mode_neutral | match_am | match_immediate);
1324 * Creates an ia32 Or.
1326 * @return The created ia32 Or node
1328 static ir_node *gen_Or(ir_node *node)
1330 ir_node *op1 = get_Or_left(node);
1331 ir_node *op2 = get_Or_right(node);
1333 assert (! mode_is_float(get_irn_mode(node)));
1334 return gen_binop(node, op1, op2, new_bd_ia32_Or, match_commutative
1335 | match_mode_neutral | match_am | match_immediate);
1341 * Creates an ia32 Eor.
1343 * @return The created ia32 Eor node
1345 static ir_node *gen_Eor(ir_node *node)
1347 ir_node *op1 = get_Eor_left(node);
1348 ir_node *op2 = get_Eor_right(node);
1350 assert(! mode_is_float(get_irn_mode(node)));
1351 return gen_binop(node, op1, op2, new_bd_ia32_Xor, match_commutative
1352 | match_mode_neutral | match_am | match_immediate);
1357 * Creates an ia32 Sub.
1359 * @return The created ia32 Sub node
1361 static ir_node *gen_Sub(ir_node *node)
1363 ir_node *op1 = get_Sub_left(node);
1364 ir_node *op2 = get_Sub_right(node);
1365 ir_mode *mode = get_irn_mode(node);
/* Floating point subtract: SSE2 xSub when available, x87 vfsub else.
 * Sub is not commutative, so no match_commutative here. */
1367 if (mode_is_float(mode)) {
1368 if (ia32_cg_config.use_sse2)
1369 return gen_binop(node, op1, op2, new_bd_ia32_xSub, match_am);
1371 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfsub);
/* Sub with a constant should have been normalised to Add(-C) by the
 * middleend; warn so the missing normalisation is noticed. */
1374 if (is_Const(op2)) {
1375 ir_fprintf(stderr, "Optimisation warning: found sub with const (%+F)\n",
1379 return gen_binop(node, op1, op2, new_bd_ia32_Sub, match_mode_neutral
1380 | match_am | match_immediate);
1383 static ir_node *transform_AM_mem(ir_graph *const irg, ir_node *const block,
1384 ir_node *const src_val,
1385 ir_node *const src_mem,
1386 ir_node *const am_mem)
/* Combine the memory of the transformed node (src_mem) with the memory
 * already consumed by a folded address-mode load (am_mem), taking care not
 * to create a cycle when src_mem is a Proj of the load producing src_val. */
1388 if (is_NoMem(am_mem)) {
1389 return be_transform_node(src_mem);
1390 } else if (is_Proj(src_val) &&
1392 get_Proj_pred(src_val) == get_Proj_pred(src_mem)) {
1393 /* avoid memory loop */
1395 } else if (is_Proj(src_val) && is_Sync(src_mem)) {
1396 ir_node *const ptr_pred = get_Proj_pred(src_val);
1397 int const arity = get_Sync_n_preds(src_mem);
/* Rebuild the Sync, dropping the predecessor that belongs to the
 * folded load and appending am_mem. */
1402 NEW_ARR_A(ir_node*, ins, arity + 1);
1404 /* NOTE: This sometimes produces dead-code because the old sync in
1405 * src_mem might not be used anymore, we should detect this case
1406 * and kill the sync... */
1407 for (i = arity - 1; i >= 0; --i) {
1408 ir_node *const pred = get_Sync_pred(src_mem, i);
1410 /* avoid memory loop */
1411 if (is_Proj(pred) && get_Proj_pred(pred) == ptr_pred)
1414 ins[n++] = be_transform_node(pred);
1419 return new_r_Sync(irg, block, n, ins);
/* Fallback: Sync the transformed src_mem with am_mem. */
1423 ins[0] = be_transform_node(src_mem);
1425 return new_r_Sync(irg, block, 2, ins);
1429 static ir_node *create_sex_32_64(dbg_info *dbgi, ir_node *block,
1430 ir_node *val, const ir_node *orig)
1435 if (ia32_cg_config.use_short_sex_eax) {
1436 ir_node *pval = new_bd_ia32_ProduceVal(dbgi, block);
1437 be_dep_on_frame(pval);
1438 res = new_bd_ia32_Cltd(dbgi, block, val, pval);
1440 ir_node *imm31 = create_Immediate(NULL, 0, 31);
1441 res = new_bd_ia32_Sar(dbgi, block, val, imm31);
1443 SET_IA32_ORIG_NODE(res, orig);
1448 * Generates an ia32 DivMod with additional infrastructure for the
1449 * register allocator if needed.
1451 static ir_node *create_Div(ir_node *node)
1453 dbg_info *dbgi = get_irn_dbg_info(node);
1454 ir_node *block = get_nodes_block(node);
1455 ir_node *new_block = be_transform_node(block);
1462 ir_node *sign_extension;
1463 ia32_address_mode_t am;
1464 ia32_address_t *addr = &am.addr;
/* Fetch operands, memory and result mode from the concrete node kind;
 * Div, Mod and DivMod all share this code path. */
1466 /* the upper bits have random contents for smaller modes */
1467 switch (get_irn_opcode(node)) {
1469 op1 = get_Div_left(node);
1470 op2 = get_Div_right(node);
1471 mem = get_Div_mem(node);
1472 mode = get_Div_resmode(node);
1475 op1 = get_Mod_left(node);
1476 op2 = get_Mod_right(node);
1477 mem = get_Mod_mem(node);
1478 mode = get_Mod_resmode(node);
1481 op1 = get_DivMod_left(node);
1482 op2 = get_DivMod_right(node);
1483 mem = get_DivMod_mem(node);
1484 mode = get_DivMod_resmode(node);
1487 panic("invalid divmod node %+F", node);
1490 match_arguments(&am, block, op1, op2, NULL, match_am);
1492 /* Beware: We don't need a Sync, if the memory predecessor of the Div node
1493 is the memory of the consumed address. We can have only the second op as address
1494 in Div nodes, so check only op2. */
1495 new_mem = transform_AM_mem(current_ir_graph, block, op2, mem, addr->mem);
/* x86 division needs the dividend in edx:eax -- sign-extend the high part
 * for signed division, zero it for unsigned division. */
1497 if (mode_is_signed(mode)) {
1498 sign_extension = create_sex_32_64(dbgi, new_block, am.new_op1, node);
1499 new_node = new_bd_ia32_IDiv(dbgi, new_block, addr->base,
1500 addr->index, new_mem, am.new_op2, am.new_op1, sign_extension);
1502 sign_extension = new_bd_ia32_Const(dbgi, new_block, NULL, 0, 0);
1503 be_dep_on_frame(sign_extension);
1505 new_node = new_bd_ia32_Div(dbgi, new_block, addr->base,
1506 addr->index, new_mem, am.new_op2,
1507 am.new_op1, sign_extension);
1510 set_irn_pinned(new_node, get_irn_pinned(node));
1512 set_am_attributes(new_node, &am);
1513 SET_IA32_ORIG_NODE(new_node, node);
1515 new_node = fix_mem_proj(new_node, &am);
1521 static ir_node *gen_Mod(ir_node *node)
1523 return create_Div(node);
1526 static ir_node *gen_Div(ir_node *node)
1528 return create_Div(node);
1531 static ir_node *gen_DivMod(ir_node *node)
1533 return create_Div(node);
1539 * Creates an ia32 floating Div.
1541 * @return The created ia32 xDiv node
1543 static ir_node *gen_Quot(ir_node *node)
1545 ir_node *op1 = get_Quot_left(node);
1546 ir_node *op2 = get_Quot_right(node);
1548 if (ia32_cg_config.use_sse2) {
1549 return gen_binop(node, op1, op2, new_bd_ia32_xDiv, match_am);
1551 return gen_binop_x87_float(node, op1, op2, new_bd_ia32_vfdiv);
1557 * Creates an ia32 Shl.
1559 * @return The created ia32 Shl node
1561 static ir_node *gen_Shl(ir_node *node)
1563 ir_node *left = get_Shl_left(node);
1564 ir_node *right = get_Shl_right(node);
1566 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
1567 match_mode_neutral | match_immediate);
1571 * Creates an ia32 Shr.
1573 * @return The created ia32 Shr node
1575 static ir_node *gen_Shr(ir_node *node)
1577 ir_node *left = get_Shr_left(node);
1578 ir_node *right = get_Shr_right(node);
1580 return gen_shift_binop(node, left, right, new_bd_ia32_Shr, match_immediate);
1586 * Creates an ia32 Sar.
1588 * @return The created ia32 Shrs node
1590 static ir_node *gen_Shrs(ir_node *node)
1592 ir_node *left = get_Shrs_left(node);
1593 ir_node *right = get_Shrs_right(node);
/* Arithmetic shift right by a constant (NOTE(review): sampled listing --
 * the value test, presumably val == 31, is not visible here) is a plain
 * 32->64 sign extension. */
1595 if (is_Const(right)) {
1596 tarval *tv = get_Const_tarval(right);
1597 long val = get_tarval_long(tv);
1599 /* this is a sign extension */
1600 dbg_info *dbgi = get_irn_dbg_info(node);
1601 ir_node *block = be_transform_node(get_nodes_block(node));
1602 ir_node *new_op = be_transform_node(left);
1604 return create_sex_32_64(dbgi, block, new_op, node);
/* Shrs(Shl(x, c), c) with c in {16, 24} sign-extends the low 16/8 bits:
 * emit a sign-extending I2I Conv instead of the shift pair. */
1608 /* 8 or 16 bit sign extension? */
1609 if (is_Const(right) && is_Shl(left)) {
1610 ir_node *shl_left = get_Shl_left(left);
1611 ir_node *shl_right = get_Shl_right(left);
1612 if (is_Const(shl_right)) {
1613 tarval *tv1 = get_Const_tarval(right);
1614 tarval *tv2 = get_Const_tarval(shl_right);
1615 if (tv1 == tv2 && tarval_is_long(tv1)) {
1616 long val = get_tarval_long(tv1);
1617 if (val == 16 || val == 24) {
1618 dbg_info *dbgi = get_irn_dbg_info(node);
1619 ir_node *block = get_nodes_block(node);
1629 res = create_I2I_Conv(src_mode, mode_Is, dbgi, block,
/* Generic case: plain Sar (immediate shift amounts allowed). */
1638 return gen_shift_binop(node, left, right, new_bd_ia32_Sar, match_immediate);
1644 * Creates an ia32 Rol.
1646 * @param op1 The first operator
1647 * @param op2 The second operator
1648 * @return The created ia32 RotL node
1650 static ir_node *gen_Rol(ir_node *node, ir_node *op1, ir_node *op2)
1652 return gen_shift_binop(node, op1, op2, new_bd_ia32_Rol, match_immediate);
1658 * Creates an ia32 Ror.
1659 * NOTE: There is no RotR with immediate because this would always be a RotL
1660 * "imm-mode_size_bits" which can be pre-calculated.
1662 * @param op1 The first operator
1663 * @param op2 The second operator
1664 * @return The created ia32 RotR node
1666 static ir_node *gen_Ror(ir_node *node, ir_node *op1, ir_node *op2)
1668 return gen_shift_binop(node, op1, op2, new_bd_ia32_Ror, match_immediate);
1674 * Creates an ia32 RotR or RotL (depending on the found pattern).
1676 * @return The created ia32 RotL or RotR node
1678 static ir_node *gen_Rotl(ir_node *node)
1680 ir_node *rotate = NULL;
1681 ir_node *op1 = get_Rotl_left(node);
1682 ir_node *op2 = get_Rotl_right(node);
1684 /* Firm has only RotL, so we are looking for a right (op2)
1685 operand "-e+mode_size_bits" (it's an already modified "mode_size_bits-e",
1686 that means we can create a RotR instead of an Add and a RotL */
/* NOTE(review): sampled listing -- the guard that binds 'add' (presumably
 * an is_Add(op2) check) is not visible here. */
1690 ir_node *left = get_Add_left(add);
1691 ir_node *right = get_Add_right(add);
1692 if (is_Const(right)) {
1693 tarval *tv = get_Const_tarval(right);
1694 ir_mode *mode = get_irn_mode(node);
1695 long bits = get_mode_size_bits(mode);
/* Matched RotL(x, bits - e): lower to RotR(x, e) instead. */
1697 if (is_Minus(left) &&
1698 tarval_is_long(tv) &&
1699 get_tarval_long(tv) == bits &&
1702 DB((dbg, LEVEL_1, "RotL into RotR ... "));
1703 rotate = gen_Ror(node, op1, get_Minus_op(left));
/* Pattern not found: emit a plain RotL. */
1708 if (rotate == NULL) {
1709 rotate = gen_Rol(node, op1, op2);
1718 * Transforms a Minus node.
1720 * @return The created ia32 Minus node
1722 static ir_node *gen_Minus(ir_node *node)
1724 ir_node *op = get_Minus_op(node);
1725 ir_node *block = be_transform_node(get_nodes_block(node));
1726 dbg_info *dbgi = get_irn_dbg_info(node);
1727 ir_mode *mode = get_irn_mode(node);
1732 if (mode_is_float(mode)) {
1733 ir_node *new_op = be_transform_node(op);
1734 if (ia32_cg_config.use_sse2) {
1735 /* TODO: non-optimal... if we have many xXors, then we should
1736 * rather create a load for the const and use that instead of
1737 * several AM nodes... */
/* SSE negate: xor with the sign-bit mask loaded from a known const
 * entity (SSIGN/DSIGN) via source address mode. */
1738 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1739 ir_node *noreg_xmm = ia32_new_NoReg_xmm(env_cg);
1740 ir_node *nomem = new_NoMem();
1742 new_node = new_bd_ia32_xXor(dbgi, block, noreg_gp, noreg_gp,
1743 nomem, new_op, noreg_xmm);
1745 size = get_mode_size_bits(mode);
1746 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SSIGN : ia32_DSIGN);
1748 set_ia32_am_sc(new_node, ent);
1749 set_ia32_op_type(new_node, ia32_AddrModeS);
1750 set_ia32_ls_mode(new_node, mode);
/* x87 negate: fchs. */
1752 new_node = new_bd_ia32_vfchs(dbgi, block, new_op);
/* Integer negate. */
1755 new_node = gen_unop(node, op, new_bd_ia32_Neg, match_mode_neutral);
1758 SET_IA32_ORIG_NODE(new_node, node);
1764 * Transforms a Not node.
1766 * @return The created ia32 Not node
1768 static ir_node *gen_Not(ir_node *node)
1770 ir_node *op = get_Not_op(node);
1772 assert(get_irn_mode(node) != mode_b); /* should be lowered already */
1773 assert (! mode_is_float(get_irn_mode(node)));
1775 return gen_unop(node, op, new_bd_ia32_Not, match_mode_neutral);
1781 * Transforms an Abs node.
1783 * @return The created ia32 Abs node
1785 static ir_node *gen_Abs(ir_node *node)
1787 ir_node *block = get_nodes_block(node);
1788 ir_node *new_block = be_transform_node(block);
1789 ir_node *op = get_Abs_op(node);
1790 dbg_info *dbgi = get_irn_dbg_info(node);
1791 ir_mode *mode = get_irn_mode(node);
1792 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
1793 ir_node *nomem = new_NoMem();
1799 if (mode_is_float(mode)) {
1800 new_op = be_transform_node(op);
/* SSE abs: clear the sign bit by and-ing with the SABS/DABS mask
 * loaded from a known const entity via source address mode. */
1802 if (ia32_cg_config.use_sse2) {
1803 ir_node *noreg_fp = ia32_new_NoReg_xmm(env_cg);
1804 new_node = new_bd_ia32_xAnd(dbgi, new_block, noreg_gp, noreg_gp,
1805 nomem, new_op, noreg_fp);
1807 size = get_mode_size_bits(mode);
1808 ent = ia32_gen_fp_known_const(size == 32 ? ia32_SABS : ia32_DABS);
1810 set_ia32_am_sc(new_node, ent);
1812 SET_IA32_ORIG_NODE(new_node, node);
1814 set_ia32_op_type(new_node, ia32_AddrModeS);
1815 set_ia32_ls_mode(new_node, mode);
/* x87 abs: fabs. */
1817 new_node = new_bd_ia32_vfabs(dbgi, new_block, new_op);
1818 SET_IA32_ORIG_NODE(new_node, node);
/* Integer abs via the classic branch-free sequence:
 * s = x >> 31; result = (x ^ s) - s. */
1821 ir_node *xor, *sign_extension;
1823 if (get_mode_size_bits(mode) == 32) {
1824 new_op = be_transform_node(op);
1826 new_op = create_I2I_Conv(mode, mode_Is, dbgi, block, op, node);
1829 sign_extension = create_sex_32_64(dbgi, new_block, new_op, node);
1831 xor = new_bd_ia32_Xor(dbgi, new_block, noreg_gp, noreg_gp,
1832 nomem, new_op, sign_extension);
1833 SET_IA32_ORIG_NODE(xor, node);
1835 new_node = new_bd_ia32_Sub(dbgi, new_block, noreg_gp, noreg_gp,
1836 nomem, xor, sign_extension);
1837 SET_IA32_ORIG_NODE(new_node, node);
1844 * Create a bt instruction for x & (1 << n) and place it into the block of cmp.
1846 static ir_node *gen_bt(ir_node *cmp, ir_node *x, ir_node *n)
1848 dbg_info *dbgi = get_irn_dbg_info(cmp);
1849 ir_node *block = get_nodes_block(cmp);
1850 ir_node *new_block = be_transform_node(block);
1851 ir_node *op1 = be_transform_node(x);
1852 ir_node *op2 = be_transform_node(n);
1854 return new_bd_ia32_Bt(dbgi, new_block, op1, op2);
1858 * Transform a node returning a "flag" result.
1860 * @param node the node to transform
1861 * @param pnc_out the compare mode to use
1863 static ir_node *get_flags_node(ir_node *node, pn_Cmp *pnc_out)
1872 /* we have a Cmp as input */
1873 if (is_Proj(node)) {
1874 ir_node *pred = get_Proj_pred(node);
/* Cmp(x & (1 << n), 0) with Eq/Lg can be lowered to a bt instruction;
 * the two symmetric branches below handle the shift on either And side.
 * (NOTE(review): sampled listing -- guards such as is_Cmp/is_And/is_Shl
 * are not visible here.) */
1876 pn_Cmp pnc = get_Proj_proj(node);
1877 if (ia32_cg_config.use_bt && (pnc == pn_Cmp_Lg || pnc == pn_Cmp_Eq)) {
1878 ir_node *l = get_Cmp_left(pred);
1879 ir_node *r = get_Cmp_right(pred);
1881 ir_node *la = get_And_left(l);
1882 ir_node *ra = get_And_right(l);
1884 ir_node *c = get_Shl_left(la);
1885 if (is_Const_1(c) && (is_Const_0(r) || r == la)) {
1886 /* (1 << n) & ra) */
1887 ir_node *n = get_Shl_right(la);
1888 flags = gen_bt(pred, ra, n);
1889 /* we must generate a Jc/Jnc jump */
1890 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1893 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
1898 ir_node *c = get_Shl_left(ra);
1899 if (is_Const_1(c) && (is_Const_0(r) || r == ra)) {
1900 /* la & (1 << n)) */
1901 ir_node *n = get_Shl_right(ra);
1902 flags = gen_bt(pred, la, n);
1903 /* we must generate a Jc/Jnc jump */
1904 pnc = pnc == pn_Cmp_Lg ? pn_Cmp_Lt : pn_Cmp_Ge;
1907 *pnc_out = ia32_pn_Cmp_unsigned | pnc;
/* Plain Cmp predecessor: its transformed node already produces flags. */
1913 flags = be_transform_node(pred);
1919 /* a mode_b value, we have to compare it against 0 */
1920 dbgi = get_irn_dbg_info(node);
1921 new_block = be_transform_node(get_nodes_block(node));
1922 new_op = be_transform_node(node);
1923 noreg = ia32_new_NoReg_gp(env_cg);
1924 nomem = new_NoMem();
1925 flags = new_bd_ia32_Test(dbgi, new_block, noreg, noreg, nomem, new_op,
1926 new_op, /*is_permuted=*/0, /*cmp_unsigned=*/0);
1927 *pnc_out = pn_Cmp_Lg;
1932 * Transforms a Load.
1934 * @return the created ia32 Load node
1936 static ir_node *gen_Load(ir_node *node)
1938 ir_node *old_block = get_nodes_block(node);
1939 ir_node *block = be_transform_node(old_block);
1940 ir_node *ptr = get_Load_ptr(node);
1941 ir_node *mem = get_Load_mem(node);
1942 ir_node *new_mem = be_transform_node(mem);
1945 dbg_info *dbgi = get_irn_dbg_info(node);
1946 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
1947 ir_mode *mode = get_Load_mode(node);
1950 ia32_address_t addr;
1952 /* construct load address */
1953 memset(&addr, 0, sizeof(addr));
1954 ia32_create_address_mode(&addr, ptr, 0);
/* Transform base/index, substituting NoReg when absent.
 * (NOTE(review): sampled listing -- the NULL branches are not visible.) */
1961 base = be_transform_node(base);
1964 if (index == NULL) {
1967 index = be_transform_node(index);
/* Pick the load flavour: SSE2 xLoad / x87 vfld for floats, a
 * sign/zero-extending Conv_I2I for sub-32-bit GP loads, plain Load else. */
1970 if (mode_is_float(mode)) {
1971 if (ia32_cg_config.use_sse2) {
1972 new_node = new_bd_ia32_xLoad(dbgi, block, base, index, new_mem,
1974 res_mode = mode_xmm;
1976 new_node = new_bd_ia32_vfld(dbgi, block, base, index, new_mem,
1978 res_mode = mode_vfp;
1981 assert(mode != mode_b);
1983 /* create a conv node with address mode for smaller modes */
1984 if (get_mode_size_bits(mode) < 32) {
1985 new_node = new_bd_ia32_Conv_I2I(dbgi, block, base, index,
1986 new_mem, noreg, mode);
1988 new_node = new_bd_ia32_Load(dbgi, block, base, index, new_mem);
1993 set_irn_pinned(new_node, get_irn_pinned(node));
1994 set_ia32_op_type(new_node, ia32_AddrModeS);
1995 set_ia32_ls_mode(new_node, mode);
1996 set_address(new_node, &addr);
/* Unpinned loads may be rematerialised by the register allocator. */
1998 if (get_irn_pinned(node) == op_pin_state_floats) {
1999 assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
2000 && pn_ia32_vfld_res == pn_ia32_Load_res
2001 && pn_ia32_Load_res == pn_ia32_res);
2002 arch_irn_add_flags(new_node, arch_irn_flags_rematerializable);
2005 SET_IA32_ORIG_NODE(new_node, node);
2007 be_dep_on_frame(new_node);
/* Decide whether the Load result 'node' may be folded as the destination
 * address-mode operand of a store-like operation at 'ptr'.
 * (NOTE(review): sampled listing -- the early 'return 0;' lines and final
 * 'return 1;' are not visible here.) */
2011 static int use_dest_am(ir_node *block, ir_node *node, ir_node *mem,
2012 ir_node *ptr, ir_node *other)
2019 /* we only use address mode if we're the only user of the load */
2020 if (get_irn_n_edges(node) > 1)
2023 load = get_Proj_pred(node);
2026 if (get_nodes_block(load) != block)
2029 /* store should have the same pointer as the load */
2030 if (get_Load_ptr(load) != ptr)
2033 /* don't do AM if other node inputs depend on the load (via mem-proj) */
2034 if (other != NULL &&
2035 get_nodes_block(other) == block &&
2036 heights_reachable_in_block(heights, other, load)) {
2040 if (prevents_AM(block, load, mem))
2042 /* Store should be attached to the load via mem */
2043 assert(heights_reachable_in_block(heights, mem, load));
/* Build a destination-address-mode binop (e.g. AddMem) for a
 * Store(ptr, op(Load(ptr), x)) pattern; returns NULL-ish fallthrough when
 * neither operand qualifies (NOTE(review): sampled listing -- the failure
 * return is not visible here). */
2048 static ir_node *dest_am_binop(ir_node *node, ir_node *op1, ir_node *op2,
2049 ir_node *mem, ir_node *ptr, ir_mode *mode,
2050 construct_binop_dest_func *func,
2051 construct_binop_dest_func *func8bit,
2052 match_flags_t flags)
2054 ir_node *src_block = get_nodes_block(node);
2056 ir_node *noreg_gp = ia32_new_NoReg_gp(env_cg);
2063 ia32_address_mode_t am;
2064 ia32_address_t *addr = &am.addr;
2065 memset(&am, 0, sizeof(am));
2067 assert(flags & match_immediate); /* there is no destam node without... */
2068 commutative = (flags & match_commutative) != 0;
/* Fold whichever operand is the qualifying load; the other one becomes
 * the (possibly immediate) register operand. */
2070 if (use_dest_am(src_block, op1, mem, ptr, op2)) {
2071 build_address(&am, op1, ia32_create_am_double_use);
2072 new_op = create_immediate_or_transform(op2, 0);
2073 } else if (commutative && use_dest_am(src_block, op2, mem, ptr, op1)) {
2074 build_address(&am, op2, ia32_create_am_double_use);
2075 new_op = create_immediate_or_transform(op1, 0);
2080 if (addr->base == NULL)
2081 addr->base = noreg_gp;
2082 if (addr->index == NULL)
2083 addr->index = noreg_gp;
2084 if (addr->mem == NULL)
2085 addr->mem = new_NoMem();
2087 dbgi = get_irn_dbg_info(node);
2088 block = be_transform_node(src_block);
2089 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2091 if (get_mode_size_bits(mode) == 8) {
2092 new_node = func8bit(dbgi, block, addr->base, addr->index, new_mem, new_op);
2094 new_node = func(dbgi, block, addr->base, addr->index, new_mem, new_op);
2096 set_address(new_node, addr);
2097 set_ia32_op_type(new_node, ia32_AddrModeD);
2098 set_ia32_ls_mode(new_node, mode);
2099 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load's memory Proj to the new destination-AM node.
 * (NOTE(review): sampled listing -- these lines likely come from distinct
 * branches of the original control flow.) */
2101 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2102 mem_proj = be_transform_node(am.mem_proj);
2103 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Build a destination-address-mode unop (e.g. NotMem) for a
 * Store(ptr, op(Load(ptr))) pattern. */
2108 static ir_node *dest_am_unop(ir_node *node, ir_node *op, ir_node *mem,
2109 ir_node *ptr, ir_mode *mode,
2110 construct_unop_dest_func *func)
2112 ir_node *src_block = get_nodes_block(node);
2118 ia32_address_mode_t am;
2119 ia32_address_t *addr = &am.addr;
2121 if (!use_dest_am(src_block, op, mem, ptr, NULL))
2124 memset(&am, 0, sizeof(am));
2125 build_address(&am, op, ia32_create_am_double_use);
2127 dbgi = get_irn_dbg_info(node);
2128 block = be_transform_node(src_block);
2129 new_mem = transform_AM_mem(current_ir_graph, block, am.am_node, mem, addr->mem);
2130 new_node = func(dbgi, block, addr->base, addr->index, new_mem);
2131 set_address(new_node, addr);
2132 set_ia32_op_type(new_node, ia32_AddrModeD);
2133 set_ia32_ls_mode(new_node, mode);
2134 SET_IA32_ORIG_NODE(new_node, node);
/* Redirect the consumed load's memory Proj to the new node.
 * (NOTE(review): sampled listing -- these lines likely come from distinct
 * branches of the original control flow.) */
2136 be_set_transformed_node(get_Proj_pred(am.mem_proj), new_node);
2137 mem_proj = be_transform_node(am.mem_proj);
2138 be_set_transformed_node(mem_proj ? mem_proj : am.mem_proj, new_node);
/* Try to express Store(ptr, Mux(cond, 1, 0)) (8 bit only) as an ia32
 * SetMem writing the condition byte straight to memory.
 * (NOTE(review): sampled listing -- the failure returns and the 'negated'
 * assignments are not visible here.) */
2143 static ir_node *try_create_SetMem(ir_node *node, ir_node *ptr, ir_node *mem)
2145 ir_mode *mode = get_irn_mode(node);
2146 ir_node *mux_true = get_Mux_true(node);
2147 ir_node *mux_false = get_Mux_false(node);
2157 ia32_address_t addr;
2159 if (get_mode_size_bits(mode) != 8)
/* Only constant 1/0 (or 0/1, then negated) Mux values qualify. */
2162 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
2164 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
2170 build_address_ptr(&addr, ptr, mem);
2172 dbgi = get_irn_dbg_info(node);
2173 block = get_nodes_block(node);
2174 new_block = be_transform_node(block);
2175 cond = get_Mux_sel(node);
2176 flags = get_flags_node(cond, &pnc);
2177 new_mem = be_transform_node(mem);
2178 new_node = new_bd_ia32_SetMem(dbgi, new_block, addr.base,
2179 addr.index, addr.mem, flags, pnc, negated);
2180 set_address(new_node, &addr);
2181 set_ia32_op_type(new_node, ia32_AddrModeD);
2182 set_ia32_ls_mode(new_node, mode);
2183 SET_IA32_ORIG_NODE(new_node, node);
/* Try to turn a Store(ptr, op(Load(ptr), ...)) into a single
 * destination-address-mode instruction; returns NULL-ish fallthrough when
 * no pattern matches (NOTE(review): sampled listing -- the failure returns
 * and case labels are not visible here). */
2188 static ir_node *try_create_dest_am(ir_node *node)
2190 ir_node *val = get_Store_value(node);
2191 ir_node *mem = get_Store_mem(node);
2192 ir_node *ptr = get_Store_ptr(node);
2193 ir_mode *mode = get_irn_mode(val);
2194 unsigned bits = get_mode_size_bits(mode);
2199 /* handle only GP modes for now... */
2200 if (!ia32_mode_needs_gp_reg(mode))
2204 /* store must be the only user of the val node */
2205 if (get_irn_n_edges(val) > 1)
2207 /* skip pointless convs */
2209 ir_node *conv_op = get_Conv_op(val);
2210 ir_mode *pred_mode = get_irn_mode(conv_op);
2211 if (!ia32_mode_needs_gp_reg(pred_mode))
2213 if (pred_mode == mode_b || bits <= get_mode_size_bits(pred_mode)) {
2221 /* value must be in the same block */
2222 if (get_nodes_block(node) != get_nodes_block(val))
/* Dispatch on the stored operation; each case maps to the matching
 * xxxMem destination-AM node. */
2225 switch (get_irn_opcode(val)) {
2227 op1 = get_Add_left(val);
2228 op2 = get_Add_right(val);
/* Add/Sub by +-1 can use the shorter inc/dec forms when enabled. */
2229 if (ia32_cg_config.use_incdec) {
2230 if (is_Const_1(op2)) {
2231 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_IncMem);
2233 } else if (is_Const_Minus_1(op2)) {
2234 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_DecMem);
2238 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2239 new_bd_ia32_AddMem, new_bd_ia32_AddMem8Bit,
2240 match_commutative | match_immediate);
2243 op1 = get_Sub_left(val);
2244 op2 = get_Sub_right(val);
2245 if (is_Const(op2)) {
2246 ir_fprintf(stderr, "Optimisation warning: not-normalized sub ,C found\n");
2248 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2249 new_bd_ia32_SubMem, new_bd_ia32_SubMem8Bit,
2253 op1 = get_And_left(val);
2254 op2 = get_And_right(val);
2255 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2256 new_bd_ia32_AndMem, new_bd_ia32_AndMem8Bit,
2257 match_commutative | match_immediate);
2260 op1 = get_Or_left(val);
2261 op2 = get_Or_right(val);
2262 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2263 new_bd_ia32_OrMem, new_bd_ia32_OrMem8Bit,
2264 match_commutative | match_immediate);
2267 op1 = get_Eor_left(val);
2268 op2 = get_Eor_right(val);
2269 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2270 new_bd_ia32_XorMem, new_bd_ia32_XorMem8Bit,
2271 match_commutative | match_immediate);
2274 op1 = get_Shl_left(val);
2275 op2 = get_Shl_right(val);
2276 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2277 new_bd_ia32_ShlMem, new_bd_ia32_ShlMem,
2281 op1 = get_Shr_left(val);
2282 op2 = get_Shr_right(val);
2283 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2284 new_bd_ia32_ShrMem, new_bd_ia32_ShrMem,
2288 op1 = get_Shrs_left(val);
2289 op2 = get_Shrs_right(val);
2290 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2291 new_bd_ia32_SarMem, new_bd_ia32_SarMem,
2295 op1 = get_Rotl_left(val);
2296 op2 = get_Rotl_right(val);
2297 new_node = dest_am_binop(val, op1, op2, mem, ptr, mode,
2298 new_bd_ia32_RolMem, new_bd_ia32_RolMem,
2301 /* TODO: match ROR patterns... */
2303 new_node = try_create_SetMem(val, ptr, mem);
2306 op1 = get_Minus_op(val);
2307 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NegMem);
2310 /* should be lowered already */
2311 assert(mode != mode_b);
2312 op1 = get_Not_op(val);
2313 new_node = dest_am_unop(val, op1, mem, ptr, mode, new_bd_ia32_NotMem);
/* Keep the pinned state of the original store. */
2319 if (new_node != NULL) {
2320 if (get_irn_pinned(new_node) != op_pin_state_pinned &&
2321 get_irn_pinned(node) == op_pin_state_pinned) {
2322 set_irn_pinned(new_node, op_pin_state_pinned);
2329 static bool possible_int_mode_for_fp(ir_mode *mode)
2333 if (!mode_is_signed(mode))
2335 size = get_mode_size_bits(mode);
2336 if (size != 16 && size != 32)
/* True iff node is a Conv from a float mode into a signed 16/32 bit int
 * mode (NOTE(review): sampled listing -- the is_Conv guard and the return
 * statements are not visible here). */
2341 static int is_float_to_int_conv(const ir_node *node)
2343 ir_mode *mode = get_irn_mode(node);
2347 if (!possible_int_mode_for_fp(mode))
2352 conv_op = get_Conv_op(node);
2353 conv_mode = get_irn_mode(conv_op);
2355 if (!mode_is_float(conv_mode))
1362 comment header follows; see below. */
2362 * Transform a Store(floatConst) into a sequence of
2365 * @return the created ia32 Store node
2367 static ir_node *gen_float_const_Store(ir_node *node, ir_node *cns)
2369 ir_mode *mode = get_irn_mode(cns);
2370 unsigned size = get_mode_size_bytes(mode);
2371 tarval *tv = get_Const_tarval(cns);
2372 ir_node *block = get_nodes_block(node);
2373 ir_node *new_block = be_transform_node(block);
2374 ir_node *ptr = get_Store_ptr(node);
2375 ir_node *mem = get_Store_mem(node);
2376 dbg_info *dbgi = get_irn_dbg_info(node);
2380 ia32_address_t addr;
/* The float constant is emitted as one 32 bit integer store per word. */
2382 assert(size % 4 == 0);
2385 build_address_ptr(&addr, ptr, mem);
/* Assemble one little-endian 32 bit word of the constant. */
2389 get_tarval_sub_bits(tv, ofs) |
2390 (get_tarval_sub_bits(tv, ofs + 1) << 8) |
2391 (get_tarval_sub_bits(tv, ofs + 2) << 16) |
2392 (get_tarval_sub_bits(tv, ofs + 3) << 24);
2393 ir_node *imm = create_Immediate(NULL, 0, val);
2395 ir_node *new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2396 addr.index, addr.mem, imm);
2398 set_irn_pinned(new_node, get_irn_pinned(node));
2399 set_ia32_op_type(new_node, ia32_AddrModeD);
2400 set_ia32_ls_mode(new_node, mode_Iu);
2401 set_address(new_node, &addr);
2402 SET_IA32_ORIG_NODE(new_node, node);
/* Collect the partial stores and advance offset until done.
 * (NOTE(review): sampled listing -- the loop head and the offset/size
 * bookkeeping are not visible here.) */
2405 ins[i++] = new_node;
2410 } while (size != 0);
/* Join the partial stores' memories with a Sync. */
2413 return new_rd_Sync(dbgi, current_ir_graph, new_block, i, ins);
2420 * Generate a vfist or vfisttp instruction.
2422 static ir_node *gen_vfist(dbg_info *dbgi, ir_graph *irg, ir_node *block, ir_node *base, ir_node *index,
2423 ir_node *mem, ir_node *val, ir_node **fist)
2427 if (ia32_cg_config.use_fisttp) {
2428 /* Note: fisttp ALWAYS pop the tos. We have to ensure here that the value is copied
2429 if other users exists */
2430 const arch_register_class_t *reg_class = &ia32_reg_classes[CLASS_ia32_vfp];
2431 ir_node *vfisttp = new_bd_ia32_vfisttp(dbgi, block, base, index, mem, val);
/* Keep the popped value alive for the register allocator. */
2432 ir_node *value = new_r_Proj(irg, block, vfisttp, mode_E, pn_ia32_vfisttp_res);
2433 be_new_Keep(reg_class, irg, block, 1, &value);
2435 new_node = new_r_Proj(irg, block, vfisttp, mode_M, pn_ia32_vfisttp_M);
/* No fisttp available: classic vfist with an explicit truncating FPU
 * control word. (NOTE(review): sampled listing -- the *fist out-parameter
 * assignments are not visible here.) */
2438 ir_node *trunc_mode = ia32_new_Fpu_truncate(env_cg);
2441 new_node = new_bd_ia32_vfist(dbgi, block, base, index, mem, val, trunc_mode);
2447 * Transforms a general (no special case) Store.
2449 * @return the created ia32 Store node
2451 static ir_node *gen_general_Store(ir_node *node)
2453 ir_node *val = get_Store_value(node);
2454 ir_mode *mode = get_irn_mode(val);
2455 ir_node *block = get_nodes_block(node);
2456 ir_node *new_block = be_transform_node(block);
2457 ir_node *ptr = get_Store_ptr(node);
2458 ir_node *mem = get_Store_mem(node);
2459 dbg_info *dbgi = get_irn_dbg_info(node);
2460 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2461 ir_node *new_val, *new_node, *store;
2462 ia32_address_t addr;
2464 /* check for destination address mode */
2465 new_node = try_create_dest_am(node);
2466 if (new_node != NULL)
2469 /* construct store address */
2470 memset(&addr, 0, sizeof(addr));
2471 ia32_create_address_mode(&addr, ptr, 0);
/* Transform base/index, substituting NoReg when absent.
 * (NOTE(review): sampled listing -- the NoReg branches are not visible.) */
2473 if (addr.base == NULL) {
2476 addr.base = be_transform_node(addr.base);
2479 if (addr.index == NULL) {
2482 addr.index = be_transform_node(addr.index);
2484 addr.mem = be_transform_node(mem);
2486 if (mode_is_float(mode)) {
2487 /* Convs (and strict-Convs) before stores are unnecessary if the mode
2489 while (is_Conv(val) && mode == get_irn_mode(val)) {
2490 ir_node *op = get_Conv_op(val);
2491 if (!mode_is_float(get_irn_mode(op)))
2495 new_val = be_transform_node(val);
2496 if (ia32_cg_config.use_sse2) {
2497 new_node = new_bd_ia32_xStore(dbgi, new_block, addr.base,
2498 addr.index, addr.mem, new_val);
2500 new_node = new_bd_ia32_vfst(dbgi, new_block, addr.base,
2501 addr.index, addr.mem, new_val, mode);
/* float->int Conv feeding the store: fold it into an x87 fist. */
2504 } else if (!ia32_cg_config.use_sse2 && is_float_to_int_conv(val)) {
2505 val = get_Conv_op(val);
2507 /* TODO: is this optimisation still necessary at all (middleend)? */
2508 /* We can skip ALL float->float up-Convs (and strict-up-Convs) before stores. */
2509 while (is_Conv(val)) {
2510 ir_node *op = get_Conv_op(val);
2511 if (!mode_is_float(get_irn_mode(op)))
2513 if (get_mode_size_bits(get_irn_mode(op)) > get_mode_size_bits(get_irn_mode(val)))
2517 new_val = be_transform_node(val);
2518 new_node = gen_vfist(dbgi, current_ir_graph, new_block, addr.base, addr.index, addr.mem, new_val, &store);
/* Plain GP store; 8 bit stores need the dedicated 8 bit variant. */
2520 new_val = create_immediate_or_transform(val, 0);
2521 assert(mode != mode_b);
2523 if (get_mode_size_bits(mode) == 8) {
2524 new_node = new_bd_ia32_Store8Bit(dbgi, new_block, addr.base,
2525 addr.index, addr.mem, new_val);
2527 new_node = new_bd_ia32_Store(dbgi, new_block, addr.base,
2528 addr.index, addr.mem, new_val);
2533 set_irn_pinned(store, get_irn_pinned(node));
2534 set_ia32_op_type(store, ia32_AddrModeD);
2535 set_ia32_ls_mode(store, mode);
2537 set_address(store, &addr);
2538 SET_IA32_ORIG_NODE(store, node);
2544 * Transforms a Store.
2546 * @return the created ia32 Store node
2548 static ir_node *gen_Store(ir_node *node)
2550 ir_node *val = get_Store_value(node);
2551 ir_mode *mode = get_irn_mode(val);
2553 if (mode_is_float(mode) && is_Const(val)) {
2554 /* We can transform every floating const store
2555 into a sequence of integer stores.
2556 If the constant is already in a register,
2557 it would be better to use it, but we don't
2558 have this information here. */
2559 return gen_float_const_Store(node, val);
2561 return gen_general_Store(node);
2565 * Transforms a Switch.
2567 * @return the created ia32 SwitchJmp node
/**
 * Transforms a Cond with a non-mode_b selector into an ia32 SwitchJmp
 * (jump table dispatch).
 * NOTE(review): this excerpt is elided; the loop's continue/update
 * statements and the final return are not visible here.
 *
 * @return the created ia32 SwitchJmp node
 */
2569 static ir_node *create_Switch(ir_node *node)
2571 dbg_info *dbgi = get_irn_dbg_info(node);
2572 ir_node *block = be_transform_node(get_nodes_block(node));
2573 ir_node *sel = get_Cond_selector(node);
2574 ir_node *new_sel = be_transform_node(sel);
/* range of the non-default case numbers, determined below */
2575 long switch_min = LONG_MAX;
2576 long switch_max = LONG_MIN;
2577 long default_pn = get_Cond_defaultProj(node);
2579 const ir_edge_t *edge;
/* jump table dispatch only handles 32 bit selectors */
2581 assert(get_mode_size_bits(get_irn_mode(sel)) == 32);
2583 /* determine the smallest (and largest) switch case value */
2584 foreach_out_edge(node, edge) {
2585 ir_node *proj = get_edge_src_irn(edge);
2586 long pn = get_Proj_proj(proj);
/* the default Proj does not constrain the table range */
2587 if (pn == default_pn)
2590 if (pn < switch_min)
2592 if (pn > switch_max)
/* refuse to build overly large jump tables */
2596 if ((unsigned long) (switch_max - switch_min) > 256000) {
2597 panic("Size of switch %+F bigger than 256000", node);
2600 if (switch_min != 0) {
2601 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2603 /* if smallest switch case is not 0 we need an additional sub */
2604 new_sel = new_bd_ia32_Lea(dbgi, block, new_sel, noreg);
2605 add_ia32_am_offs_int(new_sel, -switch_min);
2606 set_ia32_op_type(new_sel, ia32_AddrModeS);
2608 SET_IA32_ORIG_NODE(new_sel, node);
2611 new_node = new_bd_ia32_SwitchJmp(dbgi, block, new_sel, default_pn);
2612 SET_IA32_ORIG_NODE(new_node, node);
2618 * Transform a Cond node.
/**
 * Transform a Cond node.
 * A mode_b selector becomes a conditional jump (Jcc) on the flags produced
 * by the compare; any other selector mode is handled as a Switch.
 * NOTE(review): excerpt is elided; the declarations of pnc/new_node and
 * the final return are not visible here.
 */
2620 static ir_node *gen_Cond(ir_node *node)
2622 ir_node *block = get_nodes_block(node);
2623 ir_node *new_block = be_transform_node(block);
2624 dbg_info *dbgi = get_irn_dbg_info(node);
2625 ir_node *sel = get_Cond_selector(node);
2626 ir_mode *sel_mode = get_irn_mode(sel);
2627 ir_node *flags = NULL;
/* a non-boolean selector means a switch-Cond */
2631 if (sel_mode != mode_b) {
2632 return create_Switch(node);
2635 /* we get flags from a Cmp */
2636 flags = get_flags_node(sel, &pnc);
2638 new_node = new_bd_ia32_Jcc(dbgi, new_block, flags, pnc);
2639 SET_IA32_ORIG_NODE(new_node, node);
2644 static ir_node *gen_be_Copy(ir_node *node)
2646 ir_node *new_node = be_duplicate_node(node);
2647 ir_mode *mode = get_irn_mode(new_node);
2649 if (ia32_mode_needs_gp_reg(mode)) {
2650 set_irn_mode(new_node, mode_Iu);
/**
 * Creates an x87 floating point compare for a Cmp node.
 * With fucomi the result goes to eflags directly; otherwise the FPU status
 * word is fetched (fnstsw) and transferred to eflags via Sahf.  A compare
 * against constant 0 may use the cheaper ftst.
 * NOTE(review): excerpt is elided; some declarations, else-branches and
 * the final return are not visible here.
 */
2656 static ir_node *create_Fucom(ir_node *node)
2658 dbg_info *dbgi = get_irn_dbg_info(node);
2659 ir_node *block = get_nodes_block(node);
2660 ir_node *new_block = be_transform_node(block);
2661 ir_node *left = get_Cmp_left(node);
2662 ir_node *new_left = be_transform_node(left);
2663 ir_node *right = get_Cmp_right(node);
/* fucomi writes eflags directly, no status word transfer needed */
2667 if (ia32_cg_config.use_fucomi) {
2668 new_right = be_transform_node(right);
2669 new_node = new_bd_ia32_vFucomi(dbgi, new_block, new_left,
2671 set_ia32_commutative(new_node);
2672 SET_IA32_ORIG_NODE(new_node, node);
/* compare against 0 can use ftst, which has no second operand */
2674 if (ia32_cg_config.use_ftst && is_Const_0(right)) {
2675 new_node = new_bd_ia32_vFtstFnstsw(dbgi, new_block, new_left, 0);
2677 new_right = be_transform_node(right);
2678 new_node = new_bd_ia32_vFucomFnstsw(dbgi, new_block, new_left, new_right, 0);
2681 set_ia32_commutative(new_node);
2683 SET_IA32_ORIG_NODE(new_node, node);
/* move the fetched status word (in %ah) into eflags */
2685 new_node = new_bd_ia32_Sahf(dbgi, new_block, new_node);
2686 SET_IA32_ORIG_NODE(new_node, node);
/**
 * Creates an SSE Ucomi compare for a Cmp node, using address mode matching
 * so that one operand may come directly from memory.
 * NOTE(review): excerpt is elided; the new_node declaration, the last
 * Ucomi constructor argument and the return are not visible here.
 */
2692 static ir_node *create_Ucomi(ir_node *node)
2694 dbg_info *dbgi = get_irn_dbg_info(node);
2695 ir_node *src_block = get_nodes_block(node);
2696 ir_node *new_block = be_transform_node(src_block);
2697 ir_node *left = get_Cmp_left(node);
2698 ir_node *right = get_Cmp_right(node);
2700 ia32_address_mode_t am;
2701 ia32_address_t *addr = &am.addr;
/* ucomi is commutative, allow folding either operand as a memory operand */
2703 match_arguments(&am, src_block, left, right, NULL,
2704 match_commutative | match_am);
2706 new_node = new_bd_ia32_Ucomi(dbgi, new_block, addr->base, addr->index,
2707 addr->mem, am.new_op1, am.new_op2,
2709 set_am_attributes(new_node, &am);
2711 SET_IA32_ORIG_NODE(new_node, node);
/* reroute memory users if a source address mode was matched */
2713 new_node = fix_mem_proj(new_node, &am);
2719 * helper function: checks whether all Cmp projs are Lg or Eq which is needed
2720 * to fold an and into a test node
2722 static bool can_fold_test_and(ir_node *node)
2724 const ir_edge_t *edge;
2726 /** we can only have eq and lg projs */
2727 foreach_out_edge(node, edge) {
2728 ir_node *proj = get_edge_src_irn(edge);
2729 pn_Cmp pnc = get_Proj_proj(proj);
2730 if (pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg)
2738 * returns true if it is assured, that the upper bits of a node are "clean"
2739 * which means for a 16 or 8 bit value, that the upper bits in the register
2740 * are 0 for unsigned and a copy of the most significant (sign) bit for signed
/**
 * Returns true if it is assured that the upper bits of a (transformed)
 * node's register are "clean": for a 16 or 8 bit value, the upper register
 * bits are 0 for unsigned modes and sign-extended for signed modes.
 * NOTE(review): excerpt is elided; several case labels (apparently Shr,
 * Sar, And and the generic binary case) and returns are not visible here.
 */
2743 static bool upper_bits_clean(ir_node *transformed_node, ir_mode *mode)
2745 assert(ia32_mode_needs_gp_reg(mode));
/* a full-width value trivially has no dirty upper bits */
2746 if (get_mode_size_bits(mode) >= 32)
/* look through Projs to the producing node */
2749 if (is_Proj(transformed_node))
2750 return upper_bits_clean(get_Proj_pred(transformed_node), mode);
2752 switch (get_ia32_irn_opcode(transformed_node)) {
2753 case iro_ia32_Conv_I2I:
2754 case iro_ia32_Conv_I2I8Bit: {
/* a conversion cleans the upper bits iff it extends with the same
 * signedness to at most the queried width */
2755 ir_mode *smaller_mode = get_ia32_ls_mode(transformed_node);
2756 if (mode_is_signed(smaller_mode) != mode_is_signed(mode))
2758 if (get_mode_size_bits(smaller_mode) > get_mode_size_bits(mode))
2765 if (mode_is_signed(mode)) {
2766 return false; /* TODO handle signed modes */
/* an unsigned shift right by >= (32 - bits) zeroes the upper bits */
2768 ir_node *right = get_irn_n(transformed_node, n_ia32_Shr_count);
2769 if (is_ia32_Immediate(right) || is_ia32_Const(right)) {
2770 const ia32_immediate_attr_t *attr
2771 = get_ia32_immediate_attr_const(right);
2772 if (attr->symconst == 0 &&
2773 (unsigned)attr->offset >= 32 - get_mode_size_bits(mode)) {
2777 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Shr_val), mode);
2781 /* TODO too conservative if shift amount is constant */
2782 return upper_bits_clean(get_irn_n(transformed_node, n_ia32_Sar_val), mode);
/* unsigned And: one clean operand suffices since And can only clear bits */
2785 if (!mode_is_signed(mode)) {
2787 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_right), mode) ||
2788 upper_bits_clean(get_irn_n(transformed_node, n_ia32_And_left), mode);
2790 /* TODO if one is known to be zero extended, then || is sufficient */
/* generic binary op: both operands must be clean */
2795 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_right), mode) &&
2796 upper_bits_clean(get_irn_n(transformed_node, n_ia32_binary_left), mode);
2798 case iro_ia32_Const:
2799 case iro_ia32_Immediate: {
2800 const ia32_immediate_attr_t *attr =
2801 get_ia32_immediate_attr_const(transformed_node);
2802 if (mode_is_signed(mode)) {
/* signed: all upper bits must equal the sign bit */
2803 long shifted = attr->offset >> (get_mode_size_bits(mode) - 1);
2804 return shifted == 0 || shifted == -1;
/* unsigned: all upper bits must be zero */
2806 unsigned long shifted = (unsigned long)attr->offset;
2807 shifted >>= get_mode_size_bits(mode);
2808 return shifted == 0;
2818 * Generate code for a Cmp.
/**
 * Generate code for a Cmp.
 * Float compares are dispatched to Ucomi (SSE) or Fucom (x87).  Integer
 * compares become a Test when the pattern (x & y) ==/!= 0 applies and a
 * Cmp otherwise; in both cases the cheaper 32bit opcode is used when the
 * operands' upper bits are known clean.
 * NOTE(review): excerpt is elided; some declarations, else-branches and
 * constructor arguments are not visible here.
 */
2820 static ir_node *gen_Cmp(ir_node *node)
2822 dbg_info *dbgi = get_irn_dbg_info(node);
2823 ir_node *block = get_nodes_block(node);
2824 ir_node *new_block = be_transform_node(block);
2825 ir_node *left = get_Cmp_left(node);
2826 ir_node *right = get_Cmp_right(node);
2827 ir_mode *cmp_mode = get_irn_mode(left);
2829 ia32_address_mode_t am;
2830 ia32_address_t *addr = &am.addr;
2833 if (mode_is_float(cmp_mode)) {
2834 if (ia32_cg_config.use_sse2) {
2835 return create_Ucomi(node);
2837 return create_Fucom(node);
2841 assert(ia32_mode_needs_gp_reg(cmp_mode));
2843 /* Prefer the Test instruction, when encountering (x & y) ==/!= 0 */
2844 cmp_unsigned = !mode_is_signed(cmp_mode);
2845 if (is_Const_0(right) &&
2847 get_irn_n_edges(left) == 1 &&
2848 can_fold_test_and(node)) {
2849 /* Test(and_left, and_right) */
2850 ir_node *and_left = get_And_left(left);
2851 ir_node *and_right = get_And_right(left);
2853 /* matze: code here used mode instead of cmp_mode, I think it is always
2854 * the same as cmp_mode, but I leave this here to see if this is really
2857 assert(get_irn_mode(and_left) == cmp_mode);
2859 match_arguments(&am, block, and_left, and_right, NULL,
2861 match_am | match_8bit_am | match_16bit_am |
2862 match_am_and_immediates | match_immediate);
2864 /* use 32bit compare mode if possible since the opcode is smaller */
2865 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2866 upper_bits_clean(am.new_op2, cmp_mode)) {
2867 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2870 if (get_mode_size_bits(cmp_mode) == 8) {
2871 new_node = new_bd_ia32_Test8Bit(dbgi, new_block, addr->base,
2872 addr->index, addr->mem, am.new_op1, am.new_op2, am.ins_permuted,
2875 new_node = new_bd_ia32_Test(dbgi, new_block, addr->base, addr->index,
2876 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2879 /* Cmp(left, right) */
2880 match_arguments(&am, block, left, right, NULL,
2881 match_commutative | match_am | match_8bit_am |
2882 match_16bit_am | match_am_and_immediates |
2884 /* use 32bit compare mode if possible since the opcode is smaller */
2885 if (upper_bits_clean(am.new_op1, cmp_mode) &&
2886 upper_bits_clean(am.new_op2, cmp_mode)) {
2887 cmp_mode = mode_is_signed(cmp_mode) ? mode_Is : mode_Iu;
2890 if (get_mode_size_bits(cmp_mode) == 8) {
2891 new_node = new_bd_ia32_Cmp8Bit(dbgi, new_block, addr->base,
2892 addr->index, addr->mem, am.new_op1,
2893 am.new_op2, am.ins_permuted,
2896 new_node = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
2897 addr->mem, am.new_op1, am.new_op2, am.ins_permuted, cmp_unsigned);
2900 set_am_attributes(new_node, &am);
2901 set_ia32_ls_mode(new_node, cmp_mode);
2903 SET_IA32_ORIG_NODE(new_node, node);
/* reroute memory users if a source address mode was matched */
2905 new_node = fix_mem_proj(new_node, &am);
/**
 * Creates an ia32 CMov for a Mux whose values live in gp registers.
 * flags/new_flags carry the (untransformed/transformed) condition flags.
 * NOTE(review): excerpt is elided; the trailing signature part (apparently
 * the pnc parameter), the addr initialisation and the return are not
 * visible here.
 */
2910 static ir_node *create_CMov(ir_node *node, ir_node *flags, ir_node *new_flags,
2913 dbg_info *dbgi = get_irn_dbg_info(node);
2914 ir_node *block = get_nodes_block(node);
2915 ir_node *new_block = be_transform_node(block);
2916 ir_node *val_true = get_Mux_true(node);
2917 ir_node *val_false = get_Mux_false(node);
2919 ia32_address_mode_t am;
2920 ia32_address_t *addr;
/* CMov is only legal when the target supports it */
2922 assert(ia32_cg_config.use_cmov);
2923 assert(ia32_mode_needs_gp_reg(get_irn_mode(val_true)));
2927 match_arguments(&am, block, val_false, val_true, flags,
2928 match_commutative | match_am | match_16bit_am | match_mode_neutral);
2930 new_node = new_bd_ia32_CMov(dbgi, new_block, addr->base, addr->index,
2931 addr->mem, am.new_op1, am.new_op2, new_flags,
2932 am.ins_permuted, pnc);
2933 set_am_attributes(new_node, &am);
2935 SET_IA32_ORIG_NODE(new_node, node);
2937 new_node = fix_mem_proj(new_node, &am);
2943 * Creates a ia32 Setcc instruction.
/**
 * Creates an ia32 Setcc instruction producing a 0/1 value from condition
 * flags, widened with a zero-extending Conv when the result mode is wider
 * than 8 bit.
 * NOTE(review): excerpt is elided; the trailing signature part (apparently
 * an ins_permuted parameter), the new_node declaration and the return are
 * not visible here.
 */
2945 static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
2946 ir_node *flags, pn_Cmp pnc, ir_node *orig_node,
2949 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
2950 ir_node *nomem = new_NoMem();
2951 ir_mode *mode = get_irn_mode(orig_node);
2954 new_node = new_bd_ia32_Set(dbgi, new_block, flags, pnc, ins_permuted);
2955 SET_IA32_ORIG_NODE(new_node, orig_node);
2957 /* we might need to conv the result up */
2958 if (get_mode_size_bits(mode) > 8) {
2959 new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg, noreg,
2960 nomem, new_node, mode_Bu);
2961 SET_IA32_ORIG_NODE(new_node, orig_node);
2968 * Create instruction for an unsigned Difference or Zero.
/**
 * Create instructions for an unsigned Difference-or-Zero:
 * Doz(a, b) = (a - b) & ~borrow-mask, i.e. a-b if a >= b else 0.
 * Implemented as Sub, then Sbb0 (materialises 0 or -1 from the carry)
 * and an And of the two.
 * NOTE(review): excerpt is elided; the dbgi declaration, an else-branch
 * of the Proj check and the final return are not visible here.
 */
2970 static ir_node *create_Doz(ir_node *psi, ir_node *a, ir_node *b)
2972 ir_graph *irg = current_ir_graph;
2973 ir_mode *mode = get_irn_mode(psi);
2974 ir_node *nomem = new_NoMem();
2975 ir_node *new_node, *sub, *sbb, *eflags, *block, *noreg;
2979 new_node = gen_binop(psi, a, b, new_bd_ia32_Sub,
2980 match_mode_neutral | match_am | match_immediate | match_two_users);
2982 block = get_nodes_block(new_node);
2984 if (is_Proj(new_node)) {
2985 sub = get_Proj_pred(new_node);
2986 assert(is_ia32_Sub(sub));
/* the Sub must produce both the result and the flags */
2989 set_irn_mode(sub, mode_T);
2990 new_node = new_rd_Proj(NULL, irg, block, sub, mode, pn_ia32_res);
2992 eflags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
2994 dbgi = get_irn_dbg_info(psi);
/* Sbb0 yields 0 (no borrow) or -1 (borrow) from the carry flag */
2995 sbb = new_bd_ia32_Sbb0(dbgi, block, eflags);
2997 noreg = ia32_new_NoReg_gp(env_cg);
2998 new_node = new_bd_ia32_And(dbgi, block, noreg, noreg, nomem, new_node, sbb);
2999 set_ia32_commutative(new_node);
3004 * Create a const array of two float consts.
3006 * @param c0 the first constant
3007 * @param c1 the second constant
3008 * @param new_mode IN/OUT for the mode of the constants, if NULL
3009 * smallest possible mode will be used
/**
 * Create a constant array entity holding two float constants.
 *
 * @param c0        the first constant
 * @param c1        the second constant
 * @param new_mode  IN/OUT for the mode of the constants; if *new_mode is
 *                  NULL the smallest lossless mode is chosen and written back
 * NOTE(review): excerpt is elided; the tp/ent declarations, the mode
 * write-back and the final return of the entity are not visible here.
 */
3011 static ir_entity *ia32_create_const_array(ir_node *c0, ir_node *c1, ir_mode **new_mode) {
3013 ir_mode *mode = *new_mode;
3015 ir_initializer_t *initializer;
3016 tarval *tv0 = get_Const_tarval(c0);
3017 tarval *tv1 = get_Const_tarval(c1);
3020 /* detect the best mode for the constants */
3021 mode = get_tarval_mode(tv0);
/* try to shrink to single, then to double precision without loss */
3023 if (mode != mode_F) {
3024 if (tarval_ieee754_can_conv_lossless(tv0, mode_F) &&
3025 tarval_ieee754_can_conv_lossless(tv1, mode_F)) {
3027 tv0 = tarval_convert_to(tv0, mode);
3028 tv1 = tarval_convert_to(tv1, mode);
3029 } else if (mode != mode_D) {
3030 if (tarval_ieee754_can_conv_lossless(tv0, mode_D) &&
3031 tarval_ieee754_can_conv_lossless(tv1, mode_D)) {
3033 tv0 = tarval_convert_to(tv0, mode);
3034 tv1 = tarval_convert_to(tv1, mode);
3041 tp = ia32_create_float_type(mode, 4);
3042 tp = ia32_create_float_array(tp);
/* a fresh local, statically allocated, constant entity */
3044 ent = new_entity(get_glob_type(), ia32_unique_id(".LC%u"), tp);
3046 set_entity_ld_ident(ent, get_entity_ident(ent));
3047 set_entity_visibility(ent, visibility_local);
3048 set_entity_variability(ent, variability_constant);
3049 set_entity_allocation(ent, allocation_static);
3051 initializer = create_initializer_compound(2);
3053 set_initializer_compound_value(initializer, 0, create_initializer_tarval(tv0));
3054 set_initializer_compound_value(initializer, 1, create_initializer_tarval(tv1));
3056 set_entity_initializer(ent, initializer);
3063 * Transforms a Mux node into CMov.
3065 * @return The transformed node.
/**
 * Transforms a Mux node into CMov (gp values), Min/Max (SSE float),
 * a constant-array load indexed by a Setcc (float constants), a Setcc
 * (0/1 integer constants) or a Doz pattern.
 * NOTE(review): excerpt is elided; several declarations, case labels of
 * the size switch and returns are not visible here.
 *
 * @return The transformed node.
 */
3067 static ir_node *gen_Mux(ir_node *node)
3069 dbg_info *dbgi = get_irn_dbg_info(node);
3070 ir_node *block = get_nodes_block(node);
3071 ir_node *new_block = be_transform_node(block);
3072 ir_node *mux_true = get_Mux_true(node);
3073 ir_node *mux_false = get_Mux_false(node);
3074 ir_node *cond = get_Mux_sel(node);
3075 ir_mode *mode = get_irn_mode(node);
3080 assert(get_irn_mode(cond) == mode_b);
3082 /* Note: a Mux node uses a Load two times IFF it's used in the compare AND in the result */
3083 if (mode_is_float(mode)) {
3084 ir_node *cmp = get_Proj_pred(cond);
3085 ir_node *cmp_left = get_Cmp_left(cmp);
3086 ir_node *cmp_right = get_Cmp_right(cmp);
3087 pn_Cmp pnc = get_Proj_proj(cond);
/* SSE has native min/max instructions for these Mux patterns */
3089 if (ia32_cg_config.use_sse2) {
3090 if (pnc == pn_Cmp_Lt || pnc == pn_Cmp_Le) {
3091 if (cmp_left == mux_true && cmp_right == mux_false) {
3092 /* Mux(a <= b, a, b) => MIN */
3093 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3094 match_commutative | match_am | match_two_users);
3095 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3096 /* Mux(a <= b, b, a) => MAX */
3097 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3098 match_commutative | match_am | match_two_users);
3100 } else if (pnc == pn_Cmp_Gt || pnc == pn_Cmp_Ge) {
3101 if (cmp_left == mux_true && cmp_right == mux_false) {
3102 /* Mux(a >= b, a, b) => MAX */
3103 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMax,
3104 match_commutative | match_am | match_two_users);
3105 } else if (cmp_left == mux_false && cmp_right == mux_true) {
3106 /* Mux(a >= b, b, a) => MIN */
3107 return gen_binop(node, cmp_left, cmp_right, new_bd_ia32_xMin,
3108 match_commutative | match_am | match_two_users);
/* two float constants: build a 2-element constant array and load the
 * selected element, indexing with the 0/1 produced by Setcc */
3112 if (is_Const(mux_true) && is_Const(mux_false)) {
3113 ia32_address_mode_t am;
3114 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3115 ir_node *nomem = new_NoMem();
3120 flags = get_flags_node(cond, &pnc);
3121 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3123 if (ia32_cg_config.use_sse2) {
3124 /* cannot load from different mode on SSE */
3127 /* x87 can load any mode */
3131 am.addr.symconst_ent = ia32_create_const_array(mux_false, mux_true, &new_mode);
/* scale the 0/1 index by the element size (Lea/Add tricks, since an
 * address-mode shift of 16 does not exist) */
3133 switch (get_mode_size_bytes(new_mode)) {
3143 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3144 set_ia32_am_scale(new_node, 2);
3149 new_node = new_bd_ia32_Lea(dbgi, new_block, new_node, new_node);
3150 set_ia32_am_scale(new_node, 1);
3153 /* arg, shift 16 NOT supported */
3155 new_node = new_bd_ia32_Add(dbgi, new_block, noreg, noreg, nomem, new_node, new_node);
3158 panic("Unsupported constant size");
/* build the source address mode for the array load */
3161 am.ls_mode = new_mode;
3162 am.addr.base = noreg;
3163 am.addr.index = new_node;
3164 am.addr.mem = nomem;
3166 am.addr.scale = scale;
3167 am.addr.use_frame = 0;
3168 am.addr.frame_entity = NULL;
3169 am.addr.symconst_sign = 0;
3170 am.mem_proj = am.addr.mem;
3171 am.op_type = ia32_AddrModeS;
3174 am.pinned = op_pin_state_floats;
3176 am.ins_permuted = 0;
3178 if (ia32_cg_config.use_sse2)
3179 load = new_bd_ia32_xLoad(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3181 load = new_bd_ia32_vfld(dbgi, block, am.addr.base, am.addr.index, am.addr.mem, new_mode);
3182 set_am_attributes(load, &am);
3184 return new_rd_Proj(NULL, current_ir_graph, block, load, mode_vfp, pn_ia32_res);
3186 panic("cannot transform floating point Mux");
3189 assert(ia32_mode_needs_gp_reg(mode));
3191 if (is_Proj(cond)) {
3192 ir_node *cmp = get_Proj_pred(cond);
3194 ir_node *cmp_left = get_Cmp_left(cmp);
3195 ir_node *cmp_right = get_Cmp_right(cmp);
3196 pn_Cmp pnc = get_Proj_proj(cond);
3198 /* check for unsigned Doz first */
3199 if ((pnc & pn_Cmp_Gt) && !mode_is_signed(mode) &&
3200 is_Const_0(mux_false) && is_Sub(mux_true) &&
3201 get_Sub_left(mux_true) == cmp_left && get_Sub_right(mux_true) == cmp_right) {
3202 /* Mux(a >=u b, a - b, 0) unsigned Doz */
3203 return create_Doz(node, cmp_left, cmp_right);
3204 } else if ((pnc & pn_Cmp_Lt) && !mode_is_signed(mode) &&
3205 is_Const_0(mux_true) && is_Sub(mux_false) &&
3206 get_Sub_left(mux_false) == cmp_left && get_Sub_right(mux_false) == cmp_right) {
3207 /* Mux(a <=u b, 0, a - b) unsigned Doz */
3208 return create_Doz(node, cmp_left, cmp_right);
3213 flags = get_flags_node(cond, &pnc);
3215 if (is_Const(mux_true) && is_Const(mux_false)) {
3216 /* both are const, good */
3217 if (is_Const_1(mux_true) && is_Const_0(mux_false)) {
3218 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/0);
3219 } else if (is_Const_0(mux_true) && is_Const_1(mux_false)) {
3220 new_node = create_set_32bit(dbgi, new_block, flags, pnc, node, /*is_premuted=*/1);
3222 /* Not that simple. */
/* general case: conditional move */
3227 new_node = create_CMov(node, cond, flags, pnc);
3235 * Create a conversion from x87 state register to general purpose.
/**
 * Create a conversion from x87 stack register to general purpose register:
 * fist(t) the value to the stack frame, then load it back as an integer.
 * NOTE(review): excerpt is elided; the else-branch setting mode_Is and
 * some closing braces are not visible here.
 */
3237 static ir_node *gen_x87_fp_to_gp(ir_node *node)
3239 ir_node *block = be_transform_node(get_nodes_block(node));
3240 ir_node *op = get_Conv_op(node);
3241 ir_node *new_op = be_transform_node(op);
3242 ia32_code_gen_t *cg = env_cg;
3243 ir_graph *irg = current_ir_graph;
3244 dbg_info *dbgi = get_irn_dbg_info(node);
3245 ir_node *noreg = ia32_new_NoReg_gp(cg);
3246 ir_mode *mode = get_irn_mode(node);
3247 ir_node *fist, *load, *mem;
3249 mem = gen_vfist(dbgi, irg, block, get_irg_frame(irg), noreg, new_NoMem(), new_op, &fist);
3250 set_irn_pinned(fist, op_pin_state_floats);
3251 set_ia32_use_frame(fist);
3252 set_ia32_op_type(fist, ia32_AddrModeD);
3254 assert(get_mode_size_bits(mode) <= 32);
3255 /* exception: we can only store signed 32 bit integers, so for unsigned
3256 we store a 64bit (signed) integer and load the lower bits */
3257 if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
3258 set_ia32_ls_mode(fist, mode_Ls);
3260 set_ia32_ls_mode(fist, mode_Is);
3262 SET_IA32_ORIG_NODE(fist, node);
/* reload the stored value as a plain 32bit integer */
3265 load = new_bd_ia32_Load(dbgi, block, get_irg_frame(irg), noreg, mem);
3267 set_irn_pinned(load, op_pin_state_floats);
3268 set_ia32_use_frame(load);
3269 set_ia32_op_type(load, ia32_AddrModeS);
3270 set_ia32_ls_mode(load, mode_Is);
/* the spill slot must match the width the fist wrote */
3271 if (get_ia32_ls_mode(fist) == mode_Ls) {
3272 ia32_attr_t *attr = get_ia32_attr(load);
3273 attr->data.need_64bit_stackent = 1;
3275 ia32_attr_t *attr = get_ia32_attr(load);
3276 attr->data.need_32bit_stackent = 1;
3278 SET_IA32_ORIG_NODE(load, node);
3280 return new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
3284 * Creates a x87 strict Conv by placing a Store and a Load
3286 static ir_node *gen_x87_strict_conv(ir_mode *tgt_mode, ir_node *node)
3288 ir_node *block = get_nodes_block(node);
3289 ir_graph *irg = current_ir_graph;
3290 dbg_info *dbgi = get_irn_dbg_info(node);
3291 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3292 ir_node *nomem = new_NoMem();
3293 ir_node *frame = get_irg_frame(irg);
3294 ir_node *store, *load;
3297 store = new_bd_ia32_vfst(dbgi, block, frame, noreg, nomem, node, tgt_mode);
3298 set_ia32_use_frame(store);
3299 set_ia32_op_type(store, ia32_AddrModeD);
3300 SET_IA32_ORIG_NODE(store, node);
3302 load = new_bd_ia32_vfld(dbgi, block, frame, noreg, store, tgt_mode);
3303 set_ia32_use_frame(load);
3304 set_ia32_op_type(load, ia32_AddrModeS);
3305 SET_IA32_ORIG_NODE(load, node);
3307 new_node = new_r_Proj(irg, block, load, mode_E, pn_ia32_vfld_res);
3311 static ir_node *create_Conv_I2I(dbg_info *dbgi, ir_node *block, ir_node *base,
3312 ir_node *index, ir_node *mem, ir_node *val, ir_mode *mode)
3314 ir_node *(*func)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_mode*);
3316 func = get_mode_size_bits(mode) == 8 ?
3317 new_bd_ia32_Conv_I2I8Bit : new_bd_ia32_Conv_I2I;
3318 return func(dbgi, block, base, index, mem, val, mode);
3322 * Create a conversion from general purpose to x87 register
/**
 * Create a conversion from general purpose register to x87 register:
 * either fild directly from memory (source address mode) or store the
 * value to the stack frame and fild it from there.  32bit unsigned values
 * are zero-padded to 64 bit so the (signed) fild reads them correctly.
 * NOTE(review): excerpt is elided; several declarations, returns and
 * Store/Sync constructor arguments are not visible here.
 */
3324 static ir_node *gen_x87_gp_to_fp(ir_node *node, ir_mode *src_mode)
3326 ir_node *src_block = get_nodes_block(node);
3327 ir_node *block = be_transform_node(src_block);
3328 ir_graph *irg = current_ir_graph;
3329 dbg_info *dbgi = get_irn_dbg_info(node);
3330 ir_node *op = get_Conv_op(node);
3331 ir_node *new_op = NULL;
3335 ir_mode *store_mode;
3340 /* fild can use source AM if the operand is a signed 16bit or 32bit integer */
3341 if (possible_int_mode_for_fp(src_mode)) {
3342 ia32_address_mode_t am;
3344 match_arguments(&am, src_block, NULL, op, NULL, match_am | match_try_am | match_16bit_am);
3345 if (am.op_type == ia32_AddrModeS) {
3346 ia32_address_t *addr = &am.addr;
3348 fild = new_bd_ia32_vfild(dbgi, block, addr->base, addr->index,
3350 new_node = new_r_Proj(irg, block, fild, mode_vfp,
3353 set_am_attributes(fild, &am);
3354 SET_IA32_ORIG_NODE(fild, node);
3356 fix_mem_proj(fild, &am);
/* no source address mode matched: go through the stack frame */
3361 if (new_op == NULL) {
3362 new_op = be_transform_node(op);
3365 noreg = ia32_new_NoReg_gp(env_cg);
3366 nomem = new_NoMem();
3367 mode = get_irn_mode(op);
3369 /* first convert to 32 bit signed if necessary */
3370 if (get_mode_size_bits(src_mode) < 32) {
3371 if (!upper_bits_clean(new_op, src_mode)) {
3372 new_op = create_Conv_I2I(dbgi, block, noreg, noreg, nomem, new_op, src_mode);
3373 SET_IA32_ORIG_NODE(new_op, node);
3378 assert(get_mode_size_bits(mode) == 32);
/* spill the gp value to the frame so fild can read it */
3381 store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg), noreg, nomem,
3384 set_ia32_use_frame(store);
3385 set_ia32_op_type(store, ia32_AddrModeD);
3386 set_ia32_ls_mode(store, mode_Iu);
3388 /* exception for 32bit unsigned, do a 64bit spill+load */
3389 if (!mode_is_signed(mode)) {
3392 ir_node *zero_const = create_Immediate(NULL, 0, 0);
/* write a zero high word so the 64bit (signed) fild sees a
 * non-negative value */
3394 ir_node *zero_store = new_bd_ia32_Store(dbgi, block, get_irg_frame(irg),
3395 noreg, nomem, zero_const);
3397 set_ia32_use_frame(zero_store);
3398 set_ia32_op_type(zero_store, ia32_AddrModeD);
3399 add_ia32_am_offs_int(zero_store, 4);
3400 set_ia32_ls_mode(zero_store, mode_Iu);
/* both stores must complete before the fild */
3405 store = new_rd_Sync(dbgi, irg, block, 2, in);
3406 store_mode = mode_Ls;
3408 store_mode = mode_Is;
3412 fild = new_bd_ia32_vfild(dbgi, block, get_irg_frame(irg), noreg, store);
3414 set_ia32_use_frame(fild);
3415 set_ia32_op_type(fild, ia32_AddrModeS);
3416 set_ia32_ls_mode(fild, store_mode);
3418 new_node = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
3424 * Create a conversion from one integer mode into another one
/**
 * Create a conversion from one integer mode into another one; the actual
 * conversion always narrows/extends to the smaller of the two modes.
 * NOTE(review): excerpt is elided; new_node declaration, parts of the
 * debug warning block and a return in the clean-bits case are not
 * visible here.
 */
3426 static ir_node *create_I2I_Conv(ir_mode *src_mode, ir_mode *tgt_mode,
3427 dbg_info *dbgi, ir_node *block, ir_node *op,
3430 ir_node *new_block = be_transform_node(block);
3432 ir_mode *smaller_mode;
3433 ia32_address_mode_t am;
3434 ia32_address_t *addr = &am.addr;
/* only the smaller of the two widths matters for the conversion */
3437 if (get_mode_size_bits(src_mode) < get_mode_size_bits(tgt_mode)) {
3438 smaller_mode = src_mode;
3440 smaller_mode = tgt_mode;
3443 #ifdef DEBUG_libfirm
3445 ir_fprintf(stderr, "Optimisation warning: conv after constant %+F\n",
3450 match_arguments(&am, block, NULL, op, NULL,
3451 match_am | match_8bit_am | match_16bit_am);
3453 if (upper_bits_clean(am.new_op2, smaller_mode)) {
3454 /* unnecessary conv. in theory it shouldn't have been AM */
3455 assert(is_ia32_NoReg_GP(addr->base));
3456 assert(is_ia32_NoReg_GP(addr->index));
3457 assert(is_NoMem(addr->mem));
3458 assert(am.addr.offset == 0);
3459 assert(am.addr.symconst_ent == NULL);
3463 new_node = create_Conv_I2I(dbgi, new_block, addr->base, addr->index,
3464 addr->mem, am.new_op2, smaller_mode);
3465 set_am_attributes(new_node, &am);
3466 /* match_arguments assume that out-mode = in-mode, this isn't true here
3468 set_ia32_ls_mode(new_node, smaller_mode);
3469 SET_IA32_ORIG_NODE(new_node, node);
3470 new_node = fix_mem_proj(new_node, &am);
3475 * Transforms a Conv node.
3477 * @return The created ia32 Conv node
/**
 * Transforms a Conv node, dispatching over all mode combinations:
 * mode_b sources are no-ops, float<->float uses FP2FP/strict x87 conv,
 * float->int uses FP2I/fist, int->float uses I2FP/fild, int->int uses
 * Conv_I2I.
 * NOTE(review): excerpt is elided; several else-branches, returns and
 * constructor arguments are not visible here.
 *
 * @return The created ia32 Conv node
 */
3479 static ir_node *gen_Conv(ir_node *node)
3481 ir_node *block = get_nodes_block(node);
3482 ir_node *new_block = be_transform_node(block);
3483 ir_node *op = get_Conv_op(node);
3484 ir_node *new_op = NULL;
3485 dbg_info *dbgi = get_irn_dbg_info(node);
3486 ir_mode *src_mode = get_irn_mode(op);
3487 ir_mode *tgt_mode = get_irn_mode(node);
3488 int src_bits = get_mode_size_bits(src_mode);
3489 int tgt_bits = get_mode_size_bits(tgt_mode);
3490 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3491 ir_node *nomem = new_NoMem();
3492 ir_node *res = NULL;
3494 assert(!mode_is_int(src_mode) || src_bits <= 32);
3495 assert(!mode_is_int(tgt_mode) || tgt_bits <= 32);
3497 if (src_mode == mode_b) {
3498 assert(mode_is_int(tgt_mode) || mode_is_reference(tgt_mode));
3499 /* nothing to do, we already model bools as 0/1 ints */
3500 return be_transform_node(op);
3503 if (src_mode == tgt_mode) {
3504 if (get_Conv_strict(node)) {
3505 if (ia32_cg_config.use_sse2) {
3506 /* when we are in SSE mode, we can kill all strict no-op conversion */
3507 return be_transform_node(op);
3510 /* this should be optimized already, but who knows... */
3511 DEBUG_ONLY(ir_fprintf(stderr, "Debug warning: conv %+F is pointless\n", node));
3512 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3513 return be_transform_node(op);
3517 if (mode_is_float(src_mode)) {
3518 new_op = be_transform_node(op);
3519 /* we convert from float ... */
3520 if (mode_is_float(tgt_mode)) {
3522 /* Matze: I'm a bit unsure what the following is for? seems wrong
3524 if (src_mode == mode_E && tgt_mode == mode_D
3525 && !get_Conv_strict(node)) {
3526 DB((dbg, LEVEL_1, "killed Conv(mode, mode) ..."));
3532 if (ia32_cg_config.use_sse2) {
3533 DB((dbg, LEVEL_1, "create Conv(float, float) ..."));
3534 res = new_bd_ia32_Conv_FP2FP(dbgi, new_block, noreg, noreg,
3536 set_ia32_ls_mode(res, tgt_mode);
3538 if (get_Conv_strict(node)) {
3539 /* if fp_no_float_fold is not set then we assume that we
3540 * don't have any float operations in a non
3541 * mode_float_arithmetic mode and can skip strict upconvs */
3542 if (src_bits < tgt_bits
3543 && !(get_irg_fp_model(current_ir_graph) & fp_no_float_fold)) {
3544 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3547 res = gen_x87_strict_conv(tgt_mode, new_op);
3548 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3552 DB((dbg, LEVEL_1, "killed Conv(float, float) ..."));
3557 DB((dbg, LEVEL_1, "create Conv(float, int) ..."));
3558 if (ia32_cg_config.use_sse2) {
3559 res = new_bd_ia32_Conv_FP2I(dbgi, new_block, noreg, noreg,
3561 set_ia32_ls_mode(res, src_mode);
3563 return gen_x87_fp_to_gp(node);
3567 /* we convert from int ... */
3568 if (mode_is_float(tgt_mode)) {
3570 DB((dbg, LEVEL_1, "create Conv(int, float) ..."));
3571 if (ia32_cg_config.use_sse2) {
3572 new_op = be_transform_node(op);
3573 res = new_bd_ia32_Conv_I2FP(dbgi, new_block, noreg, noreg,
3575 set_ia32_ls_mode(res, tgt_mode);
3577 res = gen_x87_gp_to_fp(node, src_mode);
3579 /* we need a strict-Conv, if the int mode has more bits than the
3581 size_t int_mantissa = get_mode_size_bits(src_mode) - (mode_is_signed(src_mode) ? 1 : 0);
3582 size_t float_mantissa;
3583 /* FIXME There is no way to get the mantissa size of a mode */
3584 switch (get_mode_size_bits(tgt_mode)) {
3585 case 32: float_mantissa = 23 + 1; break; // + 1 for implicit 1
3586 case 64: float_mantissa = 52 + 1; break;
3588 case 96: float_mantissa = 64; break;
3589 default: float_mantissa = 0; break;
3591 if (float_mantissa < int_mantissa) {
3592 res = gen_x87_strict_conv(tgt_mode, res);
3593 SET_IA32_ORIG_NODE(get_Proj_pred(res), node);
3597 } else if (tgt_mode == mode_b) {
3598 /* mode_b lowering already took care that we only have 0/1 values */
3599 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3600 src_mode, tgt_mode));
3601 return be_transform_node(op);
3604 if (src_bits == tgt_bits) {
3605 DB((dbg, LEVEL_1, "omitting unnecessary Conv(%+F, %+F) ...",
3606 src_mode, tgt_mode));
3607 return be_transform_node(op);
3610 res = create_I2I_Conv(src_mode, tgt_mode, dbgi, block, op, node);
3618 static ir_node *create_immediate_or_transform(ir_node *node,
3619 char immediate_constraint_type)
3621 ir_node *new_node = try_create_Immediate(node, immediate_constraint_type);
3622 if (new_node == NULL) {
3623 new_node = be_transform_node(node);
3629 * Transforms a FrameAddr into an ia32 Add.
3631 static ir_node *gen_be_FrameAddr(ir_node *node)
3633 ir_node *block = be_transform_node(get_nodes_block(node));
3634 ir_node *op = be_get_FrameAddr_frame(node);
3635 ir_node *new_op = be_transform_node(op);
3636 dbg_info *dbgi = get_irn_dbg_info(node);
3637 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
3640 new_node = new_bd_ia32_Lea(dbgi, block, new_op, noreg);
3641 set_ia32_frame_ent(new_node, arch_get_frame_entity(node));
3642 set_ia32_use_frame(new_node);
3644 SET_IA32_ORIG_NODE(new_node, node);
3650 * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return.
/**
 * Transform a be_Return.
 * In case SSE is used and a float is returned, the result must be copied
 * from XMM0 to the FPU top-of-stack before returning: store xmm0 to the
 * frame, vfld it back, and rebuild the Barrier with the new value/memory.
 * NOTE(review): excerpt is elided; several declarations, loop-body
 * assignments and the final return are not visible here.
 */
3652 static ir_node *gen_be_Return(ir_node *node)
3654 ir_graph *irg = current_ir_graph;
3655 ir_node *ret_val = get_irn_n(node, be_pos_Return_val);
3656 ir_node *ret_mem = get_irn_n(node, be_pos_Return_mem);
3657 ir_entity *ent = get_irg_entity(irg);
3658 ir_type *tp = get_entity_type(ent);
3663 ir_node *frame, *sse_store, *fld, *mproj, *barrier;
3664 ir_node *new_barrier, *new_ret_val, *new_ret_mem;
3667 int pn_ret_val, pn_ret_mem, arity, i;
/* only the SSE float-return case needs special handling */
3669 assert(ret_val != NULL);
3670 if (be_Return_get_n_rets(node) < 1 || ! ia32_cg_config.use_sse2) {
3671 return be_duplicate_node(node);
3674 res_type = get_method_res_type(tp, 0);
3676 if (! is_Primitive_type(res_type)) {
3677 return be_duplicate_node(node);
3680 mode = get_type_mode(res_type);
3681 if (! mode_is_float(mode)) {
3682 return be_duplicate_node(node);
3685 assert(get_method_n_ress(tp) == 1);
3687 pn_ret_val = get_Proj_proj(ret_val);
3688 pn_ret_mem = get_Proj_proj(ret_mem);
3690 /* get the Barrier */
3691 barrier = get_Proj_pred(ret_val);
3693 /* get result input of the Barrier */
3694 ret_val = get_irn_n(barrier, pn_ret_val);
3695 new_ret_val = be_transform_node(ret_val);
3697 /* get memory input of the Barrier */
3698 ret_mem = get_irn_n(barrier, pn_ret_mem);
3699 new_ret_mem = be_transform_node(ret_mem);
3701 frame = get_irg_frame(irg);
3703 dbgi = get_irn_dbg_info(barrier);
3704 block = be_transform_node(get_nodes_block(barrier));
3706 noreg = ia32_new_NoReg_gp(env_cg);
3708 /* store xmm0 onto stack */
3709 sse_store = new_bd_ia32_xStoreSimple(dbgi, block, frame, noreg,
3710 new_ret_mem, new_ret_val);
3711 set_ia32_ls_mode(sse_store, mode);
3712 set_ia32_op_type(sse_store, ia32_AddrModeD);
3713 set_ia32_use_frame(sse_store);
3715 /* load into x87 register */
3716 fld = new_bd_ia32_vfld(dbgi, block, frame, noreg, sse_store, mode);
3717 set_ia32_op_type(fld, ia32_AddrModeS);
3718 set_ia32_use_frame(fld);
3720 mproj = new_r_Proj(irg, block, fld, mode_M, pn_ia32_vfld_M);
3721 fld = new_r_Proj(irg, block, fld, mode_vfp, pn_ia32_vfld_res);
3723 /* create a new barrier */
3724 arity = get_irn_arity(barrier);
3725 in = ALLOCAN(ir_node*, arity);
3726 for (i = 0; i < arity; ++i) {
/* swap in the reloaded value and its memory at the old positions */
3729 if (i == pn_ret_val) {
3731 } else if (i == pn_ret_mem) {
3734 ir_node *in = get_irn_n(barrier, i);
3735 new_in = be_transform_node(in);
3740 new_barrier = new_ir_node(dbgi, irg, block,
3741 get_irn_op(barrier), get_irn_mode(barrier),
3743 copy_node_attr(barrier, new_barrier);
3744 be_duplicate_deps(barrier, new_barrier);
3745 be_set_transformed_node(barrier, new_barrier);
3747 /* transform normally */
3748 return be_duplicate_node(node);
/* be_AddSP grows the stack, which on ia32 means *subtracting* from esp,
 * hence the mapping to ia32_SubSP. */
3752 * Transform a be_AddSP into an ia32_SubSP.
3754 static ir_node *gen_be_AddSP(ir_node *node)
3756 ir_node *sz = get_irn_n(node, be_pos_AddSP_size);
3757 ir_node *sp = get_irn_n(node, be_pos_AddSP_old_sp);
3759 return gen_binop(node, sp, sz, new_bd_ia32_SubSP,
3760 match_am | match_immediate);
/* be_SubSP shrinks the stack, i.e. *adds* to esp, hence ia32_AddSP. */
3764 * Transform a be_SubSP into an ia32_AddSP
3766 static ir_node *gen_be_SubSP(ir_node *node)
3768 ir_node *sz = get_irn_n(node, be_pos_SubSP_size);
3769 ir_node *sp = get_irn_n(node, be_pos_SubSP_old_sp);
3771 return gen_binop(node, sp, sz, new_bd_ia32_AddSP,
3772 match_am | match_immediate);
/* Rewrites the mode of a Phi to a register-class mode (gp/xmm/vfp) and
 * duplicates it with the *old* (untransformed) predecessors; those are fixed
 * up later because Phis may sit in loops.
 * NOTE(review): the mode-assignment branches are partially elided here. */
3776 * Change some phi modes
3778 static ir_node *gen_Phi(ir_node *node)
3780 ir_node *block = be_transform_node(get_nodes_block(node));
3781 ir_graph *irg = current_ir_graph;
3782 dbg_info *dbgi = get_irn_dbg_info(node);
3783 ir_mode *mode = get_irn_mode(node);
3786 if (ia32_mode_needs_gp_reg(mode)) {
3787 /* we shouldn't have any 64bit stuff around anymore */
3788 assert(get_mode_size_bits(mode) <= 32);
3789 /* all integer operations are on 32bit registers now */
3791 } else if (mode_is_float(mode)) {
3792 if (ia32_cg_config.use_sse2) {
3799 /* phi nodes allow loops, so we use the old arguments for now
3800 * and fix this later */
3801 phi = new_ir_node(dbgi, irg, block, op_Phi, mode, get_irn_arity(node),
3802 get_irn_in(node) + 1);
3803 copy_node_attr(node, phi);
3804 be_duplicate_deps(node, phi);
/* predecessors are still the old nodes; enqueue them so the transform
 * driver visits them and the fixup pass can patch the Phi inputs */
3806 be_enqueue_preds(node);
/**
 * Transform an indirect jump (IJmp) into an ia32_IJmp, folding the jump
 * target into an address mode or immediate where possible.
 */
3814 static ir_node *gen_IJmp(ir_node *node)
3816 ir_node *block = get_nodes_block(node);
3817 ir_node *new_block = be_transform_node(block);
3818 dbg_info *dbgi = get_irn_dbg_info(node);
3819 ir_node *op = get_IJmp_target(node);
3821 ia32_address_mode_t am;
3822 ia32_address_t *addr = &am.addr;
/* target of an indirect jump must be a pointer */
3824 assert(get_irn_mode(op) == mode_P);
3826 match_arguments(&am, block, NULL, op, NULL, match_am | match_immediate);
3828 new_node = new_bd_ia32_IJmp(dbgi, new_block, addr->base, addr->index,
3829 addr->mem, am.new_op2);
3830 set_am_attributes(new_node, &am);
3831 SET_IA32_ORIG_NODE(new_node, node);
/* if address mode folded a load, reroute its memory Proj */
3833 new_node = fix_mem_proj(new_node, &am);
/* Bound check: only the common lower==0 case (typical for Java array bounds)
 * is supported; it becomes an unsigned Sub + Jcc(index < upper). The general
 * case panics. */
3839 * Transform a Bound node.
3841 static ir_node *gen_Bound(ir_node *node)
3844 ir_node *lower = get_Bound_lower(node);
3845 dbg_info *dbgi = get_irn_dbg_info(node);
3847 if (is_Const_0(lower)) {
3848 /* typical case for Java */
3849 ir_node *sub, *res, *flags, *block;
3850 ir_graph *irg = current_ir_graph;
/* index - upper; the flags of this Sub drive the conditional jump */
3852 res = gen_binop(node, get_Bound_index(node), get_Bound_upper(node),
3853 new_bd_ia32_Sub, match_mode_neutral | match_am | match_immediate);
3855 block = get_nodes_block(res);
3856 if (! is_Proj(res)) {
3858 set_irn_mode(sub, mode_T);
3859 res = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_res);
3861 sub = get_Proj_pred(res);
/* unsigned '<' also catches negative indices in one comparison */
3863 flags = new_rd_Proj(NULL, irg, block, sub, mode_Iu, pn_ia32_Sub_flags);
3864 new_node = new_bd_ia32_Jcc(dbgi, block, flags, pn_Cmp_Lt | ia32_pn_Cmp_unsigned);
3865 SET_IA32_ORIG_NODE(new_node, node);
3867 panic("generic Bound not supported in ia32 Backend");
/** Transform a lowered Shl-with-dependency into an ia32 Shl. */
3873 static ir_node *gen_ia32_l_ShlDep(ir_node *node)
3875 ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
3876 ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
3878 return gen_shift_binop(node, left, right, new_bd_ia32_Shl,
3879 match_immediate | match_mode_neutral);
/** Transform a lowered Shr-with-dependency into an ia32 Shr. */
3882 static ir_node *gen_ia32_l_ShrDep(ir_node *node)
3884 ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
3885 ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
3886 return gen_shift_binop(node, left, right, new_bd_ia32_Shr,
/** Transform a lowered Sar-with-dependency into an ia32 Sar. */
3890 static ir_node *gen_ia32_l_SarDep(ir_node *node)
3892 ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
3893 ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
3894 return gen_shift_binop(node, left, right, new_bd_ia32_Sar,
/**
 * Transform a lowered 64bit-Add low word into an ia32 Add. The Add is forced
 * to mode_T so the carry flag is available as an extra Proj for the Adc of
 * the high word.
 */
3898 static ir_node *gen_ia32_l_Add(ir_node *node)
3900 ir_node *left = get_irn_n(node, n_ia32_l_Add_left);
3901 ir_node *right = get_irn_n(node, n_ia32_l_Add_right);
3902 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Add,
3903 match_commutative | match_am | match_immediate |
3904 match_mode_neutral);
/* gen_binop may already have returned a Proj; get the Add itself */
3906 if (is_Proj(lowered)) {
3907 lowered = get_Proj_pred(lowered);
3909 assert(is_ia32_Add(lowered));
3910 set_irn_mode(lowered, mode_T);
/** Transform a lowered Adc (add with carry, 64bit high word) into ia32 Adc. */
3916 static ir_node *gen_ia32_l_Adc(ir_node *node)
3918 return gen_binop_flags(node, new_bd_ia32_Adc,
3919 match_commutative | match_am | match_immediate |
3920 match_mode_neutral);
/* Lowered unsigned widening multiply -> ia32 Mul (eax/edx result pair). */
3924 * Transforms a l_MulS into a "real" MulS node.
3926 * @return the created ia32 Mul node
3928 static ir_node *gen_ia32_l_Mul(ir_node *node)
3930 ir_node *left = get_binop_left(node);
3931 ir_node *right = get_binop_right(node);
3933 return gen_binop(node, left, right, new_bd_ia32_Mul,
3934 match_commutative | match_am | match_mode_neutral);
/* Lowered signed widening multiply -> one-operand ia32 IMul1OP. */
3938 * Transforms a l_IMulS into a "real" IMul1OPS node.
3940 * @return the created ia32 IMul1OP node
3942 static ir_node *gen_ia32_l_IMul(ir_node *node)
3944 ir_node *left = get_binop_left(node);
3945 ir_node *right = get_binop_right(node);
3947 return gen_binop(node, left, right, new_bd_ia32_IMul1OP,
3948 match_commutative | match_am | match_mode_neutral);
/**
 * Transform a lowered 64bit-Sub low word into an ia32 Sub; forced to mode_T
 * so the borrow flag can be consumed by the Sbb of the high word.
 */
3951 static ir_node *gen_ia32_l_Sub(ir_node *node)
3953 ir_node *left = get_irn_n(node, n_ia32_l_Sub_minuend);
3954 ir_node *right = get_irn_n(node, n_ia32_l_Sub_subtrahend);
3955 ir_node *lowered = gen_binop(node, left, right, new_bd_ia32_Sub,
3956 match_am | match_immediate | match_mode_neutral);
3958 if (is_Proj(lowered)) {
3959 lowered = get_Proj_pred(lowered);
3961 assert(is_ia32_Sub(lowered));
3962 set_irn_mode(lowered, mode_T);
/** Transform a lowered Sbb (subtract with borrow, 64bit high word) into ia32 Sbb. */
3968 static ir_node *gen_ia32_l_Sbb(ir_node *node)
3970 return gen_binop_flags(node, new_bd_ia32_Sbb,
3971 match_am | match_immediate | match_mode_neutral);
/* Common helper for the lowered 64bit double-shifts (ShlD/ShrD). */
3975 * Transforms a l_ShlD/l_ShrD into a ShlD/ShrD. Those nodes have 3 data inputs:
3976 * op1 - target to be shifted
3977 * op2 - contains bits to be shifted into target
3979 * Only op3 can be an immediate.
3981 static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
3982 ir_node *low, ir_node *count)
3984 ir_node *block = get_nodes_block(node);
3985 ir_node *new_block = be_transform_node(block);
3986 dbg_info *dbgi = get_irn_dbg_info(node);
3987 ir_node *new_high = be_transform_node(high);
3988 ir_node *new_low = be_transform_node(low);
3992 /* the shift amount can be any mode that is bigger than 5 bits, since all
3993 * other bits are ignored anyway */
/* skip single-user integer Convs on the count operand */
3994 while (is_Conv(count) &&
3995 get_irn_n_edges(count) == 1 &&
3996 mode_is_int(get_irn_mode(count))) {
3997 assert(get_mode_size_bits(get_irn_mode(count)) >= 5);
3998 count = get_Conv_op(count);
4000 new_count = create_immediate_or_transform(count, 0);
4002 if (is_ia32_l_ShlD(node)) {
4003 new_node = new_bd_ia32_ShlD(dbgi, new_block, new_high, new_low,
4006 new_node = new_bd_ia32_ShrD(dbgi, new_block, new_high, new_low,
4009 SET_IA32_ORIG_NODE(new_node, node);
/** Transform a lowered 64bit shift-left into an ia32 ShlD. */
4014 static ir_node *gen_ia32_l_ShlD(ir_node *node)
4016 ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
4017 ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
4018 ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
4019 return gen_lowered_64bit_shifts(node, high, low, count);
/** Transform a lowered 64bit shift-right into an ia32 ShrD. */
4022 static ir_node *gen_ia32_l_ShrD(ir_node *node)
4024 ir_node *high = get_irn_n(node, n_ia32_l_ShrD_val_high);
4025 ir_node *low = get_irn_n(node, n_ia32_l_ShrD_val_low);
4026 ir_node *count = get_irn_n(node, n_ia32_l_ShrD_count);
4027 return gen_lowered_64bit_shifts(node, high, low, count);
/**
 * Transform a lowered long-long -> float conversion: store both 32bit halves
 * to the frame, fild the 64bit slot, and for *unsigned* sources compensate
 * the signed fild by conditionally adding 2^64 (ULL bias constant) when the
 * high word's sign bit was set. x87 only; SSE2 is not implemented.
 * NOTE(review): several lines are elided in this view.
 */
4030 static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
4032 ir_node *src_block = get_nodes_block(node);
4033 ir_node *block = be_transform_node(src_block);
4034 ir_graph *irg = current_ir_graph;
4035 dbg_info *dbgi = get_irn_dbg_info(node);
4036 ir_node *frame = get_irg_frame(irg);
4037 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4038 ir_node *nomem = new_NoMem();
4039 ir_node *val_low = get_irn_n(node, n_ia32_l_LLtoFloat_val_low);
4040 ir_node *val_high = get_irn_n(node, n_ia32_l_LLtoFloat_val_high);
4041 ir_node *new_val_low = be_transform_node(val_low);
4042 ir_node *new_val_high = be_transform_node(val_high);
4044 ir_node *sync, *fild, *res;
4045 ir_node *store_low, *store_high;
4047 if (ia32_cg_config.use_sse2) {
4048 panic("ia32_l_LLtoFloat not implemented for SSE2");
/* spill the two halves into one contiguous 64bit frame slot */
4052 store_low = new_bd_ia32_Store(dbgi, block, frame, noreg, nomem,
4054 store_high = new_bd_ia32_Store(dbgi, block, frame, noreg, nomem,
4056 SET_IA32_ORIG_NODE(store_low, node);
4057 SET_IA32_ORIG_NODE(store_high, node);
4059 set_ia32_use_frame(store_low);
4060 set_ia32_use_frame(store_high);
4061 set_ia32_op_type(store_low, ia32_AddrModeD);
4062 set_ia32_op_type(store_high, ia32_AddrModeD);
4063 set_ia32_ls_mode(store_low, mode_Iu);
4064 set_ia32_ls_mode(store_high, mode_Is);
/* high half lives 4 bytes above the low half */
4065 add_ia32_am_offs_int(store_high, 4);
/* both stores must complete before the fild reads the slot */
4069 sync = new_rd_Sync(dbgi, irg, block, 2, in);
4072 fild = new_bd_ia32_vfild(dbgi, block, frame, noreg, sync);
4074 set_ia32_use_frame(fild);
4075 set_ia32_op_type(fild, ia32_AddrModeS);
4076 set_ia32_ls_mode(fild, mode_Ls);
4078 SET_IA32_ORIG_NODE(fild, node);
4080 res = new_r_Proj(irg, block, fild, mode_vfp, pn_ia32_vfild_res);
4082 if (! mode_is_signed(get_irn_mode(val_high))) {
4083 ia32_address_mode_t am;
/* high >> 31 yields 0 or 1; used to index a 2-entry constant table
 * { 0.0, 2^64 } so the bias is added exactly when the sign bit was set */
4085 ir_node *count = create_Immediate(NULL, 0, 31);
4088 am.addr.base = ia32_new_NoReg_gp(env_cg);
4089 am.addr.index = new_bd_ia32_Shr(dbgi, block, new_val_high, count);
4090 am.addr.mem = nomem;
4093 am.addr.symconst_ent = ia32_gen_fp_known_const(ia32_ULLBIAS);
4094 am.addr.use_frame = 0;
4095 am.addr.frame_entity = NULL;
4096 am.addr.symconst_sign = 0;
4097 am.ls_mode = mode_F;
4098 am.mem_proj = nomem;
4099 am.op_type = ia32_AddrModeS;
4101 am.new_op2 = ia32_new_NoReg_vfp(env_cg);
4102 am.pinned = op_pin_state_floats;
4104 am.ins_permuted = 0;
4106 fadd = new_bd_ia32_vfadd(dbgi, block, am.addr.base, am.addr.index, am.addr.mem,
4107 am.new_op1, am.new_op2, get_fpcw());
4108 set_am_attributes(fadd, &am);
4110 set_irn_mode(fadd, mode_T);
4111 res = new_rd_Proj(NULL, irg, block, fadd, mode_vfp, pn_ia32_res);
/**
 * Transform a lowered float -> long-long conversion: fist(p) the x87 value
 * into a 64bit frame slot; the two 32bit halves are picked up later by
 * gen_Proj_l_FloattoLL via Loads from that slot.
 */
4116 static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
4118 ir_node *src_block = get_nodes_block(node);
4119 ir_node *block = be_transform_node(src_block);
4120 ir_graph *irg = current_ir_graph;
4121 dbg_info *dbgi = get_irn_dbg_info(node);
4122 ir_node *frame = get_irg_frame(irg);
4123 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4124 ir_node *nomem = new_NoMem();
4125 ir_node *val = get_irn_n(node, n_ia32_l_FloattoLL_val);
4126 ir_node *new_val = be_transform_node(val);
4127 ir_node *fist, *mem;
4129 mem = gen_vfist(dbgi, irg, block, frame, noreg, nomem, new_val, &fist);
4130 SET_IA32_ORIG_NODE(fist, node);
4131 set_ia32_use_frame(fist);
4132 set_ia32_op_type(fist, ia32_AddrModeD);
4133 set_ia32_ls_mode(fist, mode_Ls);
/* Registered for opcodes that must never reach the transform phase. */
4139 * the BAD transformer.
4141 static ir_node *bad_transform(ir_node *node)
4143 panic("No transform function for %+F available.", node);
/**
 * Transform a Proj of an l_FloattoLL: load the requested 32bit half back
 * from the 64bit frame slot the fist wrote (offset 4 for the high word).
 */
4147 static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
4149 ir_graph *irg = current_ir_graph;
4150 ir_node *block = be_transform_node(get_nodes_block(node));
4151 ir_node *pred = get_Proj_pred(node);
4152 ir_node *new_pred = be_transform_node(pred);
4153 ir_node *frame = get_irg_frame(irg);
4154 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4155 dbg_info *dbgi = get_irn_dbg_info(node);
4156 long pn = get_Proj_proj(node);
4161 load = new_bd_ia32_Load(dbgi, block, frame, noreg, new_pred);
4162 SET_IA32_ORIG_NODE(load, node);
4163 set_ia32_use_frame(load);
4164 set_ia32_op_type(load, ia32_AddrModeS);
4165 set_ia32_ls_mode(load, mode_Iu);
4166 /* we need a 64bit stackslot (fist stores 64bit) even though we only load
4167 * 32 bit from it with this particular load */
4168 attr = get_ia32_attr(load);
4169 attr->data.need_64bit_stackent = 1;
4171 if (pn == pn_ia32_l_FloattoLL_res_high) {
4172 add_ia32_am_offs_int(load, 4);
4174 assert(pn == pn_ia32_l_FloattoLL_res_low);
4177 proj = new_r_Proj(irg, block, load, mode_Iu, pn_ia32_Load_res);
/* Map be_AddSP Proj numbers onto the ia32_SubSP it was transformed into
 * (AddSP maps to SubSP, see gen_be_AddSP). */
4183 * Transform the Projs of an AddSP.
4185 static ir_node *gen_Proj_be_AddSP(ir_node *node)
4187 ir_node *block = be_transform_node(get_nodes_block(node));
4188 ir_node *pred = get_Proj_pred(node);
4189 ir_node *new_pred = be_transform_node(pred);
4190 ir_graph *irg = current_ir_graph;
4191 dbg_info *dbgi = get_irn_dbg_info(node);
4192 long proj = get_Proj_proj(node);
4194 if (proj == pn_be_AddSP_sp) {
4195 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4196 pn_ia32_SubSP_stack);
/* the stack pointer result is pinned to esp */
4197 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4199 } else if (proj == pn_be_AddSP_res) {
4200 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4201 pn_ia32_SubSP_addr);
4202 } else if (proj == pn_be_AddSP_M) {
4203 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_SubSP_M);
4206 panic("No idea how to transform proj->AddSP");
/* Map be_SubSP Proj numbers onto the ia32_AddSP it was transformed into. */
4210 * Transform the Projs of a SubSP.
4212 static ir_node *gen_Proj_be_SubSP(ir_node *node)
4214 ir_node *block = be_transform_node(get_nodes_block(node));
4215 ir_node *pred = get_Proj_pred(node);
4216 ir_node *new_pred = be_transform_node(pred);
4217 ir_graph *irg = current_ir_graph;
4218 dbg_info *dbgi = get_irn_dbg_info(node);
4219 long proj = get_Proj_proj(node);
4221 if (proj == pn_be_SubSP_sp) {
4222 ir_node *res = new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu,
4223 pn_ia32_AddSP_stack);
4224 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4226 } else if (proj == pn_be_SubSP_M) {
4227 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_AddSP_M);
4230 panic("No idea how to transform proj->SubSP");
/* Renumber the Projs of a Load onto whichever ia32 node the Load became:
 * Load, Conv_I2I (source-AM folded), xLoad (SSE) or vfld (x87). Memory Projs
 * of multi-user Loads are deferred because the Load may still be folded
 * into another node's address mode.
 * NOTE(review): some case labels/lines are elided in this view. */
4234 * Transform and renumber the Projs from a Load.
4236 static ir_node *gen_Proj_Load(ir_node *node)
4239 ir_node *block = be_transform_node(get_nodes_block(node));
4240 ir_node *pred = get_Proj_pred(node);
4241 ir_graph *irg = current_ir_graph;
4242 dbg_info *dbgi = get_irn_dbg_info(node);
4243 long proj = get_Proj_proj(node);
4245 /* loads might be part of source address mode matches, so we don't
4246 * transform the ProjMs yet (with the exception of loads whose result is
4249 if (is_Load(pred) && proj == pn_Load_M && get_irn_n_edges(pred) > 1) {
4252 /* this is needed, because sometimes we have loops that are only
4253 reachable through the ProjM */
4254 be_enqueue_preds(node);
4255 /* do it in 2 steps, to silence firm verifier */
4256 res = new_rd_Proj(dbgi, irg, block, pred, mode_M, pn_Load_M);
4257 set_Proj_proj(res, pn_ia32_mem);
4261 /* renumber the proj */
4262 new_pred = be_transform_node(pred);
4263 if (is_ia32_Load(new_pred)) {
4266 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Load_res);
4268 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Load_M);
4269 case pn_Load_X_regular:
4270 return new_rd_Jmp(dbgi, irg, block);
4271 case pn_Load_X_except:
4272 /* This Load might raise an exception. Mark it. */
4273 set_ia32_exc_label(new_pred, 1);
4274 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Load_X_exc);
4278 } else if (is_ia32_Conv_I2I(new_pred) ||
4279 is_ia32_Conv_I2I8Bit(new_pred)) {
/* Load was folded into a Conv as source address mode */
4280 set_irn_mode(new_pred, mode_T);
4281 if (proj == pn_Load_res) {
4282 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_res);
4283 } else if (proj == pn_Load_M) {
4284 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_mem);
4286 } else if (is_ia32_xLoad(new_pred)) {
4289 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xLoad_res);
4291 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xLoad_M);
4292 case pn_Load_X_regular:
4293 return new_rd_Jmp(dbgi, irg, block);
4294 case pn_Load_X_except:
4295 /* This Load might raise an exception. Mark it. */
4296 set_ia32_exc_label(new_pred, 1);
4297 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4301 } else if (is_ia32_vfld(new_pred)) {
4304 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfld_res);
4306 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfld_M);
4307 case pn_Load_X_regular:
4308 return new_rd_Jmp(dbgi, irg, block);
4309 case pn_Load_X_except:
4310 /* This Load might raise an exception. Mark it. */
4311 set_ia32_exc_label(new_pred, 1);
4312 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_xLoad_X_exc);
4317 /* can happen for ProJMs when source address mode happened for the
4320 /* however it should not be the result proj, as that would mean the
4321 load had multiple users and should not have been used for
4323 if (proj != pn_Load_M) {
4324 panic("internal error: transformed node not a Load");
4326 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, 1);
4329 panic("No idea how to transform proj");
/* Renumber Projs of Div/Mod/DivMod onto the single ia32 Div/IDiv node
 * (which produces both quotient and remainder). */
4333 * Transform and renumber the Projs from a DivMod like instruction.
4335 static ir_node *gen_Proj_DivMod(ir_node *node)
4337 ir_node *block = be_transform_node(get_nodes_block(node));
4338 ir_node *pred = get_Proj_pred(node);
4339 ir_node *new_pred = be_transform_node(pred);
4340 ir_graph *irg = current_ir_graph;
4341 dbg_info *dbgi = get_irn_dbg_info(node);
4342 long proj = get_Proj_proj(node);
4344 assert(is_ia32_Div(new_pred) || is_ia32_IDiv(new_pred));
/* dispatch on the firm opcode of the original pred; inner switches map
 * its Proj numbers to the ia32_Div outputs */
4346 switch (get_irn_opcode(pred)) {
4350 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4352 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4353 case pn_Div_X_regular:
4354 return new_rd_Jmp(dbgi, irg, block);
4355 case pn_Div_X_except:
4356 set_ia32_exc_label(new_pred, 1);
4357 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4365 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4367 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4368 case pn_Mod_X_except:
4369 set_ia32_exc_label(new_pred, 1);
4370 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4378 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_Div_M);
4379 case pn_DivMod_res_div:
4380 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_div_res);
4381 case pn_DivMod_res_mod:
4382 return new_rd_Proj(dbgi, irg, block, new_pred, mode_Iu, pn_ia32_Div_mod_res);
4383 case pn_DivMod_X_regular:
4384 return new_rd_Jmp(dbgi, irg, block);
4385 case pn_DivMod_X_except:
4386 set_ia32_exc_label(new_pred, 1);
4387 return new_rd_Proj(dbgi, irg, block, new_pred, mode_X, pn_ia32_Div_X_exc);
4396 panic("No idea how to transform proj->DivMod");
/* Renumber Projs of a CopyB onto the ia32 CopyB_i / CopyB result node. */
4400 * Transform and renumber the Projs from a CopyB.
4402 static ir_node *gen_Proj_CopyB(ir_node *node)
4404 ir_node *block = be_transform_node(get_nodes_block(node));
4405 ir_node *pred = get_Proj_pred(node);
4406 ir_node *new_pred = be_transform_node(pred);
4407 ir_graph *irg = current_ir_graph;
4408 dbg_info *dbgi = get_irn_dbg_info(node);
4409 long proj = get_Proj_proj(node);
4412 case pn_CopyB_M_regular:
4413 if (is_ia32_CopyB_i(new_pred)) {
4414 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_i_M);
4415 } else if (is_ia32_CopyB(new_pred)) {
4416 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_CopyB_M);
4423 panic("No idea how to transform proj->CopyB");
/* Renumber Projs of a float Quot onto the ia32 xDiv (SSE) or vfdiv (x87). */
4427 * Transform and renumber the Projs from a Quot.
4429 static ir_node *gen_Proj_Quot(ir_node *node)
4431 ir_node *block = be_transform_node(get_nodes_block(node));
4432 ir_node *pred = get_Proj_pred(node);
4433 ir_node *new_pred = be_transform_node(pred);
4434 ir_graph *irg = current_ir_graph;
4435 dbg_info *dbgi = get_irn_dbg_info(node);
4436 long proj = get_Proj_proj(node);
4440 if (is_ia32_xDiv(new_pred)) {
4441 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_xDiv_M);
4442 } else if (is_ia32_vfdiv(new_pred)) {
4443 return new_rd_Proj(dbgi, irg, block, new_pred, mode_M, pn_ia32_vfdiv_M);
4447 if (is_ia32_xDiv(new_pred)) {
4448 return new_rd_Proj(dbgi, irg, block, new_pred, mode_xmm, pn_ia32_xDiv_res);
4449 } else if (is_ia32_vfdiv(new_pred)) {
4450 return new_rd_Proj(dbgi, irg, block, new_pred, mode_vfp, pn_ia32_vfdiv_res);
4453 case pn_Quot_X_regular:
4454 case pn_Quot_X_except:
4459 panic("No idea how to transform proj->Quot");
/**
 * Transform a be_Call into an ia32_Call. The call target may be folded into
 * an address mode or immediate; register parameters (eax/ecx/edx) are routed
 * to fixed inputs; fpcw is threaded through; float results enable the x87
 * simulator.
 */
4462 static ir_node *gen_be_Call(ir_node *node)
4464 dbg_info *const dbgi = get_irn_dbg_info(node);
4465 ir_graph *const irg = current_ir_graph;
4466 ir_node *const src_block = get_nodes_block(node);
4467 ir_node *const block = be_transform_node(src_block);
4468 ir_node *const src_mem = get_irn_n(node, be_pos_Call_mem);
4469 ir_node *const src_sp = get_irn_n(node, be_pos_Call_sp);
4470 ir_node *const sp = be_transform_node(src_sp);
4471 ir_node *const src_ptr = get_irn_n(node, be_pos_Call_ptr);
4472 ir_node *const noreg = ia32_new_NoReg_gp(env_cg);
4473 ia32_address_mode_t am;
4474 ia32_address_t *const addr = &am.addr;
4479 ir_node * eax = noreg;
4480 ir_node * ecx = noreg;
4481 ir_node * edx = noreg;
4482 unsigned const pop = be_Call_get_pop(node);
4483 ir_type *const call_tp = be_Call_get_type(node);
4485 /* Run the x87 simulator if the call returns a float value */
4486 if (get_method_n_ress(call_tp) > 0) {
4487 ir_type *const res_type = get_method_res_type(call_tp, 0);
4488 ir_mode *const res_mode = get_type_mode(res_type);
4490 if (res_mode != NULL && mode_is_float(res_mode)) {
4491 env_cg->do_x87_sim = 1;
4495 /* We do not want be_Call direct calls */
4496 assert(be_Call_get_entity(node) == NULL);
4498 match_arguments(&am, src_block, NULL, src_ptr, src_mem,
4499 match_am | match_immediate);
/* last input is the fpcw; walk the remaining inputs backwards and bind
 * each register parameter to its required GP register */
4501 i = get_irn_arity(node) - 1;
4502 fpcw = be_transform_node(get_irn_n(node, i--));
4503 for (; i >= be_pos_Call_first_arg; --i) {
4504 arch_register_req_t const *const req = arch_get_register_req(node, i);
4505 ir_node *const reg_parm = be_transform_node(get_irn_n(node, i));
4507 assert(req->type == arch_register_req_type_limited);
4508 assert(req->cls == &ia32_reg_classes[CLASS_ia32_gp]);
4510 switch (*req->limited) {
4511 case 1 << REG_EAX: assert(eax == noreg); eax = reg_parm; break;
4512 case 1 << REG_ECX: assert(ecx == noreg); ecx = reg_parm; break;
4513 case 1 << REG_EDX: assert(edx == noreg); edx = reg_parm; break;
4514 default: panic("Invalid GP register for register parameter");
4518 mem = transform_AM_mem(irg, block, src_ptr, src_mem, addr->mem);
4519 call = new_bd_ia32_Call(dbgi, block, addr->base, addr->index, mem,
4520 am.new_op2, sp, fpcw, eax, ecx, edx, pop, call_tp);
4521 set_am_attributes(call, &am);
4522 call = fix_mem_proj(call, &am);
4524 if (get_irn_pinned(node) == op_pin_state_pinned)
4525 set_irn_pinned(call, op_pin_state_pinned);
4527 SET_IA32_ORIG_NODE(call, node);
/* IncSP is kept as-is but marked as flag-clobbering (it may become add/sub). */
4531 static ir_node *gen_be_IncSP(ir_node *node)
4533 ir_node *res = be_duplicate_node(node);
4534 arch_irn_add_flags(res, arch_irn_flags_modify_flags);
/* Map be_Call Projs onto the ia32_Call; with SSE2, a float result is routed
 * through a vfst (st(0) -> stack) / xLoad (stack -> xmm) pair, and the memory
 * Proj must then hang off the xLoad instead of the call.
 * NOTE(review): a few lines are elided in this view. */
4540 * Transform the Projs from a be_Call.
4542 static ir_node *gen_Proj_be_Call(ir_node *node)
4544 ir_node *block = be_transform_node(get_nodes_block(node));
4545 ir_node *call = get_Proj_pred(node);
4546 ir_node *new_call = be_transform_node(call);
4547 ir_graph *irg = current_ir_graph;
4548 dbg_info *dbgi = get_irn_dbg_info(node);
4549 ir_type *method_type = be_Call_get_type(call);
4550 int n_res = get_method_n_ress(method_type);
4551 long proj = get_Proj_proj(node);
4552 ir_mode *mode = get_irn_mode(node);
4556 /* The following is kinda tricky: If we're using SSE, then we have to
4557 * move the result value of the call in floating point registers to an
4558 * xmm register, we therefore construct a GetST0 -> xLoad sequence
4559 * after the call, we have to make sure to correctly make the
4560 * MemProj and the result Proj use these 2 nodes
4562 if (proj == pn_be_Call_M_regular) {
4563 // get new node for result, are we doing the sse load/store hack?
4564 ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res);
4565 ir_node *call_res_new;
4566 ir_node *call_res_pred = NULL;
4568 if (call_res != NULL) {
4569 call_res_new = be_transform_node(call_res);
4570 call_res_pred = get_Proj_pred(call_res_new);
/* no sse hack: memory comes straight from the call; otherwise from
 * the xLoad that reloads the stored st(0) value */
4573 if (call_res_pred == NULL || is_ia32_Call(call_res_pred)) {
4574 return new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4577 assert(is_ia32_xLoad(call_res_pred));
4578 return new_rd_Proj(dbgi, irg, block, call_res_pred, mode_M,
4582 if (ia32_cg_config.use_sse2 && proj >= pn_be_Call_first_res
4583 && proj < (pn_be_Call_first_res + n_res) && mode_is_float(mode)) {
4585 ir_node *frame = get_irg_frame(irg);
4586 ir_node *noreg = ia32_new_NoReg_gp(env_cg);
4588 ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular);
4591 /* in case there is no memory output: create one to serialize the copy
4593 call_mem = new_rd_Proj(dbgi, irg, block, new_call, mode_M,
4594 pn_be_Call_M_regular);
4595 call_res = new_rd_Proj(dbgi, irg, block, new_call, mode,
4596 pn_be_Call_first_res);
4598 /* store st(0) onto stack */
4599 fstp = new_bd_ia32_vfst(dbgi, block, frame, noreg, call_mem,
4601 set_ia32_op_type(fstp, ia32_AddrModeD);
4602 set_ia32_use_frame(fstp);
4604 /* load into SSE register */
4605 sse_load = new_bd_ia32_xLoad(dbgi, block, frame, noreg, fstp, mode);
4606 set_ia32_op_type(sse_load, ia32_AddrModeS);
4607 set_ia32_use_frame(sse_load);
4609 sse_load = new_rd_Proj(dbgi, irg, block, sse_load, mode_xmm,
4615 /* transform call modes */
4616 if (mode_is_data(mode)) {
4617 const arch_register_class_t *cls = arch_get_irn_reg_class_out(node);
4621 /* Map from be_Call to ia32_Call proj number */
4622 if (proj == pn_be_Call_sp) {
4623 proj = pn_ia32_Call_stack;
4624 } else if (proj == pn_be_Call_M_regular) {
4625 proj = pn_ia32_Call_M;
/* result proj: find the ia32_Call output with the matching register
 * constraint by scanning the output requirements */
4627 arch_register_req_t const *const req = arch_get_register_req_out(node);
4628 int const n_outs = arch_irn_get_n_outs(new_call);
4631 assert(proj >= pn_be_Call_first_res);
4632 assert(req->type & arch_register_req_type_limited);
4634 for (i = 0; i < n_outs; ++i) {
4635 arch_register_req_t const *const new_req = get_ia32_out_req(new_call, i);
4637 if (!(new_req->type & arch_register_req_type_limited) ||
4638 new_req->cls != req->cls ||
4639 *new_req->limited != *req->limited)
4648 res = new_rd_Proj(dbgi, irg, block, new_call, mode, proj);
4650 /* TODO arch_set_irn_register() only operates on Projs, need variant with index */
4652 case pn_ia32_Call_stack:
4653 arch_set_irn_register(res, &ia32_gp_regs[REG_ESP]);
4656 case pn_ia32_Call_fpcw:
4657 arch_set_irn_register(res, &ia32_fp_cw_regs[REG_FPCW]);
/* Proj(Cmp) must have been eliminated by the mode_b lowering beforehand. */
4665 * Transform the Projs from a Cmp.
4667 static ir_node *gen_Proj_Cmp(ir_node *node)
4669 /* this probably means not all mode_b nodes were lowered... */
4670 panic("trying to directly transform Proj_Cmp %+F (mode_b not lowered?)",
/* Map Bound Projs: memory/index pass through untouched; the control-flow
 * Projs attach to the Jcc produced by gen_Bound (true = in range). */
4675 * Transform the Projs from a Bound.
4677 static ir_node *gen_Proj_Bound(ir_node *node)
4679 ir_node *new_node, *block;
4680 ir_node *pred = get_Proj_pred(node);
4682 switch (get_Proj_proj(node)) {
4684 return be_transform_node(get_Bound_mem(pred));
4685 case pn_Bound_X_regular:
4686 new_node = be_transform_node(pred);
4687 block = get_nodes_block(new_node);
4688 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_true);
4689 case pn_Bound_X_except:
4690 new_node = be_transform_node(pred);
4691 block = get_nodes_block(new_node);
4692 return new_r_Proj(current_ir_graph, block, new_node, mode_X, pn_ia32_Jcc_false);
4694 return be_transform_node(get_Bound_index(pred));
4696 panic("unsupported Proj from Bound");
/**
 * Transform a Proj of an ASM node. Only the memory Proj is special: it is
 * rerouted to the output slot after the register outputs
 * (arch_irn_get_n_outs + 1 — presumably the mem output index; see node spec).
 */
4700 static ir_node *gen_Proj_ASM(ir_node *node)
4706 if (get_irn_mode(node) != mode_M)
4707 return be_duplicate_node(node);
4709 pred = get_Proj_pred(node);
4710 new_pred = be_transform_node(pred);
4711 block = get_nodes_block(new_pred);
4712 return new_r_Proj(current_ir_graph, block, new_pred, mode_M,
4713 arch_irn_get_n_outs(new_pred) + 1);
/* Central Proj dispatcher: routes to the specialized gen_Proj_* handlers by
 * the predecessor's opcode; the default path renumbers gp Projs in place.
 * NOTE(review): several case labels are elided in this view. */
4717 * Transform and potentially renumber Proj nodes.
4719 static ir_node *gen_Proj(ir_node *node)
4721 ir_node *pred = get_Proj_pred(node);
4724 switch (get_irn_opcode(pred)) {
4726 proj = get_Proj_proj(node);
4727 if (proj == pn_Store_M) {
4728 return be_transform_node(pred);
4730 panic("No idea how to transform proj->Store");
4733 return gen_Proj_Load(node);
4735 return gen_Proj_ASM(node);
4739 return gen_Proj_DivMod(node);
4741 return gen_Proj_CopyB(node);
4743 return gen_Proj_Quot(node);
4745 return gen_Proj_be_SubSP(node);
4747 return gen_Proj_be_AddSP(node);
4749 return gen_Proj_be_Call(node);
4751 return gen_Proj_Cmp(node);
4753 return gen_Proj_Bound(node);
4755 proj = get_Proj_proj(node);
4757 case pn_Start_X_initial_exec: {
4758 ir_node *block = get_nodes_block(pred);
4759 ir_node *new_block = be_transform_node(block);
4760 dbg_info *dbgi = get_irn_dbg_info(node);
4761 /* we exchange the ProjX with a jump */
4762 ir_node *jump = new_rd_Jmp(dbgi, current_ir_graph, new_block);
4767 case pn_Start_P_tls:
4768 return gen_Proj_tls(node);
4773 if (is_ia32_l_FloattoLL(pred)) {
4774 return gen_Proj_l_FloattoLL(node);
4776 } else if (!is_ia32_irn(pred)) { // Quick hack for SIMD optimization
/* default: gp-mode Projs get their mode normalized to mode_Iu */
4780 ir_mode *mode = get_irn_mode(node);
4781 if (ia32_mode_needs_gp_reg(mode)) {
4782 ir_node *new_pred = be_transform_node(pred);
4783 ir_node *block = be_transform_node(get_nodes_block(node));
4784 ir_node *new_proj = new_r_Proj(current_ir_graph, block, new_pred,
4785 mode_Iu, get_Proj_proj(node));
4786 #ifdef DEBUG_libfirm
/* keep the original node number for debugging */
4787 new_proj->node_nr = node->node_nr;
4793 return be_duplicate_node(node);
/* Install the gen_* functions as generic-function pointers on the firm ops
 * so the be_transform driver can dispatch by opcode; BAD marks opcodes that
 * must not occur at this stage. (The bulk of the GEN/BAD list is elided in
 * this view.) */
4797 * Enters all transform functions into the generic pointer
4799 static void register_transformers(void)
4803 /* first clear the generic function pointer for all ops */
4804 clear_irp_opcodes_generic_func();
4806 #define GEN(a) { be_transform_func *func = gen_##a; op_##a->ops.generic = (op_func) func; }
4807 #define BAD(a) op_##a->ops.generic = (op_func)bad_transform
4845 /* transform ops from intrinsic lowering */
4857 GEN(ia32_l_LLtoFloat);
4858 GEN(ia32_l_FloattoLL);
4864 /* we should never see these nodes */
4879 /* handle generic backend nodes */
/* Mulh is an optional op; fetch it at runtime */
4888 op_Mulh = get_op_Mulh();
/* Pre-transform the per-class Unknown and NoReg placeholder nodes so they
 * exist before the main transformation walks the graph. */
4897 * Pre-transform all unknown and noreg nodes.
4899 static void ia32_pretransform_node(void)
4901 ia32_code_gen_t *cg = env_cg;
4903 cg->unknown_gp = be_pre_transform_node(cg->unknown_gp);
4904 cg->unknown_vfp = be_pre_transform_node(cg->unknown_vfp);
4905 cg->unknown_xmm = be_pre_transform_node(cg->unknown_xmm);
4906 cg->noreg_gp = be_pre_transform_node(cg->noreg_gp);
4907 cg->noreg_vfp = be_pre_transform_node(cg->noreg_vfp);
4908 cg->noreg_xmm = be_pre_transform_node(cg->noreg_xmm);
/* Walker: for every mode_T ia32 node, record which output Projs exist
 * (bitmask), then create Projs + a be_Keep for the missing ones so the
 * register allocator sees all defined values. Flags outputs and SwitchJmp
 * are exempt. NOTE(review): a few lines are elided in this view. */
4913 * Walker, checks if all ia32 nodes producing more than one result have their
4914 * Projs, otherwise creates new Projs and keeps them using a be_Keep node.
4916 static void add_missing_keep_walker(ir_node *node, void *data)
4919 unsigned found_projs = 0;
4920 const ir_edge_t *edge;
4921 ir_mode *mode = get_irn_mode(node);
4926 if (!is_ia32_irn(node))
4929 n_outs = arch_irn_get_n_outs(node);
4932 if (is_ia32_SwitchJmp(node))
/* the bitmask below only has room for one bit per output */
4935 assert(n_outs < (int) sizeof(unsigned) * 8);
4936 foreach_out_edge(node, edge) {
4937 ir_node *proj = get_edge_src_irn(edge);
4940 /* The node could be kept */
4944 if (get_irn_mode(proj) == mode_M)
4947 pn = get_Proj_proj(proj);
4948 assert(pn < n_outs);
4949 found_projs |= 1 << pn;
4953 /* are keeps missing? */
4955 for (i = 0; i < n_outs; ++i) {
4958 const arch_register_req_t *req;
4959 const arch_register_class_t *cls;
4961 if (found_projs & (1 << i)) {
4965 req = get_ia32_out_req(node, i);
/* flag outputs need no keep */
4970 if (cls == &ia32_reg_classes[CLASS_ia32_flags]) {
4974 block = get_nodes_block(node);
4975 in[0] = new_r_Proj(current_ir_graph, block, node,
4976 arch_register_class_mode(cls), i);
/* reuse one Keep per node; add further Projs to it */
4977 if (last_keep != NULL) {
4978 be_Keep_add_node(last_keep, cls, in[0]);
4980 last_keep = be_new_Keep(cls, current_ir_graph, block, 1, in);
4981 if (sched_is_scheduled(node)) {
4982 sched_add_after(node, last_keep);
/* Entry point: run add_missing_keep_walker over the whole graph. */
4989 * Adds missing keeps to nodes. Adds missing Proj nodes for unused outputs
4992 void ia32_add_missing_keeps(ia32_code_gen_t *cg)
4994 ir_graph *irg = be_get_birg_irg(cg->birg);
4995 irg_walk_graph(irg, add_missing_keep_walker, NULL, NULL);
/* Driver for the whole firm -> ia32 transformation: registers transformers,
 * precomputes heights and non-address-mode info, disables CSE for the
 * duration (attributes are set after node creation), runs the transform, and
 * cleans up. */
4998 /* do the transformation */
4999 void ia32_transform_graph(ia32_code_gen_t *cg)
5003 register_transformers();
5005 initial_fpcw = NULL;
5007 BE_TIMER_PUSH(t_heights);
5008 heights = heights_new(cg->irg);
5009 BE_TIMER_POP(t_heights);
5010 ia32_calculate_non_address_mode_nodes(cg->birg);
5012 /* the transform phase is not safe for CSE (yet) because several nodes get
5013 * attributes set after their creation */
5014 cse_last = get_opt_cse();
5017 be_transform_graph(cg->birg, ia32_pretransform_node);
/* restore the caller's CSE setting */
5019 set_opt_cse(cse_last);
5021 ia32_free_non_address_mode_nodes();
5022 heights_free(heights);
5026 void ia32_init_transform(void)
5028 FIRM_DBG_REGISTER(dbg, "firm.be.ia32.transform");