implemented a function to retrieve the estimated cost of an op
[libfirm] / ir / be / ia32 / bearch_ia32.c
index c64dbb5..714c495 100644 (file)
@@ -1,6 +1,6 @@
 /**
  * This is the main ia32 firm backend driver.
- *
+ * @author Christian Wuerdig
  * $Id$
  */
 
@@ -232,6 +232,10 @@ static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) {
                return arch_irn_class_branch;
        else if (is_ia32_Cnst(irn))
                return arch_irn_class_const;
+       else if (is_ia32_Ld(irn))
+               return arch_irn_class_load;
+       else if (is_ia32_St(irn) || is_ia32_Store8Bit(irn))
+               return arch_irn_class_store;
        else if (is_ia32_irn(irn))
                return arch_irn_class_normal;
        else
@@ -452,6 +456,176 @@ static ir_type *ia32_abi_get_between_type(void *self)
        return env->flags.try_omit_fp ? omit_fp_between_type : between_type;
 }
 
+/**
+ * Get the estimated cycle count for @p irn.
+ *
+ * @param self The this pointer.
+ * @param irn  The node.
+ *
+ * @return     The estimated cycle count for this operation
+ */
+static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn) {
+       int cost;
+
+       switch (get_ia32_irn_opcode(irn)) {
+               case iro_ia32_xDiv:
+               case iro_ia32_DivMod:
+                       cost = 8;
+                       break;
+
+               case iro_ia32_xLoad:
+               case iro_ia32_l_Load:
+               case iro_ia32_Load:
+               case iro_ia32_Push:
+               case iro_ia32_Pop:
+                       cost = 10;
+                       break;
+
+               case iro_ia32_xStore:
+               case iro_ia32_l_Store:
+               case iro_ia32_Store:
+               case iro_ia32_Store8Bit:
+                       cost = 50;
+                       break;
+
+               case iro_ia32_MulS:
+               case iro_ia32_Mul:
+               case iro_ia32_Mulh:
+               case iro_ia32_xMul:
+               case iro_ia32_l_MulS:
+               case iro_ia32_l_Mul:
+                       cost = 2;
+                       break;
+
+               default:
+                       cost = 1;
+       }
+
+       return cost;
+}
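+
+/*
+ * Usage sketch (illustrative only; the vtable member name is an
+ * assumption, inferred from the arch_irn_ops_if_t entry added below):
+ * a pass holding the irn ops vtable could compare candidates by their
+ * estimated cycle counts, e.g. to pick the cheaper node to recompute:
+ *
+ *   if (ops->get_op_estimated_cost(self, a) < ops->get_op_estimated_cost(self, b))
+ *       cheaper = a;
+ */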
+
+/**
+ * Returns the inverse operation of @p irn, recalculating the argument at position @p i.
+ *
+ * @param self      The this pointer.
+ * @param irn       The original operation
+ * @param i         Index of the argument we want the inverse operation to yield
+ * @param inverse   struct to be filled with the resulting inverse op
+ * @param obst      The obstack to use for allocation of the returned nodes array
+ * @return          The inverse operation or NULL if the operation is not invertible
+ */
+static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, int i, arch_inverse_t *inverse, struct obstack *obst) {
+       ir_graph *irg;
+       ir_mode  *mode;
+       ir_node  *block, *noreg, *nomem;
+       int      pnc;
+
+       /* we cannot invert non-ia32 irns */
+       if (! is_ia32_irn(irn))
+               return NULL;
+
+       /* operand must always be a real operand (not base, index or mem) */
+       if (i != 2 && i != 3)
+               return NULL;
+
+       /* we don't invert address mode operations */
+       if (get_ia32_op_type(irn) != ia32_Normal)
+               return NULL;
+
+       irg   = get_irn_irg(irn);
+       block = get_nodes_block(irn);
+       mode  = get_ia32_res_mode(irn);
+       noreg = get_irn_n(irn, 0);
+       nomem = new_r_NoMem(irg);
+
+       /* initialize structure */
+       inverse->nodes = obstack_alloc(obst, 2 * sizeof(inverse->nodes[0]));
+       inverse->costs = 0;
+       inverse->n     = 2;
+
+       switch (get_ia32_irn_opcode(irn)) {
+               case iro_ia32_Add:
+                       if (get_ia32_immop_type(irn) == ia32_ImmConst) {
+                               /* we have an add with a const here */
+                               /* inverse == add with negated const */
+                               inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+                               pnc               = pn_ia32_Add_res;
+                               inverse->costs   += 1;
+                               copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
+                               set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
+                               set_ia32_commutative(inverse->nodes[0]);
+                       }
+                       else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
+                               /* we have an add with a symconst here */
+                               /* inverse == sub with const */
+                               inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+                               pnc               = pn_ia32_Sub_res;
+                               inverse->costs   += 5;
+                               copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
+                       }
+                       else {
+                               /* normal add: inverse == sub */
+                               inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, i ^ 1), nomem);
+                               pnc               = pn_ia32_Sub_res;
+                               inverse->costs   += 5;
+                       }
+                       break;
+               case iro_ia32_Sub:
+                       if (get_ia32_immop_type(irn) != ia32_ImmNone) {
+                               /* we have a sub with a const/symconst here */
+                               /* inverse == add with this const */
+                               inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+                               pnc               = pn_ia32_Add_res;
+                               inverse->costs   += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
+                               copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
+                       }
+                       else {
+                               /* normal sub */
+                               if (i == 2) {
+                                       inverse->nodes[0] = new_rd_ia32_Add(NULL, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, 3), nomem);
+                               }
+                               else {
+                                       inverse->nodes[0] = new_rd_ia32_Sub(NULL, irg, block, noreg, noreg, get_irn_n(irn, 2), (ir_node *)irn, nomem);
+                               }
+                               pnc               = pn_ia32_Sub_res;
+                               inverse->costs   += 1;
+                       }
+                       break;
+               case iro_ia32_Eor:
+                       if (get_ia32_immop_type(irn) != ia32_ImmNone) {
+                               /* xor with const: inverse = xor */
+                               inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+                               pnc               = pn_ia32_Eor_res;
+                               inverse->costs   += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
+                               copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
+                       }
+                       else {
+                               /* normal xor: xor the result with the other operand to recover operand i */
+                               inverse->nodes[0] = new_rd_ia32_Eor(NULL, irg, block, noreg, noreg, (ir_node *)irn, get_irn_n(irn, i ^ 1), nomem);
+                               pnc               = pn_ia32_Eor_res;
+                               inverse->costs   += 1;
+                       }
+                       break;
+               case iro_ia32_Not:
+                       inverse->nodes[0] = new_rd_ia32_Not(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), nomem);
+                       pnc               = pn_ia32_Not_res;
+                       inverse->costs   += 1;
+                       break;
+               case iro_ia32_Minus:
+                       inverse->nodes[0] = new_rd_ia32_Minus(NULL, irg, block, noreg, noreg, get_irn_n(irn, i), nomem);
+                       pnc               = pn_ia32_Minus_res;
+                       inverse->costs   += 1;
+                       break;
+               default:
+                       /* inverse operation not supported */
+                       return NULL;
+       }
+
+       set_ia32_res_mode(inverse->nodes[0], mode);
+       inverse->nodes[1] = new_r_Proj(irg, block, inverse->nodes[0], mode, pnc);
+
+       return inverse;
+}
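+
+/*
+ * Consumption sketch (illustrative only): by the convention established
+ * above, nodes[0] is the recomputing operation and nodes[1] the Proj
+ * carrying its result:
+ *
+ *   arch_inverse_t inv;
+ *   if (ia32_get_inverse(self, irn, 2, &inv, obst) != NULL) {
+ *       ir_node *inv_op  = inv.nodes[0];
+ *       ir_node *inv_res = inv.nodes[1];
+ *       ... schedule inv_op and use inv_res in place of operand 2,
+ *           provided inv.costs is acceptable ...
+ *   }
+ */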
+
 static const be_abi_callbacks_t ia32_abi_callbacks = {
        ia32_abi_init,
        free,
@@ -470,7 +644,9 @@ static const arch_irn_ops_if_t ia32_irn_ops_if = {
        ia32_classify,
        ia32_get_flags,
        ia32_get_frame_entity,
-       ia32_set_stack_bias
+       ia32_set_stack_bias,
+       ia32_get_inverse,
+       ia32_get_op_estimated_cost
 };
 
 ia32_irn_ops_t ia32_irn_ops = {
@@ -497,6 +673,7 @@ ia32_irn_ops_t ia32_irn_ops = {
  */
 static void ia32_prepare_graph(void *self) {
        ia32_code_gen_t *cg = self;
+       dom_front_info_t *dom;
        DEBUG_ONLY(firm_dbg_module_t *old_mod = cg->mod;)
 
        FIRM_DBG_REGISTER(cg->mod, "firm.be.ia32.transform");
@@ -506,7 +683,9 @@ static void ia32_prepare_graph(void *self) {
 
        /* 2nd: transform all remaining nodes */
        ia32_register_transformers();
+       dom = be_compute_dominance_frontiers(cg->irg);
        irg_walk_blkwise_graph(cg->irg, NULL, ia32_transform_node, cg);
+       be_free_dominance_frontiers(dom);
        be_dump(cg->irg, "-transformed", dump_ir_block_graph_sched);
 
        /* 3rd: optimize address mode */
@@ -616,7 +795,7 @@ insert_copy:
                ia32_transform_sub_to_neg_add(irn, cg);
 
                /* transform a LEA into an Add if possible */
-               //ia32_transform_lea_to_add(irn, cg);
+               ia32_transform_lea_to_add(irn, cg);
        }
 end:
 
@@ -711,7 +890,7 @@ static void transform_to_Load(ia32_transform_env_t *env) {
        reg = arch_get_irn_register(env->cg->arch_env, irn);
        arch_set_irn_register(env->cg->arch_env, new_op, reg);
 
-       SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, new_op));
+       SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
 
        exchange(irn, proj);
 }
@@ -765,7 +944,7 @@ static void transform_to_Store(ia32_transform_env_t *env) {
                sched_remove(irn);
        }
 
-       SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, new_op));
+       SET_IA32_ORIG_NODE(new_op, ia32_get_old_node_name(env->cg, irn));
 
        exchange(irn, proj);
 }
@@ -831,7 +1010,7 @@ static void ia32_after_ra(void *self) {
        irg_block_walk_graph(cg->irg, NULL, ia32_after_ra_walker, self);
 
        /* if we do x87 code generation, rewrite all the virtual instructions and registers */
-       if (cg->used_fp == fp_x87) {
+       if (cg->used_fp == fp_x87 || cg->force_sim) {
                x87_simulate_graph(cg->arch_env, cg->irg, cg->blk_sched);
        }
 }
@@ -1013,6 +1192,8 @@ static void *ia32_init(FILE *file_handle) {
        isa->name_obst_size = 0;
 #endif /* NDEBUG */
 
+       ia32_handle_intrinsics();
+       ia32_switch_section(NULL, NO_SECTION);
        fprintf(isa->out, "\t.intel_syntax\n");
 
        inited = 1;
@@ -1049,11 +1230,11 @@ static void ia32_done(void *self) {
  * Return the number of register classes for this architecture.
  * We always report these:
  *  - the general purpose registers
- *  - the floating point register set (depending on the unit used for FP)
- *  - MMX/SSE registers (currently not supported)
+ *  - the SSE floating point register set
+ *  - the virtual floating point registers
  */
 static int ia32_get_n_reg_class(const void *self) {
-       return 2;
+       return 3;
 }
 
 /**
@@ -1061,10 +1242,13 @@ static int ia32_get_n_reg_class(const void *self) {
  */
 static const arch_register_class_t *ia32_get_reg_class(const void *self, int i) {
        const ia32_isa_t *isa = self;
-       assert(i >= 0 && i < 2 && "Invalid ia32 register class requested.");
+       assert(i >= 0 && i < 3 && "Invalid ia32 register class requested.");
        if (i == 0)
                return &ia32_reg_classes[CLASS_ia32_gp];
-       return USE_SSE2(isa) ? &ia32_reg_classes[CLASS_ia32_xmm] : &ia32_reg_classes[CLASS_ia32_vfp];
+       else if (i == 1)
+               return &ia32_reg_classes[CLASS_ia32_xmm];
+       else
+               return &ia32_reg_classes[CLASS_ia32_vfp];
 }
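+
+/*
+ * Iteration sketch (illustrative only; the dispatcher names are
+ * assumptions, not confirmed by this change): backend-independent code
+ * would typically enumerate the classes reported above via the isa
+ * interface, e.g.:
+ *
+ *   int i, n = arch_isa_get_n_reg_class(isa);
+ *   for (i = 0; i < n; ++i) {
+ *       const arch_register_class_t *cls = arch_isa_get_reg_class(isa, i);
+ *       ...
+ *   }
+ */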
 
 /**
@@ -1226,6 +1410,32 @@ static int ia32_get_reg_class_alignment(const void *self, const arch_register_cl
        return bytes;
 }
 
+static ia32_intrinsic_env_t intrinsic_env = { NULL, NULL };
+
+/**
+ * Returns the libFirm configuration parameter for this backend.
+ */
+static const backend_params *ia32_get_libfirm_params(void) {
+       static const arch_dep_params_t ad = {
+               1, /* also use subs */
+               4, /* maximum shifts */
+               31, /* maximum shift amount */
+
+               1, /* allow Mulhs */
+               1, /* allow Mulus */
+               32  /* Mulh allowed up to 32 bit */
+       };
+       static backend_params p = {
+               NULL,  /* no additional opcodes */
+               NULL,  /* will be set later */
+               1,     /* need dword lowering */
+               ia32_create_intrinsic_fkt,
+               &intrinsic_env,  /* context for ia32_create_intrinsic_fkt */
+       };
+
+       p.dep_param = &ad;
+       return &p;
+}
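+
+/*
+ * Sketch (illustrative only; the field names are inferred from the
+ * comments above and are assumptions here): a caller fetches the
+ * parameters once and consults dep_param when lowering, e.g.:
+ *
+ *   const backend_params *bp = ia32_get_libfirm_params();
+ *   if (bp->dep_param->allow_mulhs)
+ *       ... replace a signed division by constant with Mulh ...
+ */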
 #ifdef WITH_LIBCORE
 
 /* instruction set architectures. */
@@ -1328,6 +1538,7 @@ const arch_isa_if_t ia32_isa_if = {
        ia32_get_code_generator_if,
        ia32_get_list_sched_selector,
        ia32_get_reg_class_alignment,
+       ia32_get_libfirm_params,
 #ifdef WITH_LIBCORE
        ia32_register_options
 #endif