begnuas: let user specify elf variants

[libfirm] / ir / be / ia32 / bearch_ia32.c
diff --git a/ir/be/ia32/bearch_ia32.c b/ir/be/ia32/bearch_ia32.c

index 534c4b6..c586722 100644 (file)
--- a/ir/be/ia32/bearch_ia32.c
+++ b/ir/be/ia32/bearch_ia32.c
@@ -66,6 +66,7 @@
  #include "../be_dbgout.h"
  #include "../beblocksched.h"
  #include "../bemachine.h"
+#include "../bespillutil.h"
  #include "../bespillslots.h"
  #include "../bemodule.h"
  #include "../begnuas.h"
@@ -74,6 +75,7 @@
  #include "../betranshlp.h"
  #include "../belistsched.h"
  #include "../beabihelper.h"
+#include "../bestack.h"
  
  #include "bearch_ia32_t.h"
  
@@ -176,7 +178,7 @@ ir_node *ia32_new_Fpu_truncate(ir_graph *irg)
  static ir_node *ia32_get_admissible_noreg(ir_node *irn, int pos)
  {
         ir_graph                  *irg = get_irn_irg(irn);
-       const arch_register_req_t *req = arch_get_register_req(irn, pos);
+       const arch_register_req_t *req = arch_get_irn_register_req_in(irn, pos);
  
         assert(req != NULL && "Missing register requirements");
         if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
@@ -565,7 +567,7 @@ static int ia32_possible_memory_operand(const ir_node *irn, unsigned int i)
                                         /* we can't swap left/right for limited registers
                                          * (As this (currently) breaks constraint handling copies)
                                          */
-                                       req = arch_get_in_register_req(irn, n_ia32_binary_left);
+                                       req = arch_get_irn_register_req_in(irn, n_ia32_binary_left);
                                         if (req->type & arch_register_req_type_limited)
                                                 return 0;
                                         break;
@@ -1276,10 +1278,10 @@ static void introduce_prolog_epilog(ir_graph *irg)
                 sched_add_after(start, push);
  
                 /* move esp to ebp */
-               curr_bp = be_new_Copy(bp->reg_class, block, curr_sp);
+               curr_bp = be_new_Copy(block, curr_sp);
                 sched_add_after(push, curr_bp);
                 be_set_constr_single_reg_out(curr_bp, 0, bp, arch_register_req_type_ignore);
-               curr_sp = be_new_CopyKeep_single(sp->reg_class, block, curr_sp, curr_bp, mode_gp);
+               curr_sp = be_new_CopyKeep_single(block, curr_sp, curr_bp);
                 sched_add_after(curr_bp, curr_sp);
                 be_set_constr_single_reg_out(curr_sp, 0, sp, arch_register_req_type_produces_sp);
                 edges_reroute(initial_bp, curr_bp);
@@ -1290,6 +1292,13 @@ static void introduce_prolog_epilog(ir_graph *irg)
                 set_irn_n(push, n_ia32_Push_stack, initial_sp);
                 sched_add_after(curr_sp, incsp);
  
+               /* make sure the initial IncSP is really used by someone */
+               if (get_irn_n_edges(incsp) <= 1) {
+                       ir_node *in[] = { incsp };
+                       ir_node *keep = be_new_Keep(block, 1, in);
+                       sched_add_after(incsp, keep);
+               }
+
                 layout->initial_bias = -4;
         } else {
                 ir_node *incsp = be_new_IncSP(sp, block, curr_sp, frame_size, 0);
@@ -1313,11 +1322,13 @@ static void introduce_prolog_epilog(ir_graph *irg)
  }
  
  /**
- * We transform Spill and Reload here. This needs to be done before
- * stack biasing otherwise we would miss the corrected offset for these nodes.
+ * Last touchups for the graph before emit: x87 simulation to replace the
+ * virtual with real x87 instructions, creating a block schedule and peephole
+ * optimisations.
   */
-static void ia32_after_ra(ir_graph *irg)
+static void ia32_finish(ir_graph *irg)
  {
+       ia32_irg_data_t   *irg_data     = ia32_get_irg_data(irg);
         be_stack_layout_t *stack_layout = be_get_irg_stack_layout(irg);
         bool               at_begin     = stack_layout->sp_relative ? true : false;
         be_fec_env_t      *fec_env      = be_new_frame_entity_coalescer(irg);
@@ -1330,17 +1341,12 @@ static void ia32_after_ra(ir_graph *irg)
         irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, NULL);
  
         introduce_prolog_epilog(irg);
-}
  
-/**
- * Last touchups for the graph before emit: x87 simulation to replace the
- * virtual with real x87 instructions, creating a block schedule and peephole
- * optimisations.
- */
-static void ia32_finish(ir_graph *irg)
-{
-       ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
+       /* fix stack entity offsets */
+       be_abi_fix_stack_nodes(irg);
+       be_abi_fix_stack_bias(irg);
  
+       /* fix 2-address code constraints */
         ia32_finish_irg(irg);
  
         /* we might have to rewrite x87 virtual registers */
@@ -1351,6 +1357,8 @@ static void ia32_finish(ir_graph *irg)
         /* do peephole optimisations */
         ia32_peephole_optimization(irg);
  
+       be_remove_dead_nodes_from_schedule(irg);
+
         /* create block schedule, this also removes empty blocks which might
          * produce critical edges */
         irg_data->blk_sched = be_create_block_schedule(irg);
@@ -2041,6 +2049,14 @@ static void ia32_lower_for_target(void)
                 /* break up switches with wide ranges */
                 lower_switch(irg, 4, 256, false);
         }
+
+       for (i = 0; i < n_irgs; ++i) {
+               ir_graph *irg = get_irp_irg(i);
+               /* Turn all small CopyBs into loads/stores, keep medium-sized CopyBs,
+                * so we can generate rep movs later, and turn all big CopyBs into
+                * memcpy calls. */
+               lower_CopyB(irg, 64, 8193);
+       }
  }
  
  /**
@@ -2048,25 +2064,27 @@ static void ia32_lower_for_target(void)
   */
  static ir_node *ia32_create_trampoline_fkt(ir_node *block, ir_node *mem, ir_node *trampoline, ir_node *env, ir_node *callee)
  {
-       ir_graph *irg  = get_irn_irg(block);
-       ir_node  *p    = trampoline;
-       ir_mode  *mode = get_irn_mode(p);
-       ir_node  *st;
+       ir_graph *const irg  = get_irn_irg(block);
+       ir_node  *      p    = trampoline;
+       ir_mode  *const mode = get_irn_mode(p);
+       ir_node  *const one  = new_r_Const(irg, get_mode_one(mode_Iu));
+       ir_node  *const four = new_r_Const_long(irg, mode_Iu, 4);
+       ir_node  *      st;
  
         /* mov  ecx,<env> */
         st  = new_r_Store(block, mem, p, new_r_Const_long(irg, mode_Bu, 0xb9), cons_none);
         mem = new_r_Proj(st, mode_M, pn_Store_M);
-       p   = new_r_Add(block, p, new_r_Const_long(irg, mode_Iu, 1), mode);
+       p   = new_r_Add(block, p, one, mode);
         st  = new_r_Store(block, mem, p, env, cons_none);
         mem = new_r_Proj(st, mode_M, pn_Store_M);
-       p   = new_r_Add(block, p, new_r_Const_long(irg, mode_Iu, 4), mode);
+       p   = new_r_Add(block, p, four, mode);
         /* jmp  <callee> */
         st  = new_r_Store(block, mem, p, new_r_Const_long(irg, mode_Bu, 0xe9), cons_none);
         mem = new_r_Proj(st, mode_M, pn_Store_M);
-       p   = new_r_Add(block, p, new_r_Const_long(irg, mode_Iu, 1), mode);
+       p   = new_r_Add(block, p, one, mode);
         st  = new_r_Store(block, mem, p, callee, cons_none);
         mem = new_r_Proj(st, mode_M, pn_Store_M);
-       p   = new_r_Add(block, p, new_r_Const_long(irg, mode_Iu, 4), mode);
+       p   = new_r_Add(block, p, four, mode);
  
         return mem;
  }
@@ -2232,10 +2250,11 @@ const arch_isa_if_t ia32_isa_if = {
         ia32_before_abi,     /* before abi introduce hook */
         ia32_prepare_graph,
         ia32_before_ra,      /* before register allocation hook */
-       ia32_after_ra,       /* after register allocation hook */
         ia32_finish,         /* called before codegen */
         ia32_emit,           /* emit && done */
         ia32_register_saved_by,
+       be_new_spill,
+       be_new_reload
  };
  
  BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_ia32)