ia32: Use a more logical specification of operand sizes in the binary emitter.
[libfirm] / ir / be / sparc / sparc_emitter.c
index bceb5ab..15de55d 100644 (file)
 /**
  * @file
  * @brief   emit assembler for a backend graph
- * @version $Id$
+ * @author  Hannes Rapp, Matthias Braun
  */
 #include "config.h"
 
 #include <limits.h>
 
+#include "bitfiddle.h"
 #include "xmalloc.h"
 #include "tv.h"
 #include "iredges.h"
 #include "error.h"
 #include "raw_bitset.h"
 #include "dbginfo.h"
-
-#include "../besched.h"
-#include "../beblocksched.h"
-#include "../beirg.h"
-#include "../begnuas.h"
-#include "../be_dbgout.h"
-#include "../benode.h"
+#include "heights.h"
+#include "pmap.h"
+#include "execfreq_t.h"
+
+#include "besched.h"
+#include "beblocksched.h"
+#include "beirg.h"
+#include "begnuas.h"
+#include "bedwarf.h"
+#include "benode.h"
+#include "bestack.h"
+#include "bepeephole.h"
 
 #include "sparc_emitter.h"
 #include "gen_sparc_emitter.h"
 #include "sparc_new_nodes.h"
 #include "gen_sparc_regalloc_if.h"
 
-
-#define SNPRINTF_BUF_LEN 128
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
-/**
- * attribute of SAVE node which follows immediatelly after the START node
- * we need this to correct all offsets since SPARC expects
- * some reserved stack space after the stackpointer
- */
-const sparc_save_attr_t *save_attr;
+static ir_heights_t *heights;
+static unsigned     *delay_slot_fillers;
+static pmap         *delay_slots;
+
+static bool emitting_delay_slot;
 
 /**
- * Returns the register at in position pos.
+ * indent before instruction. (Adds additional indentation when emitting
+ * delay slots)
  */
-static const arch_register_t *get_in_reg(const ir_node *node, int pos)
+static void sparc_emit_indent(void)
 {
-       ir_node                *op;
-       const arch_register_t  *reg = NULL;
-
-       assert(get_irn_arity(node) > pos && "Invalid IN position");
-
-       /* The out register of the operator at position pos is the
-          in register we need. */
-       op = get_irn_n(node, pos);
-
-       reg = arch_get_irn_register(op);
-
-       assert(reg && "no in register found");
-       return reg;
+       be_emit_char('\t');
+       if (emitting_delay_slot)
+               be_emit_char(' ');
 }
 
-/**
- * Returns the register at out position pos.
- */
-static const arch_register_t *get_out_reg(const ir_node *node, int pos)
+static void sparc_emit_immediate(ir_node const *const node)
 {
-       ir_node                *proj;
-       const arch_register_t  *reg = NULL;
+       const sparc_attr_t *attr   = get_sparc_attr_const(node);
+       ir_entity          *entity = attr->immediate_value_entity;
 
-       /* 1st case: irn is not of mode_T, so it has only                 */
-       /*           one OUT register -> good                             */
-       /* 2nd case: irn is of mode_T -> collect all Projs and ask the    */
-       /*           Proj with the corresponding projnum for the register */
-
-       if (get_irn_mode(node) != mode_T) {
-               reg = arch_get_irn_register(node);
-       } else if (is_sparc_irn(node)) {
-               reg = arch_irn_get_register(node, pos);
+       if (entity == NULL) {
+               int32_t value = attr->immediate_value;
+               assert(sparc_is_value_imm_encodeable(value));
+               be_emit_irprintf("%d", value);
        } else {
-               const ir_edge_t *edge;
-
-               foreach_out_edge(node, edge) {
-                       proj = get_edge_src_irn(edge);
-                       assert(is_Proj(proj) && "non-Proj from mode_T node");
-                       if (get_Proj_proj(proj) == pos) {
-                               reg = arch_get_irn_register(proj);
-                               break;
-                       }
+               if (get_entity_owner(entity) == get_tls_type()) {
+                       be_emit_cstring("%tle_lox10(");
+               } else {
+                       be_emit_cstring("%lo(");
+               }
+               be_gas_emit_entity(entity);
+               if (attr->immediate_value != 0) {
+                       be_emit_irprintf("%+d", attr->immediate_value);
                }
+               be_emit_char(')');
        }
-
-       assert(reg && "no out register found");
-       return reg;
 }
 
-/*************************************************************
- *             _       _    __   _          _
- *            (_)     | |  / _| | |        | |
- *  _ __  _ __ _ _ __ | |_| |_  | |__   ___| |_ __   ___ _ __
- * | '_ \| '__| | '_ \| __|  _| | '_ \ / _ \ | '_ \ / _ \ '__|
- * | |_) | |  | | | | | |_| |   | | | |  __/ | |_) |  __/ |
- * | .__/|_|  |_|_| |_|\__|_|   |_| |_|\___|_| .__/ \___|_|
- * | |                                       | |
- * |_|                                       |_|
- *************************************************************/
-
-void sparc_emit_immediate(const ir_node *node)
+static void sparc_emit_high_immediate(ir_node const *node)
 {
-       int const val = get_sparc_attr_const(node)->immediate_value;
-       assert(-4096 <= val && val < 4096);
-       be_emit_irprintf("%d", val);
+       const sparc_attr_t *attr   = get_sparc_attr_const(node);
+       ir_entity          *entity = attr->immediate_value_entity;
+
+       if (entity == NULL) {
+               uint32_t value = (uint32_t) attr->immediate_value;
+               be_emit_irprintf("%%hi(0x%X)", value);
+       } else {
+               if (get_entity_owner(entity) == get_tls_type()) {
+                       be_emit_cstring("%tle_hix22(");
+               } else {
+                       be_emit_cstring("%hi(");
+               }
+               be_gas_emit_entity(entity);
+               if (attr->immediate_value != 0) {
+                       be_emit_irprintf("%+d", attr->immediate_value);
+               }
+               be_emit_char(')');
+       }
 }
 
-void sparc_emit_source_register(const ir_node *node, int pos)
+static void sparc_emit_source_register(ir_node const *node, int const pos)
 {
-       const arch_register_t *reg = get_in_reg(node, pos);
+       const arch_register_t *reg = arch_get_irn_register_in(node, pos);
        be_emit_char('%');
-       be_emit_string(arch_register_get_name(reg));
+       be_emit_string(reg->name);
 }
 
-void sparc_emit_dest_register(const ir_node *node, int pos)
+static void sparc_emit_dest_register(ir_node const *const node, int const pos)
 {
-       const arch_register_t *reg = get_out_reg(node, pos);
+       const arch_register_t *reg = arch_get_irn_register_out(node, pos);
        be_emit_char('%');
-       be_emit_string(arch_register_get_name(reg));
+       be_emit_string(reg->name);
 }
 
 /**
- * Emits either a imm or register depending on arity of node
- * @param node
- * @param register no (-1 if no register)
+ * emit SP offset
  */
-void sparc_emit_reg_or_imm(const ir_node *node, int pos)
+static void sparc_emit_offset(const ir_node *node, int offset_node_pos)
 {
-       if (get_irn_arity(node) > pos) {
-               // we have reg input
-               sparc_emit_source_register(node, pos);
-       } else {
-               // we have a imm input
+       const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
+
+       if (attr->is_reg_reg) {
+               assert(!attr->is_frame_entity);
+               assert(attr->base.immediate_value == 0);
+               assert(attr->base.immediate_value_entity == NULL);
+               be_emit_char('+');
+               sparc_emit_source_register(node, offset_node_pos);
+       } else if (attr->is_frame_entity) {
+               int32_t offset = attr->base.immediate_value;
+               if (offset != 0) {
+                       assert(sparc_is_value_imm_encodeable(offset));
+                       be_emit_irprintf("%+ld", offset);
+               }
+       } else if (attr->base.immediate_value != 0
+                       || attr->base.immediate_value_entity != NULL) {
+               be_emit_char('+');
                sparc_emit_immediate(node);
        }
 }
 
 /**
- * emit SP offset
+ *  Emit load mode
  */
-void sparc_emit_offset(const ir_node *node)
+static void sparc_emit_load_mode(ir_node const *const node)
 {
        const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
-       assert(attr->base.is_load_store);
-
-       if (attr->offset > 0)
-               be_emit_irprintf("+%ld", attr->offset);
+       ir_mode *mode      = attr->load_store_mode;
+       int      bits      = get_mode_size_bits(mode);
+       bool     is_signed = mode_is_signed(mode);
+
+       switch (bits) {
+       case   8: be_emit_string(is_signed ? "sb" : "ub"); break;
+       case  16: be_emit_string(is_signed ? "sh" : "uh"); break;
+       case  32: break;
+       case  64: be_emit_char('d'); break;
+       case 128: be_emit_char('q'); break;
+       default:  panic("invalid load/store mode %+F", mode);
+       }
 }
 
-
 /**
- *  Emit load mode char
+ * Emit store mode char
  */
-void sparc_emit_load_mode(const ir_node *node)
+static void sparc_emit_store_mode(ir_node const *const node)
 {
        const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
-    ir_mode *mode      = attr->load_store_mode;
-    int      bits      = get_mode_size_bits(mode);
-    bool     is_signed = mode_is_signed(mode);
+       ir_mode *mode      = attr->load_store_mode;
+       int      bits      = get_mode_size_bits(mode);
+
+       switch (bits) {
+       case   8: be_emit_char('b'); break;
+       case  16: be_emit_char('h'); break;
+       case  32: break;
+       case  64: be_emit_char('d'); break;
+       case 128: be_emit_char('q'); break;
+       default:  panic("invalid load/store mode %+F", mode);
+       }
+}
 
-    if (bits == 16) {
-        be_emit_string(is_signed ? "sh" : "uh");
-    } else if (bits == 8) {
-        be_emit_string(is_signed ? "sb" : "ub");
-    } else if (bits == 64) {
-        be_emit_string("d");
-    } else {
-        assert(bits == 32);
-    }
+static void emit_fp_suffix(const ir_mode *mode)
+{
+       assert(mode_is_float(mode));
+       switch (get_mode_size_bits(mode)) {
+       case  32: be_emit_char('s'); break;
+       case  64: be_emit_char('d'); break;
+       case 128: be_emit_char('q'); break;
+       default:  panic("invalid FP mode");
+       }
 }
 
-/**
- * Emit store mode char
- */
-void sparc_emit_store_mode(const ir_node *node)
+static void set_jump_target(ir_node *jump, ir_node *target)
 {
-       const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
-    ir_mode *mode      = attr->load_store_mode;
-    int      bits      = get_mode_size_bits(mode);
+       set_irn_link(jump, target);
+}
 
-    if (bits == 16) {
-        be_emit_string("h");
-    } else if (bits == 8) {
-        be_emit_string("b");
-    } else if (bits == 64) {
-        be_emit_string("d");
-    } else {
-        assert(bits == 32);
-    }
+static ir_node *get_jump_target(const ir_node *jump)
+{
+       return (ir_node*)get_irn_link(jump);
 }
 
 /**
- * emit integer signed/unsigned prefix char
+ * Returns the target label for a control flow node.
  */
-void sparc_emit_mode_sign_prefix(const ir_node *node)
+static void sparc_emit_cfop_target(const ir_node *node)
 {
-    ir_mode *mode      = get_irn_mode(node);
-    bool     is_signed = mode_is_signed(mode);
-    be_emit_string(is_signed ? "s" : "u");
+       ir_node *block = get_jump_target(node);
+       be_gas_emit_block_name(block);
 }
 
 /**
- * emit FP load mode char
+ * returns true if a sparc_call calls a register and not an immediate
  */
-void sparc_emit_fp_load_mode(const ir_node *node)
+static bool is_sparc_reg_call(const ir_node *node)
 {
-       const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
-    ir_mode *mode      = attr->load_store_mode;
-    int      bits      = get_mode_size_bits(mode);
+       const sparc_attr_t *attr = get_sparc_attr_const(node);
+       return attr->immediate_value_entity == NULL;
+}
 
-    assert(mode_is_float(mode));
+static int get_sparc_Call_dest_addr_pos(const ir_node *node)
+{
+       assert(is_sparc_reg_call(node));
+       return get_irn_arity(node)-1;
+}
 
-    if (bits == 32) {
-        be_emit_string("f");
-    } else if (bits == 64) {
-        be_emit_string("df");
-    } else {
-               panic("FP load mode > 64bits not implemented yet");
-    }
+static bool ba_is_fallthrough(const ir_node *node)
+{
+       ir_node *block      = get_nodes_block(node);
+       ir_node *next_block = (ir_node*)get_irn_link(block);
+       return get_jump_target(node) == next_block;
 }
 
-/**
- * emit FP store mode char
- */
-void sparc_emit_fp_store_mode(const ir_node *node)
+static bool is_no_instruction(const ir_node *node)
 {
-       const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node);
-    ir_mode *mode      = attr->load_store_mode;
-    int      bits      = get_mode_size_bits(mode);
+       /* copies are nops if src_reg == dest_reg */
+       if (be_is_Copy(node) || be_is_CopyKeep(node)) {
+               const arch_register_t *src_reg  = arch_get_irn_register_in(node, 0);
+               const arch_register_t *dest_reg = arch_get_irn_register_out(node, 0);
 
-    assert(mode_is_float(mode));
+               if (src_reg == dest_reg)
+                       return true;
+       }
+       if (be_is_IncSP(node) && be_get_IncSP_offset(node) == 0)
+               return true;
+       /* Ba is not emitted if it is a simple fallthrough */
+       if (is_sparc_Ba(node) && ba_is_fallthrough(node))
+               return true;
 
-    if (bits == 32) {
-        be_emit_string("f");
-    } else if (bits == 64) {
-        be_emit_string("df");
-    } else {
-               panic("FP store mode > 64bits not implemented yet");
-    }
+       return be_is_Keep(node) || be_is_Start(node) || is_Phi(node);
 }
 
-/**
- * emits the FP mode suffix char
+static bool has_delay_slot(const ir_node *node)
+{
+       if (is_sparc_Ba(node)) {
+               return !ba_is_fallthrough(node);
+       }
+
+       return arch_get_irn_flags(node) & sparc_arch_irn_flag_has_delay_slot;
+}
+
+/** returns true if the emitter for this sparc node can produce more than one
+ * actual sparc instruction.
+ * Usually it is a bad sign if we have to add instructions here. We should
+ * rather try to get them lowered down. So we can actually put them into
+ * delay slots and make them more accessible to the scheduler.
  */
-void sparc_emit_fp_mode_suffix(const ir_node *node)
+static bool emits_multiple_instructions(const ir_node *node)
 {
-    ir_mode *mode      = get_irn_mode(node);
-    int      bits      = get_mode_size_bits(mode);
+       if (has_delay_slot(node))
+               return true;
 
-    assert(mode_is_float(mode));
+       if (is_sparc_Call(node))
+               return arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return;
 
-    if (bits == 32) {
-               be_emit_string("s");
-    } else if (bits == 64) {
-               be_emit_string("d");
-    } else {
-               panic("FP mode > 64bits not implemented yet");
-    }
+       return is_sparc_SMulh(node) || is_sparc_UMulh(node)
+               || is_sparc_SDiv(node) || is_sparc_UDiv(node)
+               || be_is_MemPerm(node) || be_is_Perm(node)
+               || is_sparc_SubSP(node);
 }
 
-/**
- * Returns the target label for a control flow node.
- */
-static void sparc_emit_cfop_target(const ir_node *node)
+static bool uses_reg(const ir_node *node, unsigned reg_index, unsigned width)
 {
-       ir_node *block = get_irn_link(node);
-       be_gas_emit_block_name(block);
+       int arity = get_irn_arity(node);
+       for (int i = 0; i < arity; ++i) {
+               const arch_register_t     *in_reg = arch_get_irn_register_in(node, i);
+               const arch_register_req_t *in_req = arch_get_irn_register_req_in(node, i);
+               if (in_reg == NULL)
+                       continue;
+               if (reg_index < (unsigned)in_reg->global_index + in_req->width
+                       && reg_index + width > in_reg->global_index)
+                       return true;
+       }
+       return false;
 }
 
-/**
- * Emit single entity
- */
-static void sparc_emit_entity(ir_entity *entity)
+static bool writes_reg(const ir_node *node, unsigned reg_index, unsigned width)
 {
-       be_gas_emit_entity(entity);
+       be_foreach_out(node, o) {
+               const arch_register_t     *out_reg = arch_get_irn_register_out(node, o);
+               if (out_reg == NULL)
+                       continue;
+               const arch_register_req_t *out_req = arch_get_irn_register_req_out(node, o);
+               if (reg_index < (unsigned)out_reg->global_index + out_req->width
+                       && reg_index + width > out_reg->global_index)
+                       return true;
+       }
+       return false;
 }
 
-/***********************************************************************************
- *                  _          __                                             _
- *                 (_)        / _|                                           | |
- *  _ __ ___   __ _ _ _ __   | |_ _ __ __ _ _ __ ___   _____      _____  _ __| | __
- * | '_ ` _ \ / _` | | '_ \  |  _| '__/ _` | '_ ` _ \ / _ \ \ /\ / / _ \| '__| |/ /
- * | | | | | | (_| | | | | | | | | | | (_| | | | | | |  __/\ V  V / (_) | |  |   <
- * |_| |_| |_|\__,_|_|_| |_| |_| |_|  \__,_|_| |_| |_|\___| \_/\_/ \___/|_|  |_|\_\
- *
- ***********************************************************************************/
+static bool is_legal_delay_slot_filler(const ir_node *node)
+{
+       if (is_no_instruction(node))
+               return false;
+       if (emits_multiple_instructions(node))
+               return false;
+       if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
+               return false;
+       return true;
+}
 
+static bool can_move_down_into_delayslot(const ir_node *node, const ir_node *to)
+{
+       if (!is_legal_delay_slot_filler(node))
+               return false;
+
+       if (!be_can_move_down(heights, node, to))
+               return false;
+
+       if (is_sparc_Call(to)) {
+               ir_node *check;
+               /** all inputs are used after the delay slot so, we're fine */
+               if (!is_sparc_reg_call(to))
+                       return true;
+
+               check = get_irn_n(to, get_sparc_Call_dest_addr_pos(to));
+               if (skip_Proj(check) == node)
+                       return false;
+
+               /* the Call also destroys the value of %o7, but since this is
+                * currently marked as ignore register in the backend, it
+                * should never be used by the instruction in the delay slot. */
+               if (uses_reg(node, REG_O7, 1))
+                       return false;
+               return true;
+       } else if (is_sparc_Return(to)) {
+               /* return uses the value of %o7, all other values are not
+                * immediately used */
+               if (writes_reg(node, REG_O7, 1))
+                       return false;
+               return true;
+       } else {
+               /* the node must not use our computed values */
+               int arity = get_irn_arity(to);
+               for (int i = 0; i < arity; ++i) {
+                       ir_node *in = get_irn_n(to, i);
+                       if (skip_Proj(in) == node)
+                               return false;
+               }
+               return true;
+       }
+}
 
-/**
- * Emits code for stack space management
- */
-static void emit_be_IncSP(const ir_node *irn)
+static bool can_move_up_into_delayslot(const ir_node *node, const ir_node *to)
 {
-       int offs = -be_get_IncSP_offset(irn);
+       if (!be_can_move_up(heights, node, to))
+               return false;
+
+       /* node must not use any results of 'to' */
+       int arity = get_irn_arity(node);
+       for (int i = 0; i < arity; ++i) {
+               ir_node *in      = get_irn_n(node, i);
+               ir_node *skipped = skip_Proj(in);
+               if (skipped == to)
+                       return false;
+       }
 
-       if (offs == 0)
-                       return;
+       /* register window cycling effects at Restore aren't correctly represented
+        * in the graph yet so we need this exception here */
+       if (is_sparc_Restore(node) || is_sparc_RestoreZero(node)) {
+               return false;
+       } else if (is_sparc_Call(to)) {
+               /* node must not overwrite any of the inputs of the call,
+                * (except for the dest_addr) */
+               int dest_addr_pos = is_sparc_reg_call(to)
+                       ? get_sparc_Call_dest_addr_pos(to) : -1;
+
+               int call_arity = get_irn_arity(to);
+               for (int i = 0; i < call_arity; ++i) {
+                       if (i == dest_addr_pos)
+                               continue;
+                       const arch_register_t *reg = arch_get_irn_register_in(to, i);
+                       if (reg == NULL)
+                               continue;
+                       const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
+                       if (writes_reg(node, reg->global_index, req->width))
+                               return false;
+               }
 
-       /* SPARC stack grows downwards */
-       if (offs < 0) {
-               be_emit_cstring("\tsub ");
-               offs = -offs;
-       } else {
-               be_emit_cstring("\tadd ");
+               /* node must not write to one of the call outputs */
+               be_foreach_out(to, o) {
+                       const arch_register_t *reg = arch_get_irn_register_out(to, o);
+                       if (reg == NULL)
+                               continue;
+                       const arch_register_req_t *req = arch_get_irn_register_req_out(to, o);
+                       if (writes_reg(node, reg->global_index, req->width))
+                               return false;
+               }
+       } else if (is_sparc_SDiv(to) || is_sparc_UDiv(to)) {
+               /* node will be inserted between wr and div so it must not overwrite
+                * anything except the wr input */
+               int arity = get_irn_arity(to);
+               for (int i = 0; i < arity; ++i) {
+                       assert((long)n_sparc_SDiv_dividend_high == (long)n_sparc_UDiv_dividend_high);
+                       if (i == n_sparc_SDiv_dividend_high)
+                               continue;
+                       const arch_register_t *reg = arch_get_irn_register_in(to, i);
+                       if (reg == NULL)
+                               continue;
+                       const arch_register_req_t *req = arch_get_irn_register_req_in(to, i);
+                       if (writes_reg(node, reg->global_index, req->width))
+                               return false;
+               }
+}
+       return true;
+}
+
+static void optimize_fallthrough(ir_node *node)
+{
+       ir_node *proj_true  = NULL;
+       ir_node *proj_false = NULL;
+
+       assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
+       assert((long)pn_sparc_Bicc_true  == (long)pn_sparc_fbfcc_true);
+       foreach_out_edge(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               long nr = get_Proj_proj(proj);
+               if (nr == pn_sparc_Bicc_true) {
+                       proj_true = proj;
+               } else {
+                       assert(nr == pn_sparc_Bicc_false);
+                       proj_false = proj;
+               }
        }
+       assert(proj_true != NULL && proj_false != NULL);
+
+       /* for now, the code works for scheduled and non-schedules blocks */
+       const ir_node *block = get_nodes_block(node);
+
+       /* we have a block schedule */
+       const ir_node *next_block = (ir_node*)get_irn_link(block);
+
+       if (get_jump_target(proj_true) == next_block) {
+               /* exchange both proj destinations so the second one can be omitted */
+               set_Proj_proj(proj_true,  pn_sparc_Bicc_false);
+               set_Proj_proj(proj_false, pn_sparc_Bicc_true);
 
-       sparc_emit_source_register(irn, 0);
-       be_emit_irprintf(", %d", offs);
-       be_emit_cstring(", ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+               sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
+               attr->relation = get_negated_relation(attr->relation);
+       }
 }
 
 /**
- * emits code for save instruction with min. required stack space
+ * search for an instruction that can fill the delay slot of @p node
  */
-static void emit_sparc_Save(const ir_node *irn)
+static ir_node *pick_delay_slot_for(ir_node *node)
 {
-       save_attr = get_sparc_save_attr_const(irn);
-       be_emit_cstring("\tsave ");
-       sparc_emit_source_register(irn, 0);
-       be_emit_irprintf(", %d, ", -save_attr->initial_stacksize);
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+       static const unsigned PICK_DELAY_SLOT_MAX_DISTANCE = 10;
+       assert(has_delay_slot(node));
+
+       if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
+               optimize_fallthrough(node);
+       }
+
+       unsigned tries = 0;
+       sched_foreach_reverse_from(sched_prev(node), schedpoint) {
+               if (has_delay_slot(schedpoint))
+                       break;
+               if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
+                       break;
+
+               if (!can_move_down_into_delayslot(schedpoint, node))
+                       continue;
+
+               /* found something */
+               return schedpoint;
+       }
+
+       /* search after the current position */
+       tries = 0;
+       sched_foreach_from(sched_next(node), schedpoint) {
+               if (has_delay_slot(schedpoint))
+                       break;
+               if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
+                       break;
+               if (!is_legal_delay_slot_filler(schedpoint))
+                       continue;
+               if (!can_move_up_into_delayslot(schedpoint, node))
+                       continue;
+
+               /* found something */
+               return schedpoint;
+       }
+
+       /* look in successor blocks */
+       ir_node *block = get_nodes_block(node);
+       /* TODO: sort succs by execution frequency */
+       foreach_block_succ(block, edge) {
+               ir_node *succ = get_edge_src_irn(edge);
+               /* we can't easily move up stuff from blocks with multiple predecessors
+                * since the instruction is lacking for the other preds then.
+                * (We also don't have to do any phi translation) */
+               if (get_Block_n_cfgpreds(succ) > 1)
+                       continue;
+
+               tries = 0;
+               sched_foreach(succ, schedpoint) {
+                       if (has_delay_slot(schedpoint))
+                               break;
+                       /* can't move pinned nodes accross blocks */
+                       if (get_irn_pinned(schedpoint) == op_pin_state_pinned)
+                               continue;
+                       /* restore doesn't model register window switching correctly,
+                        * so it appears like we could move it, which is not true */
+                       if (is_sparc_Restore(schedpoint)
+                           || is_sparc_RestoreZero(schedpoint))
+                               continue;
+                       if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE)
+                               break;
+                       if (!is_legal_delay_slot_filler(schedpoint))
+                               continue;
+                       if (can_move_up_into_delayslot(schedpoint, node)) {
+                               /* it's fine to move the insn accross blocks */
+                               return schedpoint;
+                       } else if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) {
+                               ir_node *proj = get_Block_cfgpred(succ, 0);
+                               long     nr   = get_Proj_proj(proj);
+                               if ((nr == pn_sparc_Bicc_true || nr == pn_sparc_fbfcc_true)
+                                       && be_can_move_up(heights, schedpoint, succ)) {
+                                       /* we can use it with the annul flag */
+                                       sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node);
+                                       attr->annul_delay_slot = true;
+                                       return schedpoint;
+                               }
+                       }
+               }
+       }
+
+       return NULL;
 }
 
-/**
- * emits code to load hi 22 bit of a constant
- */
-static void emit_sparc_HiImm(const ir_node *irn)
+void sparc_emitf(ir_node const *const node, char const *fmt, ...)
 {
-       const sparc_attr_t *attr = get_sparc_attr_const(irn);
-       be_emit_cstring("\tsethi ");
-       be_emit_irprintf("%%hi(%d), ", attr->immediate_value);
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+       va_list ap;
+       va_start(ap, fmt);
+       sparc_emit_indent();
+       for (;;) {
+               char const *start = fmt;
+
+               while (*fmt != '%' && *fmt != '\0')
+                       ++fmt;
+               be_emit_string_len(start, fmt - start);
+               if (*fmt == '\0')
+                       break;
+               ++fmt;
+
+               bool plus = false;
+               if (*fmt == '+') {
+                       plus = true;
+                       ++fmt;
+               }
+
+               switch (*fmt++) {
+               case '%':
+                       be_emit_char('%');
+                       break;
+
+               case 'A': {
+                       const sparc_jmp_cond_attr_t *attr
+                               = get_sparc_jmp_cond_attr_const(node);
+                       if (attr->annul_delay_slot) {
+                               be_emit_cstring(",a");
+                       }
+                       break;
+               }
+
+               case 'D':
+                       if (*fmt < '0' || '9' <= *fmt)
+                               goto unknown;
+                       sparc_emit_dest_register(node, *fmt++ - '0');
+                       break;
+
+               case 'E': {
+                       sparc_attr_t const *const attr = get_sparc_attr_const(node);
+                       be_gas_emit_entity(attr->immediate_value_entity);
+                       if (attr->immediate_value != 0) {
+                               be_emit_irprintf(plus ? "%+d" : "%d", attr->immediate_value);
+                       }
+                       break;
+               }
+
+               case 'F': {
+                       ir_mode *mode;
+                       switch (*fmt++) {
+                       case 'D': mode = get_sparc_fp_conv_attr_const(node)->dest_mode; break;
+                       case 'M': mode = get_sparc_fp_attr_const(node)->fp_mode;        break;
+                       case 'S': mode = get_sparc_fp_conv_attr_const(node)->src_mode;  break;
+                       default:  goto unknown;
+                       }
+                       emit_fp_suffix(mode);
+                       break;
+               }
+
+               case 'H':
+                       sparc_emit_high_immediate(node);
+                       break;
+
+               case 'L': {
+                       ir_node *n = va_arg(ap, ir_node*);
+                       sparc_emit_cfop_target(n);
+                       break;
+               }
+
+               case 'M':
+                       switch (*fmt++) {
+                       case 'L': sparc_emit_load_mode(node);  break;
+                       case 'S': sparc_emit_store_mode(node); break;
+                       default:  goto unknown;
+                       }
+                       break;
+
+               case 'O':
+                       if (*fmt < '0' || '9' <= *fmt)
+                               goto unknown;
+                       sparc_emit_offset(node, *fmt++ - '0');
+                       break;
+
+               case 'R': {
+                       arch_register_t const *const reg = va_arg(ap, const arch_register_t*);
+                       be_emit_char('%');
+                       be_emit_string(reg->name);
+                       break;
+               }
+
+               case 'S': {
+                       bool imm = false;
+                       if (*fmt == 'I') {
+                               imm = true;
+                               ++fmt;
+                       }
+                       if (*fmt < '0' || '9' <= *fmt)
+                               goto unknown;
+                       unsigned const pos = *fmt++ - '0';
+                       if (imm && arch_get_irn_flags(node) & (arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) {
+                               sparc_emit_immediate(node);
+                       } else {
+                               sparc_emit_source_register(node, pos);
+                       }
+                       break;
+               }
+
+               case 'd': {
+                       int const num = va_arg(ap, int);
+                       be_emit_irprintf(plus ? "%+d" : "%d", num);
+                       break;
+               }
+
+               case 's': {
+                       char const *const str = va_arg(ap, char const*);
+                       be_emit_string(str);
+                       break;
+               }
+
+               case 'u': {
+                       unsigned const num = va_arg(ap, unsigned);
+                       be_emit_irprintf(plus ? "%+u" : "%u", num);
+                       break;
+               }
+
+               default:
+unknown:
+                       panic("unknown format conversion in sparc_emitf()");
+               }
+       }
+       be_emit_finish_line_gas(node);
+       va_end(ap);
 }
 
 /**
- * emits code to load lo 10bits of a constant
+ * Emits code for stack space management
  */
-static void emit_sparc_LoImm(const ir_node *irn)
+static void emit_be_IncSP(const ir_node *irn)
 {
-       const sparc_attr_t *attr = get_sparc_attr_const(irn);
-       be_emit_cstring("\tor ");
-       sparc_emit_source_register(irn, 0);
-       be_emit_irprintf(", %%lo(%d), ", attr->immediate_value);
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+       int offset = be_get_IncSP_offset(irn);
+
+       if (offset == 0)
+               return;
+
+       /* SPARC stack grows downwards */
+       char const *const insn = offset > 0 ? offset = -offset, "add" : "sub";
+       sparc_emitf(irn, "%s %S0, %d, %D0", insn, offset);
 }
 
 /**
- * emit code for div with the correct sign prefix
+ * Emits code for stack space management.
  */
-static void emit_sparc_Div(const ir_node *irn)
+static void emit_sparc_SubSP(const ir_node *irn)
 {
-       be_emit_cstring("\t");
-       sparc_emit_mode_sign_prefix(irn);
-       be_emit_cstring("div ");
+       sparc_emitf(irn, "sub %S0, %SI1, %D0");
+       sparc_emitf(irn, "add %S0, %u, %D1", SPARC_MIN_STACKSIZE);
+}
 
-       sparc_emit_source_register(irn, 0);
-       be_emit_cstring(", ");
-       sparc_emit_reg_or_imm(irn, 1);
-       be_emit_cstring(", ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+static void fill_delay_slot(const ir_node *node)
+{
+       emitting_delay_slot = true;
+       const ir_node *filler = pmap_get(ir_node, delay_slots, node);
+       if (filler != NULL) {
+               assert(!is_no_instruction(filler));
+               assert(!emits_multiple_instructions(filler));
+               be_emit_node(filler);
+       } else {
+               sparc_emitf(NULL, "nop");
+       }
+       emitting_delay_slot = false;
 }
 
-/**
- * emit code for mul with the correct sign prefix
- */
-static void emit_sparc_Mul(const ir_node *irn)
+static void emit_sparc_Div(const ir_node *node, char const *const insn)
 {
-       be_emit_cstring("\t");
-       sparc_emit_mode_sign_prefix(irn);
-       be_emit_cstring("mul ");
+       sparc_emitf(node, "wr %S0, 0, %%y");
+
+       /* TODO: we should specify number of delayslots in an architecture
+        * specification */
+       unsigned wry_delay_count = 3;
+       for (unsigned i = 0; i < wry_delay_count; ++i) {
+               if (i == 0) {
+                       fill_delay_slot(node);
+               } else {
+                       emitting_delay_slot = true;
+                       sparc_emitf(NULL, "nop");
+                       emitting_delay_slot = false;
+               }
+       }
 
-       sparc_emit_source_register(irn, 0);
-       be_emit_cstring(", ");
-       sparc_emit_reg_or_imm(irn, 1);
-       be_emit_cstring(", ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+       sparc_emitf(node, "%s %S1, %SI2, %D0", insn);
 }
 
-/**
- * emits code for mulh
- */
-static void emit_sparc_Mulh(const ir_node *irn)
-{
-       be_emit_cstring("\t");
-       sparc_emit_mode_sign_prefix(irn);
-       be_emit_cstring("mul ");
-
-       sparc_emit_source_register(irn, 0);
-       be_emit_cstring(", ");
-       sparc_emit_reg_or_imm(irn, 1);
-       be_emit_cstring(", ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
-
-       // our result is in the y register now
-       // we just copy it to the assigned target reg
-       be_emit_cstring("\tmov ");
-       be_emit_char('%');
-       be_emit_string(arch_register_get_name(&sparc_flags_regs[REG_Y]));
-       be_emit_cstring(", ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+static void emit_sparc_SDiv(const ir_node *node)
+{
+       emit_sparc_Div(node, "sdiv");
 }
 
-/**
- * Emits code for return node
- */
-static void emit_be_Return(const ir_node *irn)
+static void emit_sparc_UDiv(const ir_node *node)
 {
-       be_emit_cstring("\tret");
-       //be_emit_cstring("\tjmp %i7+8");
-       be_emit_finish_line_gas(irn);
-       be_emit_cstring("\trestore");
-       be_emit_finish_line_gas(irn);
+       emit_sparc_Div(node, "udiv");
 }
 
-/**
- * Emits code for Call node
- */
-static void emit_be_Call(const ir_node *irn)
-{
-       ir_entity *entity = be_Call_get_entity(irn);
-
-       if (entity != NULL) {
-               be_emit_cstring("\tcall ");
-           sparc_emit_entity(entity);
-               be_emit_cstring(", 0");
-               be_emit_finish_line_gas(irn);
-               be_emit_cstring("\tnop");
-               be_emit_pad_comment();
-               be_emit_cstring("/* TODO: use delay slot */\n");
+static void emit_sparc_Call(const ir_node *node)
+{
+       if (is_sparc_reg_call(node)) {
+               int dest_addr = get_sparc_Call_dest_addr_pos(node);
+               sparc_emitf(node, "call %R", arch_get_irn_register_in(node, dest_addr));
        } else {
-               be_emit_cstring("\tnop\n");
-               be_emit_pad_comment();
-               be_emit_cstring("/* TODO: Entity == NULL */\n");
-               be_emit_finish_line_gas(irn);
+               sparc_emitf(node, "call %E, 0");
+       }
+
+       fill_delay_slot(node);
+
+       if (arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return) {
+               sparc_emitf(NULL, "unimp 8");
        }
 }
 
-/**
- * Emit code for Perm node
- */
 static void emit_be_Perm(const ir_node *irn)
 {
-       be_emit_cstring("\txor ");
-       sparc_emit_source_register(irn, 1);
-       be_emit_cstring(", ");
-       sparc_emit_source_register(irn, 0);
-       be_emit_cstring(", ");
-       sparc_emit_source_register(irn, 0);
-       be_emit_finish_line_gas(NULL);
-
-       be_emit_cstring("\txor ");
-       sparc_emit_source_register(irn, 1);
-       be_emit_cstring(", ");
-       sparc_emit_source_register(irn, 0);
-       be_emit_cstring(", ");
-       sparc_emit_source_register(irn, 1);
-       be_emit_finish_line_gas(NULL);
-
-       be_emit_cstring("\txor ");
-       sparc_emit_source_register(irn, 1);
-       be_emit_cstring(", ");
-       sparc_emit_source_register(irn, 0);
-       be_emit_cstring(", ");
-       sparc_emit_source_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+       ir_mode *mode = get_irn_mode(get_irn_n(irn, 0));
+       if (mode_is_float(mode)) {
+               const arch_register_t *reg0 = arch_get_irn_register_in(irn, 0);
+               const arch_register_t *reg1 = arch_get_irn_register_in(irn, 1);
+               unsigned reg_idx0 = reg0->global_index;
+               unsigned reg_idx1 = reg1->global_index;
+               unsigned width    = arch_get_irn_register_req_in(irn, 0)->width;
+               for (unsigned i = 0; i < width; ++i) {
+                       const arch_register_t *r0 = &sparc_registers[reg_idx0+i];
+                       const arch_register_t *r1 = &sparc_registers[reg_idx1+i];
+                       sparc_emitf(irn, "fmovs %R, %%f31", r0);
+                       sparc_emitf(irn, "fmovs %R, %R", r1, r0);
+                       sparc_emitf(irn, "fmovs %%f31, %R", r1);
+               }
+       } else {
+               sparc_emitf(irn, "xor %S1, %S0, %S0");
+               sparc_emitf(irn, "xor %S1, %S0, %S1");
+               sparc_emitf(irn, "xor %S1, %S0, %S0");
+       }
 }
 
-/**
- * TODO: not really tested but seems to work with memperm_arity == 1
- */
-static void emit_be_MemPerm(const ir_node *node)
+/* The stack pointer must always be SPARC_STACK_ALIGNMENT bytes aligned, so get
+ * the next bigger integer that's evenly divisible by it. */
+static unsigned get_aligned_sp_change(const unsigned num_regs)
 {
-       int i;
-       int memperm_arity;
-       int sp_change = 0;
-
-       /* TODO: this implementation is slower than necessary.
-          The longterm goal is however to avoid the memperm node completely */
+       const unsigned bytes = num_regs * SPARC_REGISTER_SIZE;
+       return round_up2(bytes, SPARC_STACK_ALIGNMENT);
+}
 
-       memperm_arity = be_get_MemPerm_entity_arity(node);
-       // we use our local registers - so this is limited to 8 inputs !
-       if (memperm_arity > 8)
-               panic("memperm with more than 8 inputs not supported yet");
+/* Spill register l0 or both l0 and l1, depending on n_spilled and n_to_spill.*/
+static void memperm_emit_spill_registers(const ir_node *node, int n_spilled,
+                                         int n_to_spill)
+{
+       assert(n_spilled < n_to_spill);
 
-       for (i = 0; i < memperm_arity; ++i) {
-               int offset;
-               ir_entity *entity = be_get_MemPerm_in_entity(node, i);
+       if (n_spilled == 0) {
+               /* We always reserve stack space for two registers because during copy
+                * processing we don't know yet if we also need to handle a cycle which
+                * needs two registers.  More complicated code in emit_MemPerm would
+                * prevent wasting SPARC_REGISTER_SIZE bytes of stack space but
+                * it is not worth the worse readability of emit_MemPerm. */
 
-               /* spill register */
-               sp_change += 4;
-               be_emit_irprintf("\tst %%l%d, [%%sp-%d]", i, sp_change);
-               be_emit_finish_line_gas(node);
+               /* Keep stack pointer aligned. */
+               unsigned sp_change = get_aligned_sp_change(2);
+               sparc_emitf(node, "sub %%sp, %u, %%sp", sp_change);
 
-               /* load from entity */
-               offset = get_entity_offset(entity) + sp_change;
-               be_emit_irprintf("\tld [%%sp+%d], %%l%d", offset, i);
-               be_emit_finish_line_gas(node);
+               /* Spill register l0. */
+               sparc_emitf(node, "st %%l0, [%%sp%+d]", SPARC_MIN_STACKSIZE);
        }
 
-       for (i = memperm_arity-1; i >= 0; --i) {
-               int        offset;
-               ir_entity *entity = be_get_MemPerm_out_entity(node, i);
+       if (n_to_spill == 2) {
+               /* Spill register l1. */
+               sparc_emitf(node, "st %%l1, [%%sp%+d]", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
+       }
+}
 
-               /* store to new entity */
-               offset = get_entity_offset(entity) + sp_change;
-               be_emit_irprintf("\tst %%l%d, [%%sp+%d]", i, offset);
-               be_emit_finish_line_gas(node);
-               /* restore register */
-               be_emit_irprintf("\tld [%%sp-%d], %%l%d", sp_change, i);
-               sp_change -= 4;
-               be_emit_finish_line_gas(node);
+/* Restore register l0 or both l0 and l1, depending on n_spilled. */
+static void memperm_emit_restore_registers(const ir_node *node, int n_spilled)
+{
+       if (n_spilled == 2) {
+               /* Restore register l1. */
+               sparc_emitf(node, "ld [%%sp%+d], %%l1", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE);
        }
-       assert(sp_change == 0);
+
+       /* Restore register l0. */
+       sparc_emitf(node, "ld [%%sp%+d], %%l0", SPARC_MIN_STACKSIZE);
+
+       /* Restore stack pointer. */
+       unsigned sp_change = get_aligned_sp_change(2);
+       sparc_emitf(node, "add %%sp, %u, %%sp", sp_change);
 }
 
-/**
- * Emit a SymConst.
- */
-static void emit_sparc_SymConst(const ir_node *irn)
+/* Emit code to copy in_ent to out_ent.  Only uses l0. */
+static void memperm_emit_copy(const ir_node *node, ir_entity *in_ent,
+                              ir_entity *out_ent)
 {
-       const sparc_symconst_attr_t *attr = get_sparc_symconst_attr_const(irn);
+       ir_graph          *irg     = get_irn_irg(node);
+       be_stack_layout_t *layout  = be_get_irg_stack_layout(irg);
+       int                off_in  = be_get_stack_entity_offset(layout, in_ent, 0);
+       int                off_out = be_get_stack_entity_offset(layout, out_ent, 0);
+
+       /* Load from input entity. */
+       sparc_emitf(node, "ld [%%fp%+d], %%l0", off_in);
+       /* Store to output entity. */
+       sparc_emitf(node, "st %%l0, [%%fp%+d]", off_out);
+}
 
-       //sethi %hi(const32),%reg
-       //or    %reg,%lo(const32),%reg
+/* Emit code to swap ent1 and ent2.  Uses l0 and l1. */
+static void memperm_emit_swap(const ir_node *node, ir_entity *ent1,
+                              ir_entity *ent2)
+{
+       ir_graph          *irg     = get_irn_irg(node);
+       be_stack_layout_t *layout  = be_get_irg_stack_layout(irg);
+       int                off1    = be_get_stack_entity_offset(layout, ent1, 0);
+       int                off2    = be_get_stack_entity_offset(layout, ent2, 0);
+
+       /* Load from first input entity. */
+       sparc_emitf(node, "ld [%%fp%+d], %%l0", off1);
+       /* Load from second input entity. */
+       sparc_emitf(node, "ld [%%fp%+d], %%l1", off2);
+       /* Store first value to second output entity. */
+       sparc_emitf(node, "st %%l0, [%%fp%+d]", off2);
+       /* Store second value to first output entity. */
+       sparc_emitf(node, "st %%l1, [%%fp%+d]", off1);
+}
 
-       be_emit_cstring("\tsethi %hi(");
-       be_gas_emit_entity(attr->entity);
-       be_emit_cstring("), ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_cstring("\n ");
+/* Find the index of ent in ents or return -1 if not found. */
+static int get_index(ir_entity **ents, int n, ir_entity *ent)
+{
+       for (int i = 0; i < n; ++i) {
+               if (ents[i] == ent)
+                       return i;
+       }
 
-       // TODO: could be combined with the following load/store instruction
-       be_emit_cstring("\tor ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_cstring(", %lo(");
-       be_gas_emit_entity(attr->entity);
-       be_emit_cstring("), ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
+       return -1;
 }
 
-/**
- * Emits code for FrameAddr fix
+/*
+ * Emit code for a MemPerm node.
+ *
+ * Analyze MemPerm for copy chains and cyclic swaps and resolve them using
+ * loads and stores.
+ * This function is conceptually very similar to permute_values in
+ * beprefalloc.c.
  */
-static void emit_sparc_FrameAddr(const ir_node *irn)
+static void emit_be_MemPerm(const ir_node *node)
 {
-       const sparc_symconst_attr_t *attr = get_irn_generic_attr_const(irn);
+       int         memperm_arity = be_get_MemPerm_entity_arity(node);
+       /* Upper limit for the number of participating entities is twice the
+        * arity, e.g., for a simple copying MemPerm node with one input/output. */
+       int         max_size      = 2 * memperm_arity;
+       ir_entity **entities      = ALLOCANZ(ir_entity *, max_size);
+       /* sourceof contains the input entity for each entity.  If an entity is
+        * never used as an output, its entry in sourceof is a fix point. */
+       int        *sourceof      = ALLOCANZ(int,         max_size);
+       /* n_users counts how many output entities use this entity as their input.*/
+       int        *n_users       = ALLOCANZ(int,         max_size);
+       /* n_spilled records the number of spilled registers, either 1 or 2. */
+       int         n_spilled     = 0;
+
+       /* This implementation currently only works with frame pointers. */
+       ir_graph          *irg    = get_irn_irg(node);
+       be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
+       assert(!layout->sp_relative && "MemPerms currently do not work without frame pointers");
+
+       for (int i = 0; i < max_size; ++i) {
+               sourceof[i] = i;
+       }
 
-       // no need to fix offset as we are adressing via the framepointer
-       if (attr->fp_offset >= 0) {
-               be_emit_cstring("\tadd ");
-               sparc_emit_source_register(irn, 0);
-               be_emit_cstring(", ");
-               be_emit_irprintf("%ld", attr->fp_offset + save_attr->initial_stacksize);
-       } else {
-               be_emit_cstring("\tsub ");
-               sparc_emit_source_register(irn, 0);
-               be_emit_cstring(", ");
-               be_emit_irprintf("%ld", -attr->fp_offset);
+       int n = 0;
+       for (int i = 0; i < memperm_arity; ++i) {
+               ir_entity *out  = be_get_MemPerm_out_entity(node, i);
+               ir_entity *in   = be_get_MemPerm_in_entity(node, i);
+
+               /* Insert into entities to be able to operate on unique indices. */
+               if (get_index(entities, n, out) == -1)
+                       entities[n++] = out;
+               if (get_index(entities, n, in) == -1)
+                       entities[n++] = in;
+
+               int oidx = get_index(entities, n, out);
+               int iidx = get_index(entities, n, in);
+
+               sourceof[oidx] = iidx; /* Remember the source. */
+               ++n_users[iidx]; /* Increment number of users of this entity. */
        }
 
-       be_emit_cstring(", ");
-       sparc_emit_dest_register(irn, 0);
-       be_emit_finish_line_gas(irn);
-}
+       /* First do all the copies. */
+       for (int oidx = 0; oidx < n; /* empty */) {
+               int iidx = sourceof[oidx];
 
+               /* Nothing to do for fix points.
+                * Also, if entities[oidx] is used as an input by another copy, we
+                * can't overwrite entities[oidx] yet.*/
+               if (iidx == oidx || n_users[oidx] > 0) {
+                       ++oidx;
+                       continue;
+               }
 
-/**
- * Emits code for Branch
- */
-static void emit_sparc_Branch(const ir_node *irn)
-{
-       const ir_edge_t *edge;
-       const ir_node *proj_true  = NULL;
-       const ir_node *proj_false = NULL;
-       const ir_node *block;
-       const ir_node *next_block;
-       ir_node *op1 = get_irn_n(irn, 0);
-       const char *suffix;
-       int proj_num = get_sparc_jmp_cond_proj_num(irn);
-       const sparc_cmp_attr_t *cmp_attr = get_irn_generic_attr_const(op1);
-       // bool is_signed = !cmp_attr->is_unsigned;
-
-       assert(is_sparc_Cmp(op1) || is_sparc_Tst(op1));
-
-       foreach_out_edge(irn, edge) {
-               ir_node *proj = get_edge_src_irn(edge);
-               long nr = get_Proj_proj(proj);
-               if (nr == pn_Cond_true) {
-                       proj_true = proj;
+               /* We found the end of a 'chain', so do the copy. */
+               if (n_spilled == 0) {
+                       memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/1);
+                       n_spilled = 1;
+               }
+               memperm_emit_copy(node, entities[iidx], entities[oidx]);
+
+               /* Mark as done. */
+               sourceof[oidx] = oidx;
+
+               assert(n_users[iidx] > 0);
+               /* Decrementing the number of users might enable us to do another
+                * copy. */
+               --n_users[iidx];
+
+               if (iidx < oidx && n_users[iidx] == 0) {
+                       oidx = iidx;
                } else {
-                       proj_false = proj;
+                       ++oidx;
                }
        }
 
-       if (cmp_attr->ins_permuted) {
-               proj_num = get_mirrored_pnc(proj_num);
-       }
+       /* The rest are cycles. */
+       for (int oidx = 0; oidx < n; /* empty */) {
+               int iidx = sourceof[oidx];
 
-       /* for now, the code works for scheduled and non-schedules blocks */
-       block = get_nodes_block(irn);
+               /* Nothing to do for fix points. */
+               if (iidx == oidx) {
+                       ++oidx;
+                       continue;
+               }
 
-       /* we have a block schedule */
-       next_block = get_irn_link(block);
+               assert(n_users[iidx] == 1);
+
+               /* Swap the two values to resolve the cycle. */
+               if (n_spilled < 2) {
+                       memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/2);
+                       n_spilled = 2;
+               }
+               memperm_emit_swap(node, entities[iidx], entities[oidx]);
 
-       assert(proj_num != pn_Cmp_False);
-       assert(proj_num != pn_Cmp_True);
+               int tidx = sourceof[iidx];
+               /* Mark as done. */
+               sourceof[iidx] = iidx;
 
-       if (get_irn_link(proj_true) == next_block) {
-               /* exchange both proj's so the second one can be omitted */
-               const ir_node *t = proj_true;
+               /* The source of oidx is now the old source of iidx, because we swapped
+                * the two entities. */
+               sourceof[oidx] = tidx;
+       }
 
-               proj_true  = proj_false;
-               proj_false = t;
-               proj_num   = get_negated_pnc(proj_num, mode_Iu);
+#ifdef DEBUG_libfirm
+       /* Only fix points should remain. */
+       for (int i = 0; i < max_size; ++i) {
+               assert(sourceof[i] == i);
        }
+#endif
+
+       assert(n_spilled > 0 && "Useless MemPerm node");
 
+       memperm_emit_restore_registers(node, n_spilled);
+}
 
-       switch (proj_num) {
-               case pn_Cmp_Eq:  suffix = "e"; break;
-               case pn_Cmp_Lt:  suffix = "l"; break;
-               case pn_Cmp_Le:  suffix = "le"; break;
-               case pn_Cmp_Gt:  suffix = "g"; break;
-               case pn_Cmp_Ge:  suffix = "ge"; break;
-               case pn_Cmp_Lg:  suffix = "ne"; break;
-               case pn_Cmp_Leg: suffix = "a"; break;
-               default: panic("Cmp has unsupported pnc");
+static void emit_sparc_Return(const ir_node *node)
+{
+       ir_graph  *irg    = get_irn_irg(node);
+       ir_entity *entity = get_irg_entity(irg);
+       ir_type   *type   = get_entity_type(entity);
+
+       const char *destreg = "%o7";
+
+       /* hack: we don't explicitely model register changes because of the
+        * restore node. So we have to do it manually here */
+       const ir_node *delay_slot = pmap_get(ir_node, delay_slots, node);
+       if (delay_slot != NULL &&
+           (is_sparc_Restore(delay_slot) || is_sparc_RestoreZero(delay_slot))) {
+               destreg = "%i7";
        }
+       char const *const offset = get_method_calling_convention(type) & cc_compound_ret ? "12" : "8";
+       sparc_emitf(node, "jmp %s+%s", destreg, offset);
+       fill_delay_slot(node);
+}
 
-       /* emit the true proj */
-       be_emit_irprintf("\tb%s ", suffix);
-       sparc_emit_cfop_target(proj_true);
-       be_emit_finish_line_gas(proj_true);
-
-       be_emit_cstring("\tnop");
-       be_emit_pad_comment();
-       be_emit_cstring("/* TODO: use delay slot */\n");
-
-       if (get_irn_link(proj_false) == next_block) {
-               be_emit_cstring("\t/* false-fallthrough to ");
-               sparc_emit_cfop_target(proj_false);
-               be_emit_cstring(" */");
-               be_emit_finish_line_gas(proj_false);
-       } else {
-               be_emit_cstring("\tba ");
-               sparc_emit_cfop_target(proj_false);
-               be_emit_finish_line_gas(proj_false);
-               be_emit_cstring("\tnop\t\t/* TODO: use delay slot */\n");
-               be_emit_finish_line_gas(proj_false);
+static const arch_register_t *map_i_to_o_reg(const arch_register_t *reg)
+{
+       unsigned idx = reg->global_index;
+       if (idx < REG_I0 || idx > REG_I7)
+               return reg;
+       idx += REG_O0 - REG_I0;
+       assert(REG_O0 <= idx && idx <= REG_O7);
+       return &sparc_registers[idx];
+}
+
+static void emit_sparc_Restore(const ir_node *node)
+{
+       const arch_register_t *destreg
+               = arch_get_irn_register_out(node, pn_sparc_Restore_res);
+       sparc_emitf(node, "restore %S2, %SI3, %R", map_i_to_o_reg(destreg));
+}
+
+static void emit_sparc_FrameAddr(const ir_node *node)
+{
+       const sparc_attr_t *attr   = get_sparc_attr_const(node);
+       int32_t             offset = attr->immediate_value;
+
+       char const *const insn = offset > 0 ? offset = -offset, "sub" : "add";
+       assert(sparc_is_value_imm_encodeable(offset));
+       sparc_emitf(node, "%s %S0, %d, %D0", insn, offset);
+}
+
+static const char *get_icc_unsigned(ir_relation relation)
+{
+       switch (relation & (ir_relation_less_equal_greater)) {
+       case ir_relation_false:              return "bn";
+       case ir_relation_equal:              return "be";
+       case ir_relation_less:               return "blu";
+       case ir_relation_less_equal:         return "bleu";
+       case ir_relation_greater:            return "bgu";
+       case ir_relation_greater_equal:      return "bgeu";
+       case ir_relation_less_greater:       return "bne";
+       case ir_relation_less_equal_greater: return "ba";
+       default: panic("Cmp has unsupported relation");
        }
 }
 
-/**
- * emit Jmp (which actually is a branch always (ba) instruction)
- */
-static void emit_sparc_Jmp(const ir_node *node)
+static const char *get_icc_signed(ir_relation relation)
 {
-       ir_node *block, *next_block;
+       switch (relation & (ir_relation_less_equal_greater)) {
+       case ir_relation_false:              return "bn";
+       case ir_relation_equal:              return "be";
+       case ir_relation_less:               return "bl";
+       case ir_relation_less_equal:         return "ble";
+       case ir_relation_greater:            return "bg";
+       case ir_relation_greater_equal:      return "bge";
+       case ir_relation_less_greater:       return "bne";
+       case ir_relation_less_equal_greater: return "ba";
+       default: panic("Cmp has unsupported relation");
+       }
+}
 
-       /* for now, the code works for scheduled and non-schedules blocks */
-       block = get_nodes_block(node);
+static const char *get_fcc(ir_relation relation)
+{
+       switch (relation) {
+       case ir_relation_false:                   return "fbn";
+       case ir_relation_equal:                   return "fbe";
+       case ir_relation_less:                    return "fbl";
+       case ir_relation_less_equal:              return "fble";
+       case ir_relation_greater:                 return "fbg";
+       case ir_relation_greater_equal:           return "fbge";
+       case ir_relation_less_greater:            return "fblg";
+       case ir_relation_less_equal_greater:      return "fbo";
+       case ir_relation_unordered:               return "fbu";
+       case ir_relation_unordered_equal:         return "fbue";
+       case ir_relation_unordered_less:          return "fbul";
+       case ir_relation_unordered_less_equal:    return "fbule";
+       case ir_relation_unordered_greater:       return "fbug";
+       case ir_relation_unordered_greater_equal: return "fbuge";
+       case ir_relation_unordered_less_greater:  return "fbne";
+       case ir_relation_true:                    return "fba";
+       }
+       panic("invalid relation");
+}
 
-       /* we have a block schedule */
-       next_block = get_irn_link(block);
-       if (get_irn_link(node) != next_block) {
-               be_emit_cstring("\tba ");
-               sparc_emit_cfop_target(node);
-               be_emit_finish_line_gas(node);
-               be_emit_cstring("\tnop\t\t/* TODO: use delay slot */\n");
+typedef const char* (*get_cc_func)(ir_relation relation);
+
+static void emit_sparc_branch(const ir_node *node, get_cc_func get_cc)
+{
+       const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
+       ir_relation    relation    = attr->relation;
+       const ir_node *proj_true   = NULL;
+       const ir_node *proj_false  = NULL;
+
+       assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false);
+       assert((long)pn_sparc_Bicc_true  == (long)pn_sparc_fbfcc_true);
+       foreach_out_edge(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               long nr = get_Proj_proj(proj);
+               if (nr == pn_sparc_Bicc_true) {
+                       proj_true = proj;
+               } else {
+                       assert(nr == pn_sparc_Bicc_false);
+                       proj_false = proj;
+               }
+       }
+
+       /* emit the true proj */
+       sparc_emitf(node, "%s%A %L", get_cc(relation), proj_true);
+       fill_delay_slot(node);
+
+       const ir_node *block      = get_nodes_block(node);
+       const ir_node *next_block = (ir_node*)get_irn_link(block);
+
+       if (get_jump_target(proj_false) == next_block) {
+               if (be_options.verbose_asm) {
+                       sparc_emitf(node, "/* fallthrough to %L */", proj_false);
+               }
        } else {
-               be_emit_cstring("\t/* fallthrough to ");
-               sparc_emit_cfop_target(node);
-               be_emit_cstring(" */");
+               sparc_emitf(node, "ba %L", proj_false);
+               /* TODO: fill this slot as well */
+               emitting_delay_slot = true;
+               sparc_emitf(NULL, "nop");
+               emitting_delay_slot = false;
        }
-       be_emit_finish_line_gas(node);
 }
 
-/**
- * emit copy node
- */
-static void emit_be_Copy(const ir_node *irn)
+static void emit_sparc_Bicc(const ir_node *node)
 {
-       ir_mode *mode = get_irn_mode(irn);
+       const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node);
+       bool             is_unsigned = attr->is_unsigned;
+       emit_sparc_branch(node, is_unsigned ? get_icc_unsigned : get_icc_signed);
+}
 
-       if (get_in_reg(irn, 0) == get_out_reg(irn, 0)) {
-               /* omitted Copy */
-               return;
+static void emit_sparc_fbfcc(const ir_node *node)
+{
+       /* if the flags producing node was immediately in front of us, emit
+        * a nop */
+       ir_node *flags = get_irn_n(node, n_sparc_fbfcc_flags);
+       ir_node *prev  = sched_prev(node);
+       if (is_Block(prev)) {
+               /* TODO: when the flags come from another block, then we have to do
+                * more complicated tests to see wether the flag producing node is
+                * potentially in front of us (could happen for fallthroughs) */
+               panic("TODO: fbfcc flags come from other block");
+       }
+       if (skip_Proj(flags) == prev) {
+               sparc_emitf(NULL, "nop");
        }
+       emit_sparc_branch(node, get_fcc);
+}
 
-       if (mode_is_float(mode)) {
-               panic("emit_be_Copy: move not supported for FP");
-       } else if (mode_is_data(mode)) {
-               be_emit_cstring("\tmov ");
-               sparc_emit_source_register(irn, 0);
-               be_emit_cstring(", ");
-               sparc_emit_dest_register(irn, 0);
-               be_emit_finish_line_gas(irn);
+static void emit_sparc_Ba(const ir_node *node)
+{
+       if (ba_is_fallthrough(node)) {
+               if (be_options.verbose_asm) {
+                       sparc_emitf(node, "/* fallthrough to %L */", node);
+               }
        } else {
-               panic("emit_be_Copy: move not supported for this mode");
+               sparc_emitf(node, "ba %L", node);
+               fill_delay_slot(node);
        }
 }
 
+static void emit_sparc_SwitchJmp(const ir_node *node)
+{
+       const sparc_switch_jmp_attr_t *attr = get_sparc_switch_jmp_attr_const(node);
 
-/**
- * dummy emitter for ignored nodes
- */
-static void emit_nothing(const ir_node *irn)
+       sparc_emitf(node, "jmp %S0");
+       fill_delay_slot(node);
+
+       be_emit_jump_table(node, attr->table, attr->table_entity, get_jump_target);
+}
+
+static void emit_fmov(const ir_node *node, const arch_register_t *src_reg,
+                      const arch_register_t *dst_reg)
 {
-       (void) irn;
+       sparc_emitf(node, "fmovs %R, %R", src_reg, dst_reg);
 }
 
+static const arch_register_t *get_next_fp_reg(const arch_register_t *reg)
+{
+       unsigned idx = reg->global_index;
+       assert(reg == &sparc_registers[idx]);
+       idx++;
+       assert(idx - REG_F0 < N_sparc_fp_REGS);
+       return &sparc_registers[idx];
+}
 
+static void emit_be_Copy(const ir_node *node)
+{
+       ir_mode               *mode    = get_irn_mode(node);
+       const arch_register_t *src_reg = arch_get_irn_register_in(node, 0);
+       const arch_register_t *dst_reg = arch_get_irn_register_out(node, 0);
 
-/**
- * type of emitter function
- */
-typedef void (*emit_func) (const ir_node *);
+       if (src_reg == dst_reg)
+               return;
 
-/**
- * Set a node emitter. Make it a bit more type safe.
- */
-static inline void set_emitter(ir_op *op, emit_func sparc_emit_node)
-{
-       op->ops.generic = (op_func)sparc_emit_node;
+       if (mode_is_float(mode)) {
+               unsigned bits = get_mode_size_bits(mode);
+               int      n    = bits > 32 ? bits > 64 ? 3 : 1 : 0;
+               emit_fmov(node, src_reg, dst_reg);
+               for (int i = 0; i < n; ++i) {
+                       src_reg = get_next_fp_reg(src_reg);
+                       dst_reg = get_next_fp_reg(dst_reg);
+                       emit_fmov(node, src_reg, dst_reg);
+               }
+       } else if (mode_is_data(mode)) {
+               sparc_emitf(node, "mov %S0, %D0");
+       } else {
+               panic("invalid mode");
+       }
 }
 
 /**
@@ -770,66 +1271,51 @@ static inline void set_emitter(ir_op *op, emit_func sparc_emit_node)
  */
 static void sparc_register_emitters(void)
 {
-
        /* first clear the generic function pointer for all ops */
-       clear_irp_opcodes_generic_func();
-
+       ir_clear_opcodes_generic_func();
        /* register all emitter functions defined in spec */
        sparc_register_spec_emitters();
 
        /* custom emitter */
-    set_emitter(op_be_IncSP,                   emit_be_IncSP);
-    set_emitter(op_be_Return,                  emit_be_Return);
-    set_emitter(op_be_Call,                            emit_be_Call);
-    set_emitter(op_sparc_FrameAddr,            emit_sparc_FrameAddr);
-    set_emitter(op_sparc_Branch,               emit_sparc_Branch);
-    set_emitter(op_sparc_SymConst,             emit_sparc_SymConst);
-    set_emitter(op_sparc_Jmp,                  emit_sparc_Jmp);
-    set_emitter(op_sparc_Save,                 emit_sparc_Save);
-
-    set_emitter(op_sparc_HiImm,                        emit_sparc_HiImm);
-    set_emitter(op_sparc_LoImm,                        emit_sparc_LoImm);
-    set_emitter(op_sparc_Div,                  emit_sparc_Div);
-    set_emitter(op_sparc_Mul,                  emit_sparc_Mul);
-    set_emitter(op_sparc_Mulh,                 emit_sparc_Mulh);
-
-    set_emitter(op_be_Copy,                            emit_be_Copy);
-    set_emitter(op_be_CopyKeep,                        emit_be_Copy);
-
-    set_emitter(op_be_Perm,                            emit_be_Perm);
-    set_emitter(op_be_MemPerm,                 emit_be_MemPerm);
-
-/*
-    set_emitter(op_arm_B,          emit_arm_B);
-    set_emitter(op_arm_CopyB,      emit_arm_CopyB);
-    set_emitter(op_arm_fpaConst,   emit_arm_fpaConst);
-    set_emitter(op_arm_fpaDbl2GP,  emit_arm_fpaDbl2GP);
-    set_emitter(op_arm_LdTls,      emit_arm_LdTls);
-    set_emitter(op_arm_SwitchJmp,  emit_arm_SwitchJmp);
-
-*/
-    /* no need to emit anything for the following nodes */
-       set_emitter(op_Phi,            emit_nothing);
-       set_emitter(op_be_Keep,        emit_nothing);
-       set_emitter(op_be_Start,       emit_nothing);
-       set_emitter(op_be_Barrier,     emit_nothing);
-
+       be_set_emitter(op_be_Copy,         emit_be_Copy);
+       be_set_emitter(op_be_CopyKeep,     emit_be_Copy);
+       be_set_emitter(op_be_IncSP,        emit_be_IncSP);
+       be_set_emitter(op_be_MemPerm,      emit_be_MemPerm);
+       be_set_emitter(op_be_Perm,         emit_be_Perm);
+       be_set_emitter(op_sparc_Ba,        emit_sparc_Ba);
+       be_set_emitter(op_sparc_Bicc,      emit_sparc_Bicc);
+       be_set_emitter(op_sparc_Call,      emit_sparc_Call);
+       be_set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr);
+       be_set_emitter(op_sparc_Restore,   emit_sparc_Restore);
+       be_set_emitter(op_sparc_Return,    emit_sparc_Return);
+       be_set_emitter(op_sparc_SDiv,      emit_sparc_SDiv);
+       be_set_emitter(op_sparc_SubSP,     emit_sparc_SubSP);
+       be_set_emitter(op_sparc_SwitchJmp, emit_sparc_SwitchJmp);
+       be_set_emitter(op_sparc_UDiv,      emit_sparc_UDiv);
+       be_set_emitter(op_sparc_fbfcc,     emit_sparc_fbfcc);
+
+       /* no need to emit anything for the following nodes */
+       be_set_emitter(op_Phi,         be_emit_nothing);
+       be_set_emitter(op_be_Keep,     be_emit_nothing);
+       be_set_emitter(op_sparc_Start, be_emit_nothing);
 }
 
-/**
- * Emits code for a node.
- */
-static void sparc_emit_node(const ir_node *node)
+static bool block_needs_label(const ir_node *block, const ir_node *sched_prev)
 {
-       ir_op               *op       = get_irn_op(node);
-
-       if (op->ops.generic) {
-               emit_func func = (emit_func) op->ops.generic;
-               be_dbg_set_dbg_info(get_irn_dbg_info(node));
-               (*func) (node);
+       if (get_Block_entity(block) != NULL)
+               return true;
+
+       int n_cfgpreds = get_Block_n_cfgpreds(block);
+       if (n_cfgpreds == 0) {
+               return false;
+       } else if (n_cfgpreds > 1) {
+               return true;
        } else {
-               panic("Error: No emit handler for node %+F (graph %+F)\n",
-                       node, current_ir_graph);
+               ir_node *cfgpred       = get_Block_cfgpred(block, 0);
+               ir_node *cfgpred_block = get_nodes_block(cfgpred);
+               if (is_Proj(cfgpred) && is_sparc_SwitchJmp(get_Proj_pred(cfgpred)))
+                       return true;
+               return sched_prev != cfgpred_block || get_jump_target(cfgpred) != block;
        }
 }
 
@@ -837,32 +1323,25 @@ static void sparc_emit_node(const ir_node *node)
  * Walks over the nodes in a block connected by scheduling edges
  * and emits code for each node.
  */
-static void sparc_gen_block(ir_node *block, void *data)
+static void sparc_emit_block(ir_node *block, ir_node *prev)
 {
-       ir_node *node;
-       (void) data;
-
-       if (! is_Block(block))
-               return;
-
-       be_gas_emit_block_name(block);
-       be_emit_cstring(":\n");
-       be_emit_write_line();
+       bool needs_label = block_needs_label(block, prev);
+       be_gas_begin_block(block, needs_label);
 
        sched_foreach(block, node) {
-               sparc_emit_node(node);
+               if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node)))
+                       continue;
+               be_emit_node(node);
        }
 }
 
-
 /**
  * Emits code for function start.
  */
 static void sparc_emit_func_prolog(ir_graph *irg)
 {
-       ir_entity *ent = get_irg_entity(irg);
-       be_gas_emit_function_prolog(ent, 4);
-       be_emit_write_line();
+       ir_entity *entity = get_irg_entity(irg);
+       be_gas_emit_function_prolog(entity, 4, NULL);
 }
 
 /**
@@ -870,81 +1349,95 @@ static void sparc_emit_func_prolog(ir_graph *irg)
  */
 static void sparc_emit_func_epilog(ir_graph *irg)
 {
-       ir_entity *ent = get_irg_entity(irg);
-       const char *irg_name = get_entity_ld_name(ent);
-       be_emit_write_line();
-       be_emit_irprintf("\t.size  %s, .-%s\n", irg_name, irg_name);
-       be_emit_cstring("# -- End ");
-       be_emit_string(irg_name);
-       be_emit_cstring("\n");
-       be_emit_write_line();
+       ir_entity *entity = get_irg_entity(irg);
+       be_gas_emit_function_epilog(entity);
 }
 
-/**
- * Block-walker:
- * TODO: Sets labels for control flow nodes (jump target).
- * Links control predecessors to there destination blocks.
- */
-static void sparc_gen_labels(ir_node *block, void *env)
+static void init_jump_links(ir_node *block, void *env)
 {
-       ir_node *pred;
-       int n = get_Block_n_cfgpreds(block);
        (void) env;
 
+       int n = get_Block_n_cfgpreds(block);
        for (n--; n >= 0; n--) {
-               pred = get_Block_cfgpred(block, n);
-               set_irn_link(pred, block); // link the pred of a block (which is a jmp)
+               ir_node *pred = get_Block_cfgpred(block, n);
+               set_jump_target(pred, block);
        }
 }
 
+static int cmp_block_execfreqs(const void *d1, const void *d2)
+{
+       ir_node **p1 = (ir_node**)d1;
+       ir_node **p2 = (ir_node**)d2;
+       double freq1 = get_block_execfreq(*p1);
+       double freq2 = get_block_execfreq(*p2);
+       if (freq1 < freq2)
+               return -1;
+       if (freq1 > freq2)
+               return 1;
+       return get_irn_node_nr(*p2)-get_irn_node_nr(*p1);
+}
 
-/**
- * Main driver
- */
-void sparc_gen_routine(const sparc_code_gen_t *cg, ir_graph *irg)
+static void pick_delay_slots(size_t n_blocks, ir_node **blocks)
 {
-       ir_node **blk_sched;
-       ir_node *last_block = NULL;
-       ir_entity *entity     = get_irg_entity(irg);
-       int i, n;
-       (void) cg;
+       /* create blocklist sorted by execution frequency */
+       ir_node **sorted_blocks = XMALLOCN(ir_node*, n_blocks);
+       memcpy(sorted_blocks, blocks, n_blocks*sizeof(sorted_blocks[0]));
+       qsort(sorted_blocks, n_blocks, sizeof(sorted_blocks[0]),
+             cmp_block_execfreqs);
+
+       for (size_t i = 0; i < n_blocks; ++i) {
+               const ir_node *block = sorted_blocks[i];
+               sched_foreach(block, node) {
+                       if (!has_delay_slot(node))
+                               continue;
+                       ir_node *filler = pick_delay_slot_for(node);
+                       if (filler == NULL)
+                               continue;
+                       rbitset_set(delay_slot_fillers, get_irn_idx(filler));
+                       pmap_insert(delay_slots, node, filler);
+               }
+       }
+}
 
-       be_gas_elf_type_char = '#';
-       be_gas_object_file_format = OBJECT_FILE_FORMAT_ELF_SPARC;
+void sparc_emit_routine(ir_graph *irg)
+{
+       heights            = heights_new(irg);
+       delay_slot_fillers = rbitset_malloc(get_irg_last_idx(irg));
+       delay_slots        = pmap_create();
 
        /* register all emitter functions */
        sparc_register_emitters();
-       be_dbg_method_begin(entity);
 
        /* create the block schedule. For now, we don't need it earlier. */
-       blk_sched = be_create_block_schedule(irg);
+       ir_node **block_schedule = be_create_block_schedule(irg);
 
-       // emit function prolog
        sparc_emit_func_prolog(irg);
-
-       // generate BLOCK labels
-       irg_block_walk_graph(irg, sparc_gen_labels, NULL, NULL);
-
-       // inject block scheduling links & emit code of each block
-       n = ARR_LEN(blk_sched);
-       for (i = 0; i < n;) {
-               ir_node *block, *next_bl;
-
-               block = blk_sched[i];
-               ++i;
-               next_bl = i < n ? blk_sched[i] : NULL;
-
-               /* set here the link. the emitter expects to find the next block here */
-               set_irn_link(block, next_bl);
-               sparc_gen_block(block, last_block);
-               last_block = block;
+       irg_block_walk_graph(irg, init_jump_links, NULL, NULL);
+
+       /* inject block scheduling links & emit code of each block */
+       size_t n_blocks = ARR_LEN(block_schedule);
+       for (size_t i = 0; i < n_blocks; ++i) {
+               ir_node *block      = block_schedule[i];
+               ir_node *next_block = i+1 < n_blocks ? block_schedule[i+1] : NULL;
+               set_irn_link(block, next_block);
        }
 
+       pick_delay_slots(n_blocks, block_schedule);
 
-       //irg_walk_blkwise_graph(irg, NULL, sparc_gen_block, NULL);
+       for (size_t i = 0; i < n_blocks; ++i) {
+               ir_node *block = block_schedule[i];
+               ir_node *prev  = i>=1 ? block_schedule[i-1] : NULL;
+               if (block == get_irg_end_block(irg))
+                       continue;
+               sparc_emit_block(block, prev);
+       }
 
-       // emit function epilog
+       /* emit function epilog */
        sparc_emit_func_epilog(irg);
+
+       pmap_destroy(delay_slots);
+       xfree(delay_slot_fillers);
+       heights_free(heights);
 }
 
 void sparc_init_emitter(void)