X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fsparc%2Fsparc_emitter.c;h=15de55d3a254f8e10b052cbc7bc7f19abff4983f;hb=29087feac9466883c278e53eec325d5e3099df1d;hp=a09be00ace1711a5af995c378709ddfc695932db;hpb=b2ff72059540a209702c028757442be041d93f47;p=libfirm diff --git a/ir/be/sparc/sparc_emitter.c b/ir/be/sparc/sparc_emitter.c index a09be00ac..15de55d3a 100644 --- a/ir/be/sparc/sparc_emitter.c +++ b/ir/be/sparc/sparc_emitter.c @@ -21,12 +21,12 @@ * @file * @brief emit assembler for a backend graph * @author Hannes Rapp, Matthias Braun - * @version $Id$ */ #include "config.h" #include +#include "bitfiddle.h" #include "xmalloc.h" #include "tv.h" #include "iredges.h" @@ -41,14 +41,17 @@ #include "raw_bitset.h" #include "dbginfo.h" #include "heights.h" - -#include "../besched.h" -#include "../beblocksched.h" -#include "../beirg.h" -#include "../begnuas.h" -#include "../be_dbgout.h" -#include "../benode.h" -#include "../bestack.h" +#include "pmap.h" +#include "execfreq_t.h" + +#include "besched.h" +#include "beblocksched.h" +#include "beirg.h" +#include "begnuas.h" +#include "bedwarf.h" +#include "benode.h" +#include "bestack.h" +#include "bepeephole.h" #include "sparc_emitter.h" #include "gen_sparc_emitter.h" @@ -58,13 +61,24 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) -static ir_heights_t *heights; -static const ir_node *delay_slot_filler; /**< this node has been choosen to fill - the next delay slot */ +static ir_heights_t *heights; +static unsigned *delay_slot_fillers; +static pmap *delay_slots; + +static bool emitting_delay_slot; -static void sparc_emit_node(const ir_node *node); +/** + * indent before instruction. (Adds additional indentation when emitting + * delay slots) + */ +static void sparc_emit_indent(void) +{ + be_emit_char('\t'); + if (emitting_delay_slot) + be_emit_char(' '); +} -void sparc_emit_immediate(const ir_node *node) +static void sparc_emit_immediate(ir_node const *const node) { const sparc_attr_t *attr = get_sparc_attr_const(node); ir_entity *entity = attr->immediate_value_entity; @@ -74,7 +88,11 @@ void sparc_emit_immediate(const ir_node *node) assert(sparc_is_value_imm_encodeable(value)); be_emit_irprintf("%d", value); } else { - be_emit_cstring("%lo("); + if (get_entity_owner(entity) == get_tls_type()) { + be_emit_cstring("%tle_lox10("); + } else { + be_emit_cstring("%lo("); + } be_gas_emit_entity(entity); if (attr->immediate_value != 0) { be_emit_irprintf("%+d", attr->immediate_value); @@ -83,58 +101,46 @@ void sparc_emit_immediate(const ir_node *node) } } -void sparc_emit_high_immediate(const ir_node *node) +static void sparc_emit_high_immediate(ir_node const *node) { const sparc_attr_t *attr = get_sparc_attr_const(node); ir_entity *entity = attr->immediate_value_entity; - be_emit_cstring("%hi("); if (entity == NULL) { uint32_t value = (uint32_t) attr->immediate_value; - be_emit_irprintf("0x%X", value); + be_emit_irprintf("%%hi(0x%X)", value); } else { + if (get_entity_owner(entity) == get_tls_type()) { + be_emit_cstring("%tle_hix22("); + } else { + be_emit_cstring("%hi("); + } be_gas_emit_entity(entity); if (attr->immediate_value != 0) { be_emit_irprintf("%+d", attr->immediate_value); } + be_emit_char(')'); } - be_emit_char(')'); } -void sparc_emit_source_register(const ir_node *node, int pos) +static void sparc_emit_source_register(ir_node const *node, int const pos) { const arch_register_t *reg = arch_get_irn_register_in(node, pos); be_emit_char('%'); - be_emit_string(arch_register_get_name(reg)); + be_emit_string(reg->name); } -void sparc_emit_dest_register(const ir_node *node, int pos) +static void sparc_emit_dest_register(ir_node const *const node, int const pos) { const arch_register_t *reg = arch_get_irn_register_out(node, pos); be_emit_char('%'); - be_emit_string(arch_register_get_name(reg)); -} - -/** - * Emits either a imm or register depending on arity of node - * @param node - * @param register no (-1 if no register) - */ -void sparc_emit_reg_or_imm(const ir_node *node, int pos) -{ - if (arch_get_irn_flags(node) & ((arch_irn_flags_t)sparc_arch_irn_flag_immediate_form)) { - // we have a imm input - sparc_emit_immediate(node); - } else { - // we have reg input - sparc_emit_source_register(node, pos); - } + be_emit_string(reg->name); } /** * emit SP offset */ -void sparc_emit_offset(const ir_node *node, int offset_node_pos) +static void sparc_emit_offset(const ir_node *node, int offset_node_pos) { const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node); @@ -157,108 +163,59 @@ void sparc_emit_offset(const ir_node *node, int offset_node_pos) } } -void sparc_emit_float_load_store_mode(const ir_node *node) -{ - const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node); - ir_mode *mode = attr->load_store_mode; - int bits = get_mode_size_bits(mode); - - assert(mode_is_float(mode)); - - switch (bits) { - case 32: return; - case 64: be_emit_char('d'); return; - case 128: be_emit_char('q'); return; - } - panic("invalid flaot load/store mode %+F", mode); -} - /** - * Emit load mode char + * Emit load mode */ -void sparc_emit_load_mode(const ir_node *node) +static void sparc_emit_load_mode(ir_node const *const node) { const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node); ir_mode *mode = attr->load_store_mode; int bits = get_mode_size_bits(mode); bool is_signed = mode_is_signed(mode); - if (bits == 16) { - be_emit_string(is_signed ? "sh" : "uh"); - } else if (bits == 8) { - be_emit_string(is_signed ? "sb" : "ub"); - } else if (bits == 64) { - be_emit_char('d'); - } else { - assert(bits == 32); + switch (bits) { + case 8: be_emit_string(is_signed ? "sb" : "ub"); break; + case 16: be_emit_string(is_signed ? "sh" : "uh"); break; + case 32: break; + case 64: be_emit_char('d'); break; + case 128: be_emit_char('q'); break; + default: panic("invalid load/store mode %+F", mode); } } /** * Emit store mode char */ -void sparc_emit_store_mode(const ir_node *node) +static void sparc_emit_store_mode(ir_node const *const node) { const sparc_load_store_attr_t *attr = get_sparc_load_store_attr_const(node); ir_mode *mode = attr->load_store_mode; int bits = get_mode_size_bits(mode); - if (bits == 16) { - be_emit_string("h"); - } else if (bits == 8) { - be_emit_string("b"); - } else if (bits == 64) { - be_emit_char('d'); - } else { - assert(bits == 32); + switch (bits) { + case 8: be_emit_char('b'); break; + case 16: be_emit_char('h'); break; + case 32: break; + case 64: be_emit_char('d'); break; + case 128: be_emit_char('q'); break; + default: panic("invalid load/store mode %+F", mode); } } -/** - * emit integer signed/unsigned prefix char - */ -void sparc_emit_mode_sign_prefix(const ir_node *node) -{ - ir_mode *mode = get_irn_mode(node); - bool is_signed = mode_is_signed(mode); - be_emit_string(is_signed ? "s" : "u"); -} - static void emit_fp_suffix(const ir_mode *mode) { - unsigned bits = get_mode_size_bits(mode); assert(mode_is_float(mode)); - - if (bits == 32) { - be_emit_char('s'); - } else if (bits == 64) { - be_emit_char('d'); - } else if (bits == 128) { - be_emit_char('q'); - } else { - panic("invalid FP mode"); + switch (get_mode_size_bits(mode)) { + case 32: be_emit_char('s'); break; + case 64: be_emit_char('d'); break; + case 128: be_emit_char('q'); break; + default: panic("invalid FP mode"); } } -void sparc_emit_fp_conv_source(const ir_node *node) -{ - const sparc_fp_conv_attr_t *attr = get_sparc_fp_conv_attr_const(node); - emit_fp_suffix(attr->src_mode); -} - -void sparc_emit_fp_conv_destination(const ir_node *node) -{ - const sparc_fp_conv_attr_t *attr = get_sparc_fp_conv_attr_const(node); - emit_fp_suffix(attr->dest_mode); -} - -/** - * emits the FP mode suffix char - */ -void sparc_emit_fp_mode_suffix(const ir_node *node) +static void set_jump_target(ir_node *jump, ir_node *target) { - const sparc_fp_attr_t *attr = get_sparc_fp_attr_const(node); - emit_fp_suffix(attr->fp_mode); + set_irn_link(jump, target); } static ir_node *get_jump_target(const ir_node *jump) @@ -275,8 +232,18 @@ static void sparc_emit_cfop_target(const ir_node *node) be_gas_emit_block_name(block); } +/** + * returns true if a sparc_call calls a register and not an immediate + */ +static bool is_sparc_reg_call(const ir_node *node) +{ + const sparc_attr_t *attr = get_sparc_attr_const(node); + return attr->immediate_value_entity == NULL; +} + static int get_sparc_Call_dest_addr_pos(const ir_node *node) { + assert(is_sparc_reg_call(node)); return get_irn_arity(node)-1; } @@ -284,7 +251,7 @@ static bool ba_is_fallthrough(const ir_node *node) { ir_node *block = get_nodes_block(node); ir_node *next_block = (ir_node*)get_irn_link(block); - return get_irn_link(node) == next_block; + return get_jump_target(node) == next_block; } static bool is_no_instruction(const ir_node *node) @@ -308,13 +275,11 @@ static bool is_no_instruction(const ir_node *node) static bool has_delay_slot(const ir_node *node) { - if (is_sparc_Ba(node) && ba_is_fallthrough(node)) - return false; + if (is_sparc_Ba(node)) { + return !ba_is_fallthrough(node); + } - return is_sparc_Bicc(node) || is_sparc_fbfcc(node) || is_sparc_Ba(node) - || is_sparc_SwitchJmp(node) || is_sparc_Call(node) - || is_sparc_SDiv(node) || is_sparc_UDiv(node) - || is_sparc_Return(node); + return arch_get_irn_flags(node) & sparc_arch_irn_flag_has_delay_slot; } /** returns true if the emitter for this sparc node can produce more than one @@ -328,94 +293,422 @@ static bool emits_multiple_instructions(const ir_node *node) if (has_delay_slot(node)) return true; - if (is_sparc_Call(node)) { + if (is_sparc_Call(node)) return arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return; + + return is_sparc_SMulh(node) || is_sparc_UMulh(node) + || is_sparc_SDiv(node) || is_sparc_UDiv(node) + || be_is_MemPerm(node) || be_is_Perm(node) + || is_sparc_SubSP(node); +} + +static bool uses_reg(const ir_node *node, unsigned reg_index, unsigned width) +{ + int arity = get_irn_arity(node); + for (int i = 0; i < arity; ++i) { + const arch_register_t *in_reg = arch_get_irn_register_in(node, i); + const arch_register_req_t *in_req = arch_get_irn_register_req_in(node, i); + if (in_reg == NULL) + continue; + if (reg_index < (unsigned)in_reg->global_index + in_req->width + && reg_index + width > in_reg->global_index) + return true; + } + return false; +} + +static bool writes_reg(const ir_node *node, unsigned reg_index, unsigned width) +{ + be_foreach_out(node, o) { + const arch_register_t *out_reg = arch_get_irn_register_out(node, o); + if (out_reg == NULL) + continue; + const arch_register_req_t *out_req = arch_get_irn_register_req_out(node, o); + if (reg_index < (unsigned)out_reg->global_index + out_req->width + && reg_index + width > out_reg->global_index) + return true; } + return false; +} - return is_sparc_Mulh(node) || is_sparc_SDiv(node) || is_sparc_UDiv(node) - || be_is_MemPerm(node) || be_is_Perm(node); +static bool is_legal_delay_slot_filler(const ir_node *node) +{ + if (is_no_instruction(node)) + return false; + if (emits_multiple_instructions(node)) + return false; + if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node))) + return false; + return true; } -/** - * search for an instruction that can fill the delay slot of @p node - */ -static const ir_node *pick_delay_slot_for(const ir_node *node) +static bool can_move_down_into_delayslot(const ir_node *node, const ir_node *to) { - const ir_node *check = node; - const ir_node *schedpoint = node; - unsigned tries = 0; - /* currently we don't track which registers are still alive, so we can't - * pick any other instructions other than the one directly preceding */ - static const unsigned PICK_DELAY_SLOT_MAX_DISTANCE = 1; + if (!is_legal_delay_slot_filler(node)) + return false; - assert(has_delay_slot(node)); + if (!be_can_move_down(heights, node, to)) + return false; - if (is_sparc_Call(node)) { - const sparc_attr_t *attr = get_sparc_attr_const(node); - ir_entity *entity = attr->immediate_value_entity; - if (entity != NULL) { - check = NULL; /* pick any instruction, dependencies on Call - don't matter */ - } else { - /* we only need to check the value for the call destination */ - check = get_irn_n(node, get_sparc_Call_dest_addr_pos(node)); + if (is_sparc_Call(to)) { + ir_node *check; + /** all inputs are used after the delay slot so, we're fine */ + if (!is_sparc_reg_call(to)) + return true; + + check = get_irn_n(to, get_sparc_Call_dest_addr_pos(to)); + if (skip_Proj(check) == node) + return false; + + /* the Call also destroys the value of %o7, but since this is + * currently marked as ignore register in the backend, it + * should never be used by the instruction in the delay slot. */ + if (uses_reg(node, REG_O7, 1)) + return false; + return true; + } else if (is_sparc_Return(to)) { + /* return uses the value of %o7, all other values are not + * immediately used */ + if (writes_reg(node, REG_O7, 1)) + return false; + return true; + } else { + /* the node must not use our computed values */ + int arity = get_irn_arity(to); + for (int i = 0; i < arity; ++i) { + ir_node *in = get_irn_n(to, i); + if (skip_Proj(in) == node) + return false; } + return true; + } +} - /* the Call also destroys the value of %o7, but since this is currently - * marked as ignore register in the backend, it should never be used by - * the instruction in the delay slot. */ - } else if (is_sparc_Return(node)) { - /* we only have to check the jump destination value */ - int arity = get_irn_arity(node); - int i; - - check = NULL; - for (i = 0; i < arity; ++i) { - ir_node *in = get_irn_n(node, i); - const arch_register_t *reg = arch_get_irn_register(in); - if (reg == &sparc_registers[REG_O7]) { - check = skip_Proj(in); - break; - } +static bool can_move_up_into_delayslot(const ir_node *node, const ir_node *to) +{ + if (!be_can_move_up(heights, node, to)) + return false; + + /* node must not use any results of 'to' */ + int arity = get_irn_arity(node); + for (int i = 0; i < arity; ++i) { + ir_node *in = get_irn_n(node, i); + ir_node *skipped = skip_Proj(in); + if (skipped == to) + return false; + } + + /* register window cycling effects at Restore aren't correctly represented + * in the graph yet so we need this exception here */ + if (is_sparc_Restore(node) || is_sparc_RestoreZero(node)) { + return false; + } else if (is_sparc_Call(to)) { + /* node must not overwrite any of the inputs of the call, + * (except for the dest_addr) */ + int dest_addr_pos = is_sparc_reg_call(to) + ? get_sparc_Call_dest_addr_pos(to) : -1; + + int call_arity = get_irn_arity(to); + for (int i = 0; i < call_arity; ++i) { + if (i == dest_addr_pos) + continue; + const arch_register_t *reg = arch_get_irn_register_in(to, i); + if (reg == NULL) + continue; + const arch_register_req_t *req = arch_get_irn_register_req_in(to, i); + if (writes_reg(node, reg->global_index, req->width)) + return false; + } + + /* node must not write to one of the call outputs */ + be_foreach_out(to, o) { + const arch_register_t *reg = arch_get_irn_register_out(to, o); + if (reg == NULL) + continue; + const arch_register_req_t *req = arch_get_irn_register_req_out(to, o); + if (writes_reg(node, reg->global_index, req->width)) + return false; + } + } else if (is_sparc_SDiv(to) || is_sparc_UDiv(to)) { + /* node will be inserted between wr and div so it must not overwrite + * anything except the wr input */ + int arity = get_irn_arity(to); + for (int i = 0; i < arity; ++i) { + assert((long)n_sparc_SDiv_dividend_high == (long)n_sparc_UDiv_dividend_high); + if (i == n_sparc_SDiv_dividend_high) + continue; + const arch_register_t *reg = arch_get_irn_register_in(to, i); + if (reg == NULL) + continue; + const arch_register_req_t *req = arch_get_irn_register_req_in(to, i); + if (writes_reg(node, reg->global_index, req->width)) + return false; + } +} + return true; +} + +static void optimize_fallthrough(ir_node *node) +{ + ir_node *proj_true = NULL; + ir_node *proj_false = NULL; + + assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false); + assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true); + foreach_out_edge(node, edge) { + ir_node *proj = get_edge_src_irn(edge); + long nr = get_Proj_proj(proj); + if (nr == pn_sparc_Bicc_true) { + proj_true = proj; + } else { + assert(nr == pn_sparc_Bicc_false); + proj_false = proj; } - } else { - check = node; } + assert(proj_true != NULL && proj_false != NULL); - while (sched_has_prev(schedpoint)) { - schedpoint = sched_prev(schedpoint); + /* for now, the code works for scheduled and non-schedules blocks */ + const ir_node *block = get_nodes_block(node); - if (has_delay_slot(schedpoint)) - break; + /* we have a block schedule */ + const ir_node *next_block = (ir_node*)get_irn_link(block); - /* skip things which don't really result in instructions */ - if (is_no_instruction(schedpoint)) - continue; + if (get_jump_target(proj_true) == next_block) { + /* exchange both proj destinations so the second one can be omitted */ + set_Proj_proj(proj_true, pn_sparc_Bicc_false); + set_Proj_proj(proj_false, pn_sparc_Bicc_true); + + sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node); + attr->relation = get_negated_relation(attr->relation); + } +} +/** + * search for an instruction that can fill the delay slot of @p node + */ +static ir_node *pick_delay_slot_for(ir_node *node) +{ + static const unsigned PICK_DELAY_SLOT_MAX_DISTANCE = 10; + assert(has_delay_slot(node)); + + if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) { + optimize_fallthrough(node); + } + + unsigned tries = 0; + sched_foreach_reverse_from(sched_prev(node), schedpoint) { + if (has_delay_slot(schedpoint)) + break; if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE) break; - if (emits_multiple_instructions(schedpoint)) + if (!can_move_down_into_delayslot(schedpoint, node)) continue; - /* if check and schedpoint are not in the same block, give up. */ - if (check != NULL - && get_nodes_block(check) != get_nodes_block(schedpoint)) - break; + /* found something */ + return schedpoint; + } - /* allowed for delayslot: any instruction which is not necessary to - * compute an input to the branch. */ - if (check != NULL - && heights_reachable_in_block(heights, check, schedpoint)) + /* search after the current position */ + tries = 0; + sched_foreach_from(sched_next(node), schedpoint) { + if (has_delay_slot(schedpoint)) + break; + if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE) + break; + if (!is_legal_delay_slot_filler(schedpoint)) + continue; + if (!can_move_up_into_delayslot(schedpoint, node)) continue; /* found something */ return schedpoint; } + /* look in successor blocks */ + ir_node *block = get_nodes_block(node); + /* TODO: sort succs by execution frequency */ + foreach_block_succ(block, edge) { + ir_node *succ = get_edge_src_irn(edge); + /* we can't easily move up stuff from blocks with multiple predecessors + * since the instruction is lacking for the other preds then. + * (We also don't have to do any phi translation) */ + if (get_Block_n_cfgpreds(succ) > 1) + continue; + + tries = 0; + sched_foreach(succ, schedpoint) { + if (has_delay_slot(schedpoint)) + break; + /* can't move pinned nodes accross blocks */ + if (get_irn_pinned(schedpoint) == op_pin_state_pinned) + continue; + /* restore doesn't model register window switching correctly, + * so it appears like we could move it, which is not true */ + if (is_sparc_Restore(schedpoint) + || is_sparc_RestoreZero(schedpoint)) + continue; + if (tries++ >= PICK_DELAY_SLOT_MAX_DISTANCE) + break; + if (!is_legal_delay_slot_filler(schedpoint)) + continue; + if (can_move_up_into_delayslot(schedpoint, node)) { + /* it's fine to move the insn accross blocks */ + return schedpoint; + } else if (is_sparc_Bicc(node) || is_sparc_fbfcc(node)) { + ir_node *proj = get_Block_cfgpred(succ, 0); + long nr = get_Proj_proj(proj); + if ((nr == pn_sparc_Bicc_true || nr == pn_sparc_fbfcc_true) + && be_can_move_up(heights, schedpoint, succ)) { + /* we can use it with the annul flag */ + sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr(node); + attr->annul_delay_slot = true; + return schedpoint; + } + } + } + } + return NULL; } +void sparc_emitf(ir_node const *const node, char const *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + sparc_emit_indent(); + for (;;) { + char const *start = fmt; + + while (*fmt != '%' && *fmt != '\0') + ++fmt; + be_emit_string_len(start, fmt - start); + if (*fmt == '\0') + break; + ++fmt; + + bool plus = false; + if (*fmt == '+') { + plus = true; + ++fmt; + } + + switch (*fmt++) { + case '%': + be_emit_char('%'); + break; + + case 'A': { + const sparc_jmp_cond_attr_t *attr + = get_sparc_jmp_cond_attr_const(node); + if (attr->annul_delay_slot) { + be_emit_cstring(",a"); + } + break; + } + + case 'D': + if (*fmt < '0' || '9' <= *fmt) + goto unknown; + sparc_emit_dest_register(node, *fmt++ - '0'); + break; + + case 'E': { + sparc_attr_t const *const attr = get_sparc_attr_const(node); + be_gas_emit_entity(attr->immediate_value_entity); + if (attr->immediate_value != 0) { + be_emit_irprintf(plus ? "%+d" : "%d", attr->immediate_value); + } + break; + } + + case 'F': { + ir_mode *mode; + switch (*fmt++) { + case 'D': mode = get_sparc_fp_conv_attr_const(node)->dest_mode; break; + case 'M': mode = get_sparc_fp_attr_const(node)->fp_mode; break; + case 'S': mode = get_sparc_fp_conv_attr_const(node)->src_mode; break; + default: goto unknown; + } + emit_fp_suffix(mode); + break; + } + + case 'H': + sparc_emit_high_immediate(node); + break; + + case 'L': { + ir_node *n = va_arg(ap, ir_node*); + sparc_emit_cfop_target(n); + break; + } + + case 'M': + switch (*fmt++) { + case 'L': sparc_emit_load_mode(node); break; + case 'S': sparc_emit_store_mode(node); break; + default: goto unknown; + } + break; + + case 'O': + if (*fmt < '0' || '9' <= *fmt) + goto unknown; + sparc_emit_offset(node, *fmt++ - '0'); + break; + + case 'R': { + arch_register_t const *const reg = va_arg(ap, const arch_register_t*); + be_emit_char('%'); + be_emit_string(reg->name); + break; + } + + case 'S': { + bool imm = false; + if (*fmt == 'I') { + imm = true; + ++fmt; + } + if (*fmt < '0' || '9' <= *fmt) + goto unknown; + unsigned const pos = *fmt++ - '0'; + if (imm && arch_get_irn_flags(node) & (arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) { + sparc_emit_immediate(node); + } else { + sparc_emit_source_register(node, pos); + } + break; + } + + case 'd': { + int const num = va_arg(ap, int); + be_emit_irprintf(plus ? "%+d" : "%d", num); + break; + } + + case 's': { + char const *const str = va_arg(ap, char const*); + be_emit_string(str); + break; + } + + case 'u': { + unsigned const num = va_arg(ap, unsigned); + be_emit_irprintf(plus ? "%+u" : "%u", num); + break; + } + + default: +unknown: + panic("unknown format conversion in sparc_emitf()"); + } + } + be_emit_finish_line_gas(node); + va_end(ap); +} + /** * Emits code for stack space management */ @@ -427,201 +720,320 @@ static void emit_be_IncSP(const ir_node *irn) return; /* SPARC stack grows downwards */ - if (offset < 0) { - be_emit_cstring("\tsub "); - offset = -offset; - } else { - be_emit_cstring("\tadd "); - } - - sparc_emit_source_register(irn, 0); - be_emit_irprintf(", %d", -offset); - be_emit_cstring(", "); - sparc_emit_dest_register(irn, 0); - be_emit_finish_line_gas(irn); + char const *const insn = offset > 0 ? offset = -offset, "add" : "sub"; + sparc_emitf(irn, "%s %S0, %d, %D0", insn, offset); } /** - * emits code for mulh + * Emits code for stack space management. */ -static void emit_sparc_Mulh(const ir_node *irn) +static void emit_sparc_SubSP(const ir_node *irn) { - be_emit_cstring("\t"); - sparc_emit_mode_sign_prefix(irn); - be_emit_cstring("mul "); - - sparc_emit_source_register(irn, 0); - be_emit_cstring(", "); - sparc_emit_reg_or_imm(irn, 1); - be_emit_cstring(", "); - sparc_emit_dest_register(irn, 0); - be_emit_finish_line_gas(irn); - - // our result is in the y register now - // we just copy it to the assigned target reg - be_emit_cstring("\tmov %y, "); - sparc_emit_dest_register(irn, 0); - be_emit_finish_line_gas(irn); + sparc_emitf(irn, "sub %S0, %SI1, %D0"); + sparc_emitf(irn, "add %S0, %u, %D1", SPARC_MIN_STACKSIZE); } -static void fill_delay_slot(void) +static void fill_delay_slot(const ir_node *node) { - if (delay_slot_filler != NULL) { - sparc_emit_node(delay_slot_filler); - delay_slot_filler = NULL; + emitting_delay_slot = true; + const ir_node *filler = pmap_get(ir_node, delay_slots, node); + if (filler != NULL) { + assert(!is_no_instruction(filler)); + assert(!emits_multiple_instructions(filler)); + be_emit_node(filler); } else { - be_emit_cstring("\tnop\n"); - be_emit_write_line(); + sparc_emitf(NULL, "nop"); } + emitting_delay_slot = false; } -static void emit_sparc_Div(const ir_node *node, bool is_signed) +static void emit_sparc_Div(const ir_node *node, char const *const insn) { - /* can we get the delay count of the wr instruction somewhere? */ - unsigned wry_delay_count = 3; - unsigned i; - - be_emit_cstring("\twr "); - sparc_emit_source_register(node, 0); - be_emit_cstring(", 0, %y"); - be_emit_finish_line_gas(node); + sparc_emitf(node, "wr %S0, 0, %%y"); - for (i = 0; i < wry_delay_count; ++i) { - fill_delay_slot(); + /* TODO: we should specify number of delayslots in an architecture + * specification */ + unsigned wry_delay_count = 3; + for (unsigned i = 0; i < wry_delay_count; ++i) { + if (i == 0) { + fill_delay_slot(node); + } else { + emitting_delay_slot = true; + sparc_emitf(NULL, "nop"); + emitting_delay_slot = false; + } } - be_emit_irprintf("\t%s ", is_signed ? "sdiv" : "udiv"); - sparc_emit_source_register(node, 1); - be_emit_cstring(", "); - sparc_emit_reg_or_imm(node, 2); - be_emit_cstring(", "); - sparc_emit_dest_register(node, 0); - be_emit_finish_line_gas(node); + sparc_emitf(node, "%s %S1, %SI2, %D0", insn); } static void emit_sparc_SDiv(const ir_node *node) { - emit_sparc_Div(node, true); + emit_sparc_Div(node, "sdiv"); } static void emit_sparc_UDiv(const ir_node *node) { - emit_sparc_Div(node, false); + emit_sparc_Div(node, "udiv"); } -/** - * Emits code for Call node - */ static void emit_sparc_Call(const ir_node *node) { - const sparc_attr_t *attr = get_sparc_attr_const(node); - ir_entity *entity = attr->immediate_value_entity; - - be_emit_cstring("\tcall "); - if (entity != NULL) { - be_gas_emit_entity(entity); - if (attr->immediate_value != 0) { - be_emit_irprintf("%+d", attr->immediate_value); - } - be_emit_cstring(", 0"); - } else { + if (is_sparc_reg_call(node)) { int dest_addr = get_sparc_Call_dest_addr_pos(node); - sparc_emit_source_register(node, dest_addr); + sparc_emitf(node, "call %R", arch_get_irn_register_in(node, dest_addr)); + } else { + sparc_emitf(node, "call %E, 0"); } - be_emit_finish_line_gas(node); - fill_delay_slot(); + fill_delay_slot(node); if (arch_get_irn_flags(node) & sparc_arch_irn_flag_aggregate_return) { - be_emit_cstring("\tunimp 8\n"); - be_emit_write_line(); + sparc_emitf(NULL, "unimp 8"); } } -/** - * Emit code for Perm node - */ static void emit_be_Perm(const ir_node *irn) { - be_emit_cstring("\txor "); - sparc_emit_source_register(irn, 1); - be_emit_cstring(", "); - sparc_emit_source_register(irn, 0); - be_emit_cstring(", "); - sparc_emit_source_register(irn, 0); - be_emit_finish_line_gas(NULL); - - be_emit_cstring("\txor "); - sparc_emit_source_register(irn, 1); - be_emit_cstring(", "); - sparc_emit_source_register(irn, 0); - be_emit_cstring(", "); - sparc_emit_source_register(irn, 1); - be_emit_finish_line_gas(NULL); - - be_emit_cstring("\txor "); - sparc_emit_source_register(irn, 1); - be_emit_cstring(", "); - sparc_emit_source_register(irn, 0); - be_emit_cstring(", "); - sparc_emit_source_register(irn, 0); - be_emit_finish_line_gas(irn); + ir_mode *mode = get_irn_mode(get_irn_n(irn, 0)); + if (mode_is_float(mode)) { + const arch_register_t *reg0 = arch_get_irn_register_in(irn, 0); + const arch_register_t *reg1 = arch_get_irn_register_in(irn, 1); + unsigned reg_idx0 = reg0->global_index; + unsigned reg_idx1 = reg1->global_index; + unsigned width = arch_get_irn_register_req_in(irn, 0)->width; + for (unsigned i = 0; i < width; ++i) { + const arch_register_t *r0 = &sparc_registers[reg_idx0+i]; + const arch_register_t *r1 = &sparc_registers[reg_idx1+i]; + sparc_emitf(irn, "fmovs %R, %%f31", r0); + sparc_emitf(irn, "fmovs %R, %R", r1, r0); + sparc_emitf(irn, "fmovs %%f31, %R", r1); + } + } else { + sparc_emitf(irn, "xor %S1, %S0, %S0"); + sparc_emitf(irn, "xor %S1, %S0, %S1"); + sparc_emitf(irn, "xor %S1, %S0, %S0"); + } +} + +/* The stack pointer must always be SPARC_STACK_ALIGNMENT bytes aligned, so get + * the next bigger integer that's evenly divisible by it. */ +static unsigned get_aligned_sp_change(const unsigned num_regs) +{ + const unsigned bytes = num_regs * SPARC_REGISTER_SIZE; + return round_up2(bytes, SPARC_STACK_ALIGNMENT); +} + +/* Spill register l0 or both l0 and l1, depending on n_spilled and n_to_spill.*/ +static void memperm_emit_spill_registers(const ir_node *node, int n_spilled, + int n_to_spill) +{ + assert(n_spilled < n_to_spill); + + if (n_spilled == 0) { + /* We always reserve stack space for two registers because during copy + * processing we don't know yet if we also need to handle a cycle which + * needs two registers. More complicated code in emit_MemPerm would + * prevent wasting SPARC_REGISTER_SIZE bytes of stack space but + * it is not worth the worse readability of emit_MemPerm. */ + + /* Keep stack pointer aligned. */ + unsigned sp_change = get_aligned_sp_change(2); + sparc_emitf(node, "sub %%sp, %u, %%sp", sp_change); + + /* Spill register l0. */ + sparc_emitf(node, "st %%l0, [%%sp%+d]", SPARC_MIN_STACKSIZE); + } + + if (n_to_spill == 2) { + /* Spill register l1. */ + sparc_emitf(node, "st %%l1, [%%sp%+d]", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE); + } +} + +/* Restore register l0 or both l0 and l1, depending on n_spilled. */ +static void memperm_emit_restore_registers(const ir_node *node, int n_spilled) +{ + if (n_spilled == 2) { + /* Restore register l1. */ + sparc_emitf(node, "ld [%%sp%+d], %%l1", SPARC_MIN_STACKSIZE + SPARC_REGISTER_SIZE); + } + + /* Restore register l0. */ + sparc_emitf(node, "ld [%%sp%+d], %%l0", SPARC_MIN_STACKSIZE); + + /* Restore stack pointer. */ + unsigned sp_change = get_aligned_sp_change(2); + sparc_emitf(node, "add %%sp, %u, %%sp", sp_change); +} + +/* Emit code to copy in_ent to out_ent. Only uses l0. */ +static void memperm_emit_copy(const ir_node *node, ir_entity *in_ent, + ir_entity *out_ent) +{ + ir_graph *irg = get_irn_irg(node); + be_stack_layout_t *layout = be_get_irg_stack_layout(irg); + int off_in = be_get_stack_entity_offset(layout, in_ent, 0); + int off_out = be_get_stack_entity_offset(layout, out_ent, 0); + + /* Load from input entity. */ + sparc_emitf(node, "ld [%%fp%+d], %%l0", off_in); + /* Store to output entity. */ + sparc_emitf(node, "st %%l0, [%%fp%+d]", off_out); } +/* Emit code to swap ent1 and ent2. Uses l0 and l1. */ +static void memperm_emit_swap(const ir_node *node, ir_entity *ent1, + ir_entity *ent2) +{ + ir_graph *irg = get_irn_irg(node); + be_stack_layout_t *layout = be_get_irg_stack_layout(irg); + int off1 = be_get_stack_entity_offset(layout, ent1, 0); + int off2 = be_get_stack_entity_offset(layout, ent2, 0); + + /* Load from first input entity. */ + sparc_emitf(node, "ld [%%fp%+d], %%l0", off1); + /* Load from second input entity. */ + sparc_emitf(node, "ld [%%fp%+d], %%l1", off2); + /* Store first value to second output entity. */ + sparc_emitf(node, "st %%l0, [%%fp%+d]", off2); + /* Store second value to first output entity. */ + sparc_emitf(node, "st %%l1, [%%fp%+d]", off1); +} + +/* Find the index of ent in ents or return -1 if not found. */ +static int get_index(ir_entity **ents, int n, ir_entity *ent) +{ + for (int i = 0; i < n; ++i) { + if (ents[i] == ent) + return i; + } + + return -1; +} + +/* + * Emit code for a MemPerm node. + * + * Analyze MemPerm for copy chains and cyclic swaps and resolve them using + * loads and stores. + * This function is conceptually very similar to permute_values in + * beprefalloc.c. + */ static void emit_be_MemPerm(const ir_node *node) { - int i; - int memperm_arity; - int sp_change = 0; + int memperm_arity = be_get_MemPerm_entity_arity(node); + /* Upper limit for the number of participating entities is twice the + * arity, e.g., for a simple copying MemPerm node with one input/output. */ + int max_size = 2 * memperm_arity; + ir_entity **entities = ALLOCANZ(ir_entity *, max_size); + /* sourceof contains the input entity for each entity. If an entity is + * never used as an output, its entry in sourceof is a fix point. */ + int *sourceof = ALLOCANZ(int, max_size); + /* n_users counts how many output entities use this entity as their input.*/ + int *n_users = ALLOCANZ(int, max_size); + /* n_spilled records the number of spilled registers, either 1 or 2. */ + int n_spilled = 0; + + /* This implementation currently only works with frame pointers. */ ir_graph *irg = get_irn_irg(node); be_stack_layout_t *layout = be_get_irg_stack_layout(irg); + assert(!layout->sp_relative && "MemPerms currently do not work without frame pointers"); - /* this implementation only works with frame pointers currently */ - assert(layout->sp_relative == false); + for (int i = 0; i < max_size; ++i) { + sourceof[i] = i; + } - /* TODO: this implementation is slower than necessary. - The longterm goal is however to avoid the memperm node completely */ + int n = 0; + for (int i = 0; i < memperm_arity; ++i) { + ir_entity *out = be_get_MemPerm_out_entity(node, i); + ir_entity *in = be_get_MemPerm_in_entity(node, i); - memperm_arity = be_get_MemPerm_entity_arity(node); - // we use our local registers - so this is limited to 8 inputs ! - if (memperm_arity > 8) - panic("memperm with more than 8 inputs not supported yet"); + /* Insert into entities to be able to operate on unique indices. */ + if (get_index(entities, n, out) == -1) + entities[n++] = out; + if (get_index(entities, n, in) == -1) + entities[n++] = in; - be_emit_irprintf("\tsub %%sp, %d, %%sp", memperm_arity*4); - be_emit_finish_line_gas(node); + int oidx = get_index(entities, n, out); + int iidx = get_index(entities, n, in); + + sourceof[oidx] = iidx; /* Remember the source. */ + ++n_users[iidx]; /* Increment number of users of this entity. */ + } - for (i = 0; i < memperm_arity; ++i) { - ir_entity *entity = be_get_MemPerm_in_entity(node, i); - int offset = be_get_stack_entity_offset(layout, entity, 0); + /* First do all the copies. */ + for (int oidx = 0; oidx < n; /* empty */) { + int iidx = sourceof[oidx]; - /* spill register */ - be_emit_irprintf("\tst %%l%d, [%%sp%+d]", i, sp_change + SPARC_MIN_STACKSIZE); - be_emit_finish_line_gas(node); + /* Nothing to do for fix points. + * Also, if entities[oidx] is used as an input by another copy, we + * can't overwrite entities[oidx] yet.*/ + if (iidx == oidx || n_users[oidx] > 0) { + ++oidx; + continue; + } - /* load from entity */ - be_emit_irprintf("\tld [%%fp%+d], %%l%d", offset, i); - be_emit_finish_line_gas(node); - sp_change += 4; + /* We found the end of a 'chain', so do the copy. */ + if (n_spilled == 0) { + memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/1); + n_spilled = 1; + } + memperm_emit_copy(node, entities[iidx], entities[oidx]); + + /* Mark as done. */ + sourceof[oidx] = oidx; + + assert(n_users[iidx] > 0); + /* Decrementing the number of users might enable us to do another + * copy. */ + --n_users[iidx]; + + if (iidx < oidx && n_users[iidx] == 0) { + oidx = iidx; + } else { + ++oidx; + } } - for (i = memperm_arity-1; i >= 0; --i) { - ir_entity *entity = be_get_MemPerm_out_entity(node, i); - int offset = be_get_stack_entity_offset(layout, entity, 0); + /* The rest are cycles. */ + for (int oidx = 0; oidx < n; /* empty */) { + int iidx = sourceof[oidx]; + + /* Nothing to do for fix points. */ + if (iidx == oidx) { + ++oidx; + continue; + } + + assert(n_users[iidx] == 1); - sp_change -= 4; + /* Swap the two values to resolve the cycle. */ + if (n_spilled < 2) { + memperm_emit_spill_registers(node, n_spilled, /*n_to_spill=*/2); + n_spilled = 2; + } + memperm_emit_swap(node, entities[iidx], entities[oidx]); - /* store to new entity */ - be_emit_irprintf("\tst %%l%d, [%%fp%+d]", i, offset); - be_emit_finish_line_gas(node); - /* restore register */ - be_emit_irprintf("\tld [%%sp%+d], %%l%d", sp_change + SPARC_MIN_STACKSIZE, i); - be_emit_finish_line_gas(node); + int tidx = sourceof[iidx]; + /* Mark as done. */ + sourceof[iidx] = iidx; + + /* The source of oidx is now the old source of iidx, because we swapped + * the two entities. */ + sourceof[oidx] = tidx; } - be_emit_irprintf("\tadd %%sp, %d, %%sp", memperm_arity*4); - be_emit_finish_line_gas(node); +#ifdef DEBUG_libfirm + /* Only fix points should remain. */ + for (int i = 0; i < max_size; ++i) { + assert(sourceof[i] == i); + } +#endif - assert(sp_change == 0); + assert(n_spilled > 0 && "Useless MemPerm node"); + + memperm_emit_restore_registers(node, n_spilled); } static void emit_sparc_Return(const ir_node *node) @@ -634,20 +1046,31 @@ static void emit_sparc_Return(const ir_node *node) /* hack: we don't explicitely model register changes because of the * restore node. So we have to do it manually here */ - if (delay_slot_filler != NULL && - (is_sparc_Restore(delay_slot_filler) - || is_sparc_RestoreZero(delay_slot_filler))) { + const ir_node *delay_slot = pmap_get(ir_node, delay_slots, node); + if (delay_slot != NULL && + (is_sparc_Restore(delay_slot) || is_sparc_RestoreZero(delay_slot))) { destreg = "%i7"; } - be_emit_cstring("\tjmp "); - be_emit_string(destreg); - if (type->attr.ma.has_compound_ret_parameter) { - be_emit_cstring("+12"); - } else { - be_emit_cstring("+8"); - } - be_emit_finish_line_gas(node); - fill_delay_slot(); + char const *const offset = get_method_calling_convention(type) & cc_compound_ret ? "12" : "8"; + sparc_emitf(node, "jmp %s+%s", destreg, offset); + fill_delay_slot(node); +} + +static const arch_register_t *map_i_to_o_reg(const arch_register_t *reg) +{ + unsigned idx = reg->global_index; + if (idx < REG_I0 || idx > REG_I7) + return reg; + idx += REG_O0 - REG_I0; + assert(REG_O0 <= idx && idx <= REG_O7); + return &sparc_registers[idx]; +} + +static void emit_sparc_Restore(const ir_node *node) +{ + const arch_register_t *destreg + = arch_get_irn_register_out(node, pn_sparc_Restore_res); + sparc_emitf(node, "restore %S2, %SI3, %R", map_i_to_o_reg(destreg)); } static void emit_sparc_FrameAddr(const ir_node *node) @@ -655,23 +1078,9 @@ static void emit_sparc_FrameAddr(const ir_node *node) const sparc_attr_t *attr = get_sparc_attr_const(node); int32_t offset = attr->immediate_value; - if (offset < 0) { - be_emit_cstring("\tadd "); - sparc_emit_source_register(node, 0); - be_emit_cstring(", "); - assert(sparc_is_value_imm_encodeable(offset)); - be_emit_irprintf("%ld", offset); - } else { - be_emit_cstring("\tsub "); - sparc_emit_source_register(node, 0); - be_emit_cstring(", "); - assert(sparc_is_value_imm_encodeable(-offset)); - be_emit_irprintf("%ld", -offset); - } - - be_emit_cstring(", "); - sparc_emit_dest_register(node, 0); - be_emit_finish_line_gas(node); + char const *const insn = offset > 0 ? offset = -offset, "sub" : "add"; + assert(sparc_is_value_imm_encodeable(offset)); + sparc_emitf(node, "%s %S0, %d, %D0", insn, offset); } static const char *get_icc_unsigned(ir_relation relation) @@ -732,57 +1141,40 @@ typedef const char* (*get_cc_func)(ir_relation relation); static void emit_sparc_branch(const ir_node *node, get_cc_func get_cc) { const sparc_jmp_cond_attr_t *attr = get_sparc_jmp_cond_attr_const(node); - ir_relation relation = attr->relation; - const ir_node *proj_true = NULL; - const ir_node *proj_false = NULL; - const ir_edge_t *edge; - const ir_node *block; - const ir_node *next_block; + ir_relation relation = attr->relation; + const ir_node *proj_true = NULL; + const ir_node *proj_false = NULL; + assert((long)pn_sparc_Bicc_false == (long)pn_sparc_fbfcc_false); + assert((long)pn_sparc_Bicc_true == (long)pn_sparc_fbfcc_true); foreach_out_edge(node, edge) { ir_node *proj = get_edge_src_irn(edge); long nr = get_Proj_proj(proj); - if (nr == pn_Cond_true) { + if (nr == pn_sparc_Bicc_true) { proj_true = proj; } else { + assert(nr == pn_sparc_Bicc_false); proj_false = proj; } } - /* for now, the code works for scheduled and non-schedules blocks */ - block = get_nodes_block(node); - - /* we have a block schedule */ - next_block = (ir_node*)get_irn_link(block); - - if (get_irn_link(proj_true) == next_block) { - /* exchange both proj's so the second one can be omitted */ - const ir_node *t = proj_true; + /* emit the true proj */ + sparc_emitf(node, "%s%A %L", get_cc(relation), proj_true); + fill_delay_slot(node); - proj_true = proj_false; - proj_false = t; - relation = get_negated_relation(relation); - } + const ir_node *block = get_nodes_block(node); + const ir_node *next_block = (ir_node*)get_irn_link(block); - /* emit the true proj */ - be_emit_cstring("\t"); - be_emit_string(get_cc(relation)); - be_emit_char(' '); - sparc_emit_cfop_target(proj_true); - be_emit_finish_line_gas(proj_true); - - fill_delay_slot(); - - if (get_irn_link(proj_false) == next_block) { - be_emit_cstring("\t/* fallthrough to "); - sparc_emit_cfop_target(proj_false); - be_emit_cstring(" */"); - be_emit_finish_line_gas(proj_false); + if (get_jump_target(proj_false) == next_block) { + if (be_options.verbose_asm) { + sparc_emitf(node, "/* fallthrough to %L */", proj_false); + } } else { - be_emit_cstring("\tba "); - sparc_emit_cfop_target(proj_false); - be_emit_finish_line_gas(proj_false); - fill_delay_slot(); + sparc_emitf(node, "ba %L", proj_false); + /* TODO: fill this slot as well */ + emitting_delay_slot = true; + sparc_emitf(NULL, "nop"); + emitting_delay_slot = false; } } @@ -806,7 +1198,7 @@ static void emit_sparc_fbfcc(const ir_node *node) panic("TODO: fbfcc flags come from other block"); } if (skip_Proj(flags) == prev) { - be_emit_cstring("\tnop\n"); + sparc_emitf(NULL, "nop"); } emit_sparc_branch(node, get_fcc); } @@ -814,39 +1206,29 @@ static void emit_sparc_fbfcc(const ir_node *node) static void emit_sparc_Ba(const ir_node *node) { if (ba_is_fallthrough(node)) { - be_emit_cstring("\t/* fallthrough to "); - sparc_emit_cfop_target(node); - be_emit_cstring(" */"); + if (be_options.verbose_asm) { + sparc_emitf(node, "/* fallthrough to %L */", node); + } } else { - be_emit_cstring("\tba "); - sparc_emit_cfop_target(node); - be_emit_finish_line_gas(node); - fill_delay_slot(); + sparc_emitf(node, "ba %L", node); + fill_delay_slot(node); } - be_emit_finish_line_gas(node); } static void emit_sparc_SwitchJmp(const ir_node *node) { const sparc_switch_jmp_attr_t *attr = get_sparc_switch_jmp_attr_const(node); - be_emit_cstring("\tjmp "); - sparc_emit_source_register(node, 0); - be_emit_finish_line_gas(node); - fill_delay_slot(); + sparc_emitf(node, "jmp %S0"); + fill_delay_slot(node); - emit_jump_table(node, attr->default_proj_num, attr->jump_table, - get_jump_target); + be_emit_jump_table(node, attr->table, attr->table_entity, get_jump_target); } static void emit_fmov(const ir_node *node, const arch_register_t *src_reg, const arch_register_t *dst_reg) { - be_emit_cstring("\tfmovs %"); - be_emit_string(arch_register_get_name(src_reg)); - be_emit_cstring(", %"); - be_emit_string(arch_register_get_name(dst_reg)); - be_emit_finish_line_gas(node); + sparc_emitf(node, "fmovs %R, %R", src_reg, dst_reg); } static const arch_register_t *get_next_fp_reg(const arch_register_t *reg) @@ -870,36 +1252,19 @@ static void emit_be_Copy(const ir_node *node) if (mode_is_float(mode)) { unsigned bits = get_mode_size_bits(mode); int n = bits > 32 ? bits > 64 ? 3 : 1 : 0; - int i; emit_fmov(node, src_reg, dst_reg); - for (i = 0; i < n; ++i) { + for (int i = 0; i < n; ++i) { src_reg = get_next_fp_reg(src_reg); dst_reg = get_next_fp_reg(dst_reg); emit_fmov(node, src_reg, dst_reg); } } else if (mode_is_data(mode)) { - be_emit_cstring("\tmov "); - sparc_emit_source_register(node, 0); - be_emit_cstring(", "); - sparc_emit_dest_register(node, 0); - be_emit_finish_line_gas(node); + sparc_emitf(node, "mov %S0, %D0"); } else { - panic("emit_be_Copy: invalid mode"); + panic("invalid mode"); } } -static void emit_nothing(const ir_node *irn) -{ - (void) irn; -} - -typedef void (*emit_func) (const ir_node *); - -static inline void set_emitter(ir_op *op, emit_func sparc_emit_node) -{ - op->ops.generic = (op_func)sparc_emit_node; -} - /** * Enters the emitter functions for handled nodes into the generic * pointer of an opcode. @@ -907,93 +1272,66 @@ static inline void set_emitter(ir_op *op, emit_func sparc_emit_node) static void sparc_register_emitters(void) { /* first clear the generic function pointer for all ops */ - clear_irp_opcodes_generic_func(); + ir_clear_opcodes_generic_func(); /* register all emitter functions defined in spec */ sparc_register_spec_emitters(); /* custom emitter */ - set_emitter(op_be_Copy, emit_be_Copy); - set_emitter(op_be_CopyKeep, emit_be_Copy); - set_emitter(op_be_IncSP, emit_be_IncSP); - set_emitter(op_be_MemPerm, emit_be_MemPerm); - set_emitter(op_be_Perm, emit_be_Perm); - set_emitter(op_sparc_Ba, emit_sparc_Ba); - set_emitter(op_sparc_Bicc, emit_sparc_Bicc); - set_emitter(op_sparc_Call, emit_sparc_Call); - set_emitter(op_sparc_fbfcc, emit_sparc_fbfcc); - set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr); - set_emitter(op_sparc_Mulh, emit_sparc_Mulh); - set_emitter(op_sparc_Return, emit_sparc_Return); - set_emitter(op_sparc_SDiv, emit_sparc_SDiv); - set_emitter(op_sparc_SwitchJmp, emit_sparc_SwitchJmp); - set_emitter(op_sparc_UDiv, emit_sparc_UDiv); + be_set_emitter(op_be_Copy, emit_be_Copy); + be_set_emitter(op_be_CopyKeep, emit_be_Copy); + be_set_emitter(op_be_IncSP, emit_be_IncSP); + be_set_emitter(op_be_MemPerm, emit_be_MemPerm); + be_set_emitter(op_be_Perm, emit_be_Perm); + be_set_emitter(op_sparc_Ba, emit_sparc_Ba); + be_set_emitter(op_sparc_Bicc, emit_sparc_Bicc); + be_set_emitter(op_sparc_Call, emit_sparc_Call); + be_set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr); + be_set_emitter(op_sparc_Restore, emit_sparc_Restore); + be_set_emitter(op_sparc_Return, emit_sparc_Return); + be_set_emitter(op_sparc_SDiv, emit_sparc_SDiv); + be_set_emitter(op_sparc_SubSP, emit_sparc_SubSP); + be_set_emitter(op_sparc_SwitchJmp, emit_sparc_SwitchJmp); + be_set_emitter(op_sparc_UDiv, emit_sparc_UDiv); + be_set_emitter(op_sparc_fbfcc, emit_sparc_fbfcc); /* no need to emit anything for the following nodes */ - set_emitter(op_be_Keep, emit_nothing); - set_emitter(op_sparc_Start, emit_nothing); - set_emitter(op_Phi, emit_nothing); + be_set_emitter(op_Phi, be_emit_nothing); + be_set_emitter(op_be_Keep, be_emit_nothing); + be_set_emitter(op_sparc_Start, be_emit_nothing); } -/** - * Emits code for a node. - */ -static void sparc_emit_node(const ir_node *node) +static bool block_needs_label(const ir_node *block, const ir_node *sched_prev) { - ir_op *op = get_irn_op(node); + if (get_Block_entity(block) != NULL) + return true; - if (op->ops.generic) { - emit_func func = (emit_func) op->ops.generic; - be_dbg_set_dbg_info(get_irn_dbg_info(node)); - (*func) (node); + int n_cfgpreds = get_Block_n_cfgpreds(block); + if (n_cfgpreds == 0) { + return false; + } else if (n_cfgpreds > 1) { + return true; } else { - panic("No emit handler for node %+F (graph %+F)\n", node, - current_ir_graph); - } -} - -static ir_node *find_next_delay_slot(ir_node *from) -{ - ir_node *schedpoint = from; - while (!has_delay_slot(schedpoint)) { - if (!sched_has_next(schedpoint)) - return NULL; - schedpoint = sched_next(schedpoint); + ir_node *cfgpred = get_Block_cfgpred(block, 0); + ir_node *cfgpred_block = get_nodes_block(cfgpred); + if (is_Proj(cfgpred) && is_sparc_SwitchJmp(get_Proj_pred(cfgpred))) + return true; + return sched_prev != cfgpred_block || get_jump_target(cfgpred) != block; } - return schedpoint; } /** * Walks over the nodes in a block connected by scheduling edges * and emits code for each node. */ -static void sparc_emit_block(ir_node *block) +static void sparc_emit_block(ir_node *block, ir_node *prev) { - ir_node *node; - ir_node *next_delay_slot; - - assert(is_Block(block)); - - be_gas_emit_block_name(block); - be_emit_cstring(":\n"); - be_emit_write_line(); - - next_delay_slot = find_next_delay_slot(sched_first(block)); - if (next_delay_slot != NULL) - delay_slot_filler = pick_delay_slot_for(next_delay_slot); + bool needs_label = block_needs_label(block, prev); + be_gas_begin_block(block, needs_label); sched_foreach(block, node) { - if (node == delay_slot_filler) { + if (rbitset_is_set(delay_slot_fillers, get_irn_idx(node))) continue; - } - - sparc_emit_node(node); - - if (node == next_delay_slot) { - assert(delay_slot_filler == NULL); - next_delay_slot = find_next_delay_slot(sched_next(node)); - if (next_delay_slot != NULL) - delay_slot_filler = pick_delay_slot_for(next_delay_slot); - } + be_emit_node(node); } } @@ -1002,9 +1340,8 @@ static void sparc_emit_block(ir_node *block) */ static void sparc_emit_func_prolog(ir_graph *irg) { - ir_entity *ent = get_irg_entity(irg); - be_gas_emit_function_prolog(ent, 4); - be_emit_write_line(); + ir_entity *entity = get_irg_entity(irg); + be_gas_emit_function_prolog(entity, 4, NULL); } /** @@ -1012,65 +1349,94 @@ static void sparc_emit_func_prolog(ir_graph *irg) */ static void sparc_emit_func_epilog(ir_graph *irg) { - ir_entity *ent = get_irg_entity(irg); - const char *irg_name = get_entity_ld_name(ent); - be_emit_write_line(); - be_emit_irprintf("\t.size %s, .-%s\n", irg_name, irg_name); - be_emit_cstring("# -- End "); - be_emit_string(irg_name); - be_emit_cstring("\n"); - be_emit_write_line(); + ir_entity *entity = get_irg_entity(irg); + be_gas_emit_function_epilog(entity); } -static void sparc_gen_labels(ir_node *block, void *env) +static void init_jump_links(ir_node *block, void *env) { - ir_node *pred; - int n = get_Block_n_cfgpreds(block); (void) env; + int n = get_Block_n_cfgpreds(block); for (n--; n >= 0; n--) { - pred = get_Block_cfgpred(block, n); - set_irn_link(pred, block); // link the pred of a block (which is a jmp) + ir_node *pred = get_Block_cfgpred(block, n); + set_jump_target(pred, block); } } -void sparc_emit_routine(ir_graph *irg) +static int cmp_block_execfreqs(const void *d1, const void *d2) { - ir_entity *entity = get_irg_entity(irg); - ir_node **block_schedule; - size_t i; - size_t n; + ir_node **p1 = (ir_node**)d1; + ir_node **p2 = (ir_node**)d2; + double freq1 = get_block_execfreq(*p1); + double freq2 = get_block_execfreq(*p2); + if (freq1 < freq2) + return -1; + if (freq1 > freq2) + return 1; + return get_irn_node_nr(*p2)-get_irn_node_nr(*p1); +} + +static void pick_delay_slots(size_t n_blocks, ir_node **blocks) +{ + /* create blocklist sorted by execution frequency */ + ir_node **sorted_blocks = XMALLOCN(ir_node*, n_blocks); + memcpy(sorted_blocks, blocks, n_blocks*sizeof(sorted_blocks[0])); + qsort(sorted_blocks, n_blocks, sizeof(sorted_blocks[0]), + cmp_block_execfreqs); + + for (size_t i = 0; i < n_blocks; ++i) { + const ir_node *block = sorted_blocks[i]; + sched_foreach(block, node) { + if (!has_delay_slot(node)) + continue; + ir_node *filler = pick_delay_slot_for(node); + if (filler == NULL) + continue; + rbitset_set(delay_slot_fillers, get_irn_idx(filler)); + pmap_insert(delay_slots, node, filler); + } + } +} - heights = heights_new(irg); +void sparc_emit_routine(ir_graph *irg) +{ + heights = heights_new(irg); + delay_slot_fillers = rbitset_malloc(get_irg_last_idx(irg)); + delay_slots = pmap_create(); /* register all emitter functions */ sparc_register_emitters(); - be_dbg_method_begin(entity); /* create the block schedule. For now, we don't need it earlier. */ - block_schedule = be_create_block_schedule(irg); + ir_node **block_schedule = be_create_block_schedule(irg); sparc_emit_func_prolog(irg); - irg_block_walk_graph(irg, sparc_gen_labels, NULL, NULL); + irg_block_walk_graph(irg, init_jump_links, NULL, NULL); /* inject block scheduling links & emit code of each block */ - n = ARR_LEN(block_schedule); - for (i = 0; i < n; ++i) { + size_t n_blocks = ARR_LEN(block_schedule); + for (size_t i = 0; i < n_blocks; ++i) { ir_node *block = block_schedule[i]; - ir_node *next_block = i+1 < n ? block_schedule[i+1] : NULL; + ir_node *next_block = i+1 < n_blocks ? block_schedule[i+1] : NULL; set_irn_link(block, next_block); } - for (i = 0; i < n; ++i) { + pick_delay_slots(n_blocks, block_schedule); + + for (size_t i = 0; i < n_blocks; ++i) { ir_node *block = block_schedule[i]; + ir_node *prev = i>=1 ? block_schedule[i-1] : NULL; if (block == get_irg_end_block(irg)) continue; - sparc_emit_block(block); + sparc_emit_block(block, prev); } /* emit function epilog */ sparc_emit_func_epilog(irg); + pmap_destroy(delay_slots); + xfree(delay_slot_fillers); heights_free(heights); }