X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fsparc%2Fsparc_finish.c;h=e30e550aa17ee334d6566bf5afd2dfd8e9686037;hb=b38e2c8fb2d6b9f713f0948a536a28b623b0732b;hp=30fd44c32cd8c5f70a5c959aeafbdbdb52d76074;hpb=01e23d45af0fae4eb29b0909294728eefbfd5f41;p=libfirm

diff --git a/ir/be/sparc/sparc_finish.c b/ir/be/sparc/sparc_finish.c
index 30fd44c32..e30e550aa 100644
--- a/ir/be/sparc/sparc_finish.c
+++ b/ir/be/sparc/sparc_finish.c
@@ -1,27 +1,12 @@
 /*
- * Copyright (C) 1995-2010 University of Karlsruhe.  All right reserved.
- *
  * This file is part of libFirm.
- *
- * This file may be distributed and/or modified under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation and appearing in the file LICENSE.GPL included in the
- * packaging of this file.
- *
- * Licensees holding valid libFirm Professional Edition licenses may use
- * this file in accordance with the libFirm Commercial License.
- * Agreement provided with the Software.
- *
- * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
- * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE.
+ * Copyright (C) 2012 University of Karlsruhe.
  */
 
 /**
  * @file
  * @brief   Peephole optimization and legalization of a sparc function
  * @author  Matthias Braun
- * @version $Id$
  *
  * A note on sparc stackpointer (sp) behaviour:
 * The ABI expects SPARC_MIN_STACKSIZE bytes to be available at the
@@ -47,7 +32,8 @@
 #include "irgmod.h"
 #include "ircons.h"
 #include "irgwalk.h"
-
+#include "heights.h"
+#include "beirg.h"
 #include "bepeephole.h"
 #include "benode.h"
 #include "besched.h"
@@ -55,6 +41,8 @@
 #include "bestack.h"
 #include "beirgmod.h"
 
+static ir_heights_t *heights;
+
 static void kill_unused_stacknodes(ir_node *node)
 {
 	if (get_irn_n_edges(node) > 0)
@@ -78,27 +66,27 @@ static void kill_unused_stacknodes(ir_node *node)
 
 static void introduce_epilog(ir_node *ret)
 {
-	const arch_register_t *sp_reg     = &sparc_registers[REG_SP];
-	ir_graph              *irg        = get_irn_irg(ret);
-	be_stack_layout_t     *layout     = be_get_irg_stack_layout(irg);
-	ir_node               *block      = get_nodes_block(ret);
-	ir_type               *frame_type = get_irg_frame_type(irg);
-	unsigned               frame_size = get_type_size_bytes(frame_type);
-	int                    sp_idx     = be_find_return_reg_input(ret, sp_reg);
-	ir_node               *sp         = get_irn_n(ret, sp_idx);
+	arch_register_t const *const sp_reg = &sparc_registers[REG_SP];
+	assert(arch_get_irn_register_req_in(ret, n_sparc_Return_sp) == sp_reg->single_req);
+	ir_node           *const sp     = get_irn_n(ret, n_sparc_Return_sp);
+	ir_node           *const block  = get_nodes_block(ret);
+	ir_graph          *const irg    = get_irn_irg(ret);
+	be_stack_layout_t *const layout = be_get_irg_stack_layout(irg);
 
 	if (!layout->sp_relative) {
-		const arch_register_t *fp_reg  = &sparc_registers[REG_FRAME_POINTER];
-		ir_node               *fp      = be_get_initial_reg_value(irg, fp_reg);
-		ir_node               *restore = new_bd_sparc_RestoreZero(NULL, block, fp);
+		arch_register_t const *const fp_reg  = &sparc_registers[REG_FRAME_POINTER];
+		ir_node               *const fp      = be_get_initial_reg_value(irg, fp_reg);
+		ir_node               *const new_sp  = be_get_initial_reg_value(irg, sp_reg);
+		ir_node               *const restore = new_bd_sparc_RestoreZero(NULL, block, new_sp, fp);
 		sched_add_before(ret, restore);
 		arch_set_irn_register(restore, sp_reg);
-		set_irn_n(ret, sp_idx, restore);
-
+		set_irn_n(ret, n_sparc_Return_sp, restore);
 		kill_unused_stacknodes(sp);
 	} else {
-		ir_node *incsp = be_new_IncSP(sp_reg, block, sp, -frame_size, 0);
-		set_irn_n(ret, sp_idx, incsp);
+		ir_type  *const frame_type = get_irg_frame_type(irg);
+		unsigned  const frame_size = get_type_size_bytes(frame_type);
+		ir_node  *const incsp      = be_new_IncSP(sp_reg, block, sp, -frame_size, 0);
+		set_irn_n(ret, n_sparc_Return_sp, incsp);
 		sched_add_before(ret, incsp);
 	}
 }
@@ -110,7 +98,6 @@ void sparc_introduce_prolog_epilog(ir_graph *irg)
 	be_stack_layout_t *layout     = be_get_irg_stack_layout(irg);
 	ir_node           *block      = get_nodes_block(start);
 	ir_node           *initial_sp = be_get_initial_reg_value(irg, sp_reg);
-	ir_node           *sp         = initial_sp;
 	ir_node           *schedpoint = start;
 	ir_type           *frame_type = get_irg_frame_type(irg);
 	unsigned           frame_size = get_type_size_bytes(frame_type);
@@ -132,14 +119,12 @@ void sparc_introduce_prolog_epilog(ir_graph *irg)
 	schedpoint = sched_next(schedpoint);
 
 	if (!layout->sp_relative) {
-		ir_node *save = new_bd_sparc_Save_imm(NULL, block, sp, NULL,
-		                                      -SPARC_MIN_STACKSIZE-frame_size);
+		ir_node *const save = new_bd_sparc_Save_imm(NULL, block, initial_sp, NULL, -(SPARC_MIN_STACKSIZE + frame_size));
 		arch_set_irn_register(save, sp_reg);
 		sched_add_after(schedpoint, save);
 		schedpoint = save;
-		edges_reroute(initial_sp, save);
-		set_irn_n(save, n_sparc_Save_stack, initial_sp);
+		edges_reroute_except(initial_sp, save, save);
 
 		/* we still need the Save even if no one is explicitly using the
 		 * value. (TODO: this isn't 100% correct yet, something at the end of
@@ -152,9 +137,8 @@ void sparc_introduce_prolog_epilog(ir_graph *irg)
 			sched_add_after(schedpoint, keep);
 		}
 	} else {
-		ir_node *incsp = be_new_IncSP(sp_reg, block, sp, frame_size, 0);
-		edges_reroute(initial_sp, incsp);
-		be_set_IncSP_pred(incsp, sp);
+		ir_node *const incsp = be_new_IncSP(sp_reg, block, initial_sp, frame_size, 0);
+		edges_reroute_except(initial_sp, incsp, incsp);
 		sched_add_after(schedpoint, incsp);
 	}
 }
@@ -247,7 +231,7 @@ static void finish_sparc_FrameAddr(ir_node *node)
 		sched_add_before(node, new_frameaddr);
 		arch_set_irn_register(new_frameaddr, reg);
-		exchange(node, new_frameaddr);
+		be_peephole_exchange(node, new_frameaddr);
 	}
 }
@@ -269,21 +253,49 @@ static void finish_sparc_Ld(ir_node *node)
 		ir_node                 *constant      = create_constant_from_immediate(node, offset);
 		ir_node                 *new_load      = new_bd_sparc_Ld_reg(dbgi, block, ptr, constant, mem, load_store_mode);
 		sparc_load_store_attr_t *new_load_attr = get_sparc_load_store_attr(new_load);
-		unsigned                 n_outs        = arch_get_irn_n_outs(node);
-		unsigned                 i;
 
 		new_load_attr->is_frame_entity = load_store_attr->is_frame_entity;
 		new_load_attr->is_reg_reg      = load_store_attr->is_reg_reg;
 
 		sched_add_before(node, new_load);
-		for (i = 0; i < n_outs; i++) {
+		be_foreach_out(node, i) {
 			arch_set_irn_register_out(new_load, i, arch_get_irn_register_out(node, i));
 		}
-		exchange(node, new_load);
+		be_peephole_exchange(node, new_load);
 	}
 }
 
+static void split_sparc_ldf(ir_node *node)
+{
+	sparc_load_store_attr_t *attr = get_sparc_load_store_attr(node);
+	unsigned                 bits = get_mode_size_bits(attr->load_store_mode);
+	/* split 128-bit loads into two 64-bit loads */
+	if (bits == 128) {
+		dbg_info *dbgi  = get_irn_dbg_info(node);
+		ir_node  *block = get_nodes_block(node);
+		ir_node  *ptr   = get_irn_n(node, n_sparc_Ldf_ptr);
+		ir_node  *mem   = get_irn_n(node, n_sparc_Ldf_mem);
+		ir_node  *new_load
+			= new_bd_sparc_Ldf_d(dbgi, block, ptr, mem, mode_D,
+			                     attr->base.immediate_value_entity,
+			                     attr->base.immediate_value + 8,
+			                     attr->is_frame_entity);
+		ir_node  *new_mem = new_r_Proj(new_load, mode_M, pn_sparc_Ldf_M);
+
+		const arch_register_t *reg
+			= arch_get_irn_register_out(node, pn_sparc_Ldf_res);
+		unsigned reg_index = reg->global_index;
+
+		arch_set_irn_register_out(new_load, pn_sparc_Ldf_res,
+		                          &sparc_registers[reg_index+2]);
+
+		attr->load_store_mode = mode_D;
+		set_irn_n(node, n_sparc_Ldf_mem, new_mem);
+		sched_add_before(node, new_load);
+	}
+}
+
 static void finish_sparc_Ldf(ir_node *node)
 {
 	sparc_attr_t *attr = get_sparc_attr(node);
@@ -303,17 +315,15 @@ static void finish_sparc_Ldf(ir_node *node)
 		ir_node                 *new_ptr       = new_bd_sparc_Add_reg(dbgi, block, ptr, constant);
 		ir_node                 *new_load      = new_bd_sparc_Ldf_s(dbgi, block, new_ptr, mem, load_store_mode, NULL, 0, true);
 		sparc_load_store_attr_t *new_load_attr = get_sparc_load_store_attr(new_load);
-		unsigned                 n_outs        = arch_get_irn_n_outs(node);
-		unsigned                 i;
 
 		new_load_attr->is_frame_entity = load_store_attr->is_frame_entity;
 		new_load_attr->is_reg_reg      = load_store_attr->is_reg_reg;
 
 		sched_add_before(node, new_load);
-		for (i = 0; i < n_outs; i++) {
+		be_foreach_out(node, i) {
 			arch_set_irn_register_out(new_load, i, arch_get_irn_register_out(node, i));
 		}
-		exchange(node, new_load);
+		be_peephole_exchange(node, new_load);
 	}
 }
 
@@ -337,17 +347,15 @@ static void finish_sparc_St(ir_node *node)
 		ir_node                 *constant      = create_constant_from_immediate(node, offset);
 		ir_node                 *new_load      = new_bd_sparc_St_reg(dbgi, block, value, ptr, constant, mem, load_store_mode);
 		sparc_load_store_attr_t *new_load_attr = get_sparc_load_store_attr(new_load);
-		unsigned                 n_outs        = arch_get_irn_n_outs(node);
-		unsigned                 i;
 
 		new_load_attr->is_frame_entity = load_store_attr->is_frame_entity;
 		new_load_attr->is_reg_reg      = load_store_attr->is_reg_reg;
 
 		sched_add_before(node, new_load);
-		for (i = 0; i < n_outs; i++) {
+		be_foreach_out(node, i) {
 			arch_set_irn_register_out(new_load, i, arch_get_irn_register_out(node, i));
 		}
-		exchange(node, new_load);
+		be_peephole_exchange(node, new_load);
 	}
 }
 
@@ -372,17 +380,15 @@ static void finish_sparc_Stf(ir_node *node)
 		ir_node                 *new_ptr       = new_bd_sparc_Add_reg(dbgi, block, ptr, constant);
 		ir_node                 *new_load      = new_bd_sparc_Stf_s(dbgi, block, value, new_ptr, mem, load_store_mode, NULL, 0, true);
 		sparc_load_store_attr_t *new_load_attr = get_sparc_load_store_attr(new_load);
-		unsigned                 n_outs        = arch_get_irn_n_outs(node);
-		unsigned                 i;
 
 		new_load_attr->is_frame_entity = load_store_attr->is_frame_entity;
 		new_load_attr->is_reg_reg      = load_store_attr->is_reg_reg;
 
 		sched_add_before(node, new_load);
-		for (i = 0; i < n_outs; i++) {
+		be_foreach_out(node, i) {
 			arch_set_irn_register_out(new_load, i, arch_get_irn_register_out(node, i));
 		}
-		exchange(node, new_load);
+		be_peephole_exchange(node, new_load);
 	}
 }
 
@@ -407,37 +413,156 @@ static void peephole_sparc_FrameAddr(ir_node *node)
 {
 	/* the peephole code currently doesn't allow this since it changes
 	 * the register. Find out why and how to work around this... */
-#if 0
-	const sparc_attr_t *attr = get_sparc_attr_const(node);
-	if (attr->immediate_value == 0) {
-		ir_node *base = get_irn_n(node, n_sparc_FrameAddr_base);
-		be_peephole_exchange(node, base);
-	}
-#endif
 	(void) node;
 }
 
-static void finish_sparc_Return(ir_node *node)
+/* The output must not be a local or out register, since the destination of
+ * the restore is the rotated register file where only the old in registers
+ * are still visible (as out registers). */
+static bool is_restorezeroopt_reg(const arch_register_t *reg)
 {
-	ir_node *schedpoint = node;
-	ir_node *restore;
-	/* see that there is no code between Return and restore, if there is move
-	 * it in front of the restore */
-	while (true) {
-		if (!sched_has_prev(schedpoint))
-			return;
+	unsigned index = reg->global_index;
+	return (index >= REG_G0 && index <= REG_G7)
+	    || (index >= REG_I0 && index <= REG_I7);
+}
+
+static void replace_with_restore_reg(ir_node *node, ir_node *replaced,
+                                     ir_node *op0, ir_node *op1)
+{
+	dbg_info *dbgi     = get_irn_dbg_info(node);
+	ir_node  *stack_in = get_irn_n(node, n_sparc_RestoreZero_stack);
+	ir_node  *fp       = get_irn_n(node, n_sparc_RestoreZero_frame_pointer);
+	ir_node  *block    = get_nodes_block(node);
+	ir_mode  *mode     = get_irn_mode(node);
+	ir_node  *new_node = new_bd_sparc_Restore_reg(dbgi, block, stack_in, fp,
+	                                              op0, op1);
+	ir_node  *stack    = new_r_Proj(new_node, mode, pn_sparc_Restore_stack);
+	ir_node  *res      = new_r_Proj(new_node, mode, pn_sparc_Restore_res);
+	const arch_register_t *reg = arch_get_irn_register(replaced);
+	const arch_register_t *sp  = &sparc_registers[REG_SP];
+	arch_set_irn_register_out(new_node, pn_sparc_Restore_stack, sp);
+	arch_set_irn_register_out(new_node, pn_sparc_Restore_res, reg);
+
+	sched_add_before(node, new_node);
+	be_peephole_exchange(node, stack);
+	be_peephole_exchange(replaced, res);
+}
+
+static void replace_with_restore_imm(ir_node *node, ir_node *replaced,
+                                     ir_node *op, ir_entity *imm_entity,
+                                     int32_t immediate)
+{
+	dbg_info *dbgi     = get_irn_dbg_info(node);
+	ir_node  *stack_in = get_irn_n(node, n_sparc_RestoreZero_stack);
+	ir_node  *fp       = get_irn_n(node, n_sparc_RestoreZero_frame_pointer);
+	ir_node  *block    = get_nodes_block(node);
+	ir_mode  *mode     = get_irn_mode(node);
+	ir_node  *new_node = new_bd_sparc_Restore_imm(dbgi, block, stack_in, fp,
+	                                              op, imm_entity, immediate);
+	ir_node  *stack    = new_r_Proj(new_node, mode, pn_sparc_Restore_stack);
+	ir_node  *res      = new_r_Proj(new_node, mode, pn_sparc_Restore_res);
+	const arch_register_t *reg = arch_get_irn_register(replaced);
+	const arch_register_t *sp  = &sparc_registers[REG_SP];
+	arch_set_irn_register_out(new_node, pn_sparc_Restore_stack, sp);
+	arch_set_irn_register_out(new_node, pn_sparc_Restore_res, reg);
+
+	sched_add_before(node, new_node);
+	be_peephole_exchange(node, stack);
+	be_peephole_exchange(replaced, res);
+}
+
+static void peephole_sparc_RestoreZero(ir_node *node)
+{
+	/* restore gives us a free "add" instruction, let's try to use that to fold
+	 * an instruction in. We can do the following:
+	 *
+	 * - Copy values                  (g0 + reg)
+	 * - Produce constants            (g0 + immediate)
+	 * - Perform an add               (reg + reg)
+	 * - Perform a sub with immediate (reg + (-immediate))
+	 *
+	 * Note: In an ideal world, this would not be a peephole optimization but
+	 * would already be performed during code selection, since about all
+	 * foldable ops are arguments of the return node. However, we have a hard
+	 * time doing this because we construct the epilogue code only after
+	 * register allocation (and therefore after code selection).
+	 */
+	int n_tries = 10; /* limit our search */
+
+	for (ir_node *schedpoint = node;;) {
+		const arch_register_t *reg;
 		schedpoint = sched_prev(schedpoint);
-		if (is_sparc_Restore(schedpoint) || is_sparc_RestoreZero(schedpoint))
+		if (sched_is_begin(schedpoint))
 			break;
+
+		if (--n_tries == 0)
+			break;
+
+		if (arch_get_irn_n_outs(schedpoint) == 0)
+			continue;
+
+		if (!mode_is_data(get_irn_mode(schedpoint)))
+			return;
+
+		reg = arch_get_irn_register(schedpoint);
+		if (!is_restorezeroopt_reg(reg))
+			continue;
+
+		if (be_is_Copy(schedpoint) && be_can_move_down(heights, schedpoint, node)) {
+			ir_node *const op = be_get_Copy_op(schedpoint);
+			replace_with_restore_imm(node, schedpoint, op, NULL, 0);
+		} else if (is_sparc_Or(schedpoint) &&
+		           arch_get_irn_flags(schedpoint) & ((arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) &&
+		           arch_get_irn_register_in(schedpoint, 0) == &sparc_registers[REG_G0] &&
+		           be_can_move_down(heights, schedpoint, node)) {
+			/* it's a constant */
+			const sparc_attr_t *attr      = get_sparc_attr_const(schedpoint);
+			ir_entity          *entity    = attr->immediate_value_entity;
+			int32_t             immediate = attr->immediate_value;
+			ir_node            *g0        = get_irn_n(schedpoint, 0);
+			replace_with_restore_imm(node, schedpoint, g0, entity, immediate);
+		} else if (is_sparc_Add(schedpoint) &&
+		           be_can_move_down(heights, schedpoint, node)) {
+			if (arch_get_irn_flags(schedpoint) & ((arch_irn_flags_t)sparc_arch_irn_flag_immediate_form)) {
+				ir_node            *op     = get_irn_n(schedpoint, 0);
+				const sparc_attr_t *attr   = get_sparc_attr_const(schedpoint);
+				ir_entity          *entity = attr->immediate_value_entity;
+				int32_t             imm    = attr->immediate_value;
+				replace_with_restore_imm(node, schedpoint, op, entity, imm);
+			} else {
+				ir_node *op0 = get_irn_n(schedpoint, 0);
+				ir_node *op1 = get_irn_n(schedpoint, 1);
+				replace_with_restore_reg(node, schedpoint, op0, op1);
+			}
+		} else if (is_sparc_Sub(schedpoint) &&
+		           arch_get_irn_flags(schedpoint) & ((arch_irn_flags_t)sparc_arch_irn_flag_immediate_form) &&
+		           arch_get_irn_register_in(schedpoint, 0) == &sparc_registers[REG_G0] &&
+		           be_can_move_down(heights, schedpoint, node)) {
+			/* it's a constant */
+			const sparc_attr_t *attr   = get_sparc_attr_const(schedpoint);
+			ir_entity          *entity = attr->immediate_value_entity;
+			int32_t             imm    = attr->immediate_value;
+			if (entity == NULL && sparc_is_value_imm_encodeable(-imm)) {
+				ir_node *g0 = get_irn_n(schedpoint, 0);
+				replace_with_restore_imm(node, schedpoint, g0, NULL, -imm);
+			} else {
+				continue;
+			}
+		}
+		/* when we get here, we have performed a folding and are done */
+		return;
 	}
-	restore = schedpoint;
-	schedpoint = sched_prev(node);
-	/* move all code between return and restore up */
-	while (schedpoint != restore) {
-		ir_node *next_schedpoint = sched_prev(schedpoint);
-		sched_remove(schedpoint);
-		sched_add_before(restore, schedpoint);
-		schedpoint = next_schedpoint;
+}
+
+static void finish_sparc_Return(ir_node *node)
+{
+	/* Ensure that the restore is directly before the return. */
+	sched_foreach_reverse_from(sched_prev(node), restore) {
+		if (is_sparc_Restore(restore) || is_sparc_RestoreZero(restore)) {
+			sched_remove(restore);
+			sched_add_before(node, restore);
+			break;
+		}
 	}
 }
 
@@ -492,7 +617,7 @@ static void sparc_set_frame_entity(ir_node *node, ir_entity *entity)
 	}
 }
 
-void sparc_finish(ir_graph *irg)
+void sparc_finish_graph(ir_graph *irg)
 {
 	be_stack_layout_t *stack_layout = be_get_irg_stack_layout(irg);
 	bool               at_begin     = stack_layout->sp_relative ? true : false;
@@ -501,6 +626,7 @@
 	irg_walk_graph(irg, NULL, sparc_collect_frame_entity_nodes, fec_env);
 	be_assign_entities(fec_env, sparc_set_frame_entity, at_begin);
 	be_free_frame_entity_coalescer(fec_env);
+	sparc_adjust_stack_entity_offsets(irg);
 
 	sparc_introduce_prolog_epilog(irg);
 
@@ -508,14 +634,19 @@
 	be_abi_fix_stack_nodes(irg);
 	sparc_fix_stack_bias(irg);
 
+	heights = heights_new(irg);
+
 	/* perform peephole optimizations */
-	clear_irp_opcodes_generic_func();
+	ir_clear_opcodes_generic_func();
 	register_peephole_optimisation(op_be_IncSP,        peephole_be_IncSP);
 	register_peephole_optimisation(op_sparc_FrameAddr, peephole_sparc_FrameAddr);
+	register_peephole_optimisation(op_sparc_RestoreZero,
+	                               peephole_sparc_RestoreZero);
+	register_peephole_optimisation(op_sparc_Ldf, split_sparc_ldf);
 	be_peephole_opt(irg);
 
 	/* perform legalizations (mostly fix nodes with too big immediates) */
-	clear_irp_opcodes_generic_func();
+	ir_clear_opcodes_generic_func();
 	register_peephole_optimisation(op_be_IncSP,        finish_be_IncSP);
 	register_peephole_optimisation(op_sparc_FrameAddr, finish_sparc_FrameAddr);
 	register_peephole_optimisation(op_sparc_Ld,        finish_sparc_Ld);
@@ -526,5 +657,7 @@
 	register_peephole_optimisation(op_sparc_Stf,       finish_sparc_Stf);
 	be_peephole_opt(irg);
 
+	heights_free(heights);
+
 	be_remove_dead_nodes_from_schedule(irg);
 }
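
The RestoreZero folding in this patch leans on the fact that SPARC's restore is a full add instruction whose destination is written in the caller's register window. A minimal sketch of the rewrite it aims for (illustrative assembly only, assumed for exposition; the patch itself rewrites Firm nodes, not this exact text):

	! before: a constant is materialized into the return register,
	! followed by a bare window restore
	or      %g0, 7, %i0        ! i0 = 0 + 7 (the is_sparc_Or + REG_G0 case)
	restore %g0, %g0, %g0      ! RestoreZero: only rotates the register window

	! after: the free add of restore produces the value; the callee's %i0
	! is visible as %o0 once the window has rotated
	restore %g0, 7, %o0

This is also why is_restorezeroopt_reg() accepts only global and in registers: after the window rotation the old ins reappear as outs, while locals and outs are no longer addressable and therefore cannot be restore destinations.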