From: Matthias Braun Date: Fri, 17 Feb 2012 15:53:56 +0000 (+0100) Subject: ia32: fix and optimize bswap sequence on i386 X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=50fcbd2c84474270ca6c5c5787c79b4571251b82;p=libfirm ia32: fix and optimize bswap sequence on i386 --- diff --git a/ir/be/ia32/ia32_architecture.c b/ir/be/ia32/ia32_architecture.c index 92990c27e..6fdfb607a 100644 --- a/ir/be/ia32/ia32_architecture.c +++ b/ir/be/ia32/ia32_architecture.c @@ -108,7 +108,7 @@ enum cpu_arch_features { * CPU's. */ typedef enum cpu_support { - cpu_generic = arch_generic32, + cpu_generic = arch_generic32, /* intel CPUs */ cpu_i386 = arch_i386, @@ -916,7 +916,7 @@ void ia32_setup_cg_config(void) c->use_sse_prefetch = FLAGS(arch, (arch_feature_3DNowE | arch_feature_sse1)); c->use_3dnow_prefetch = FLAGS(arch, arch_feature_3DNow); c->use_popcnt = FLAGS(arch, arch_feature_popcnt); - c->use_i486 = (arch & arch_mask) >= arch_i486; + c->use_bswap = (arch & arch_mask) >= arch_i486; c->optimize_cc = opt_cc; c->use_unsafe_floatconv = opt_unsafe_floatconv; c->emit_machcode = emit_machcode; diff --git a/ir/be/ia32/ia32_architecture.h b/ir/be/ia32/ia32_architecture.h index 4668d8b55..04e3cf344 100644 --- a/ir/be/ia32/ia32_architecture.h +++ b/ir/be/ia32/ia32_architecture.h @@ -80,7 +80,7 @@ typedef struct { /** use SSE4.2 or SSE4a popcnt instruction */ unsigned use_popcnt:1; /** use i486 instructions */ - unsigned use_i486:1; + unsigned use_bswap:1; /** optimize calling convention where possible */ unsigned optimize_cc:1; /** diff --git a/ir/be/ia32/ia32_spec.pl b/ir/be/ia32/ia32_spec.pl index 3355198ee..ac32a75c9 100644 --- a/ir/be/ia32/ia32_spec.pl +++ b/ir/be/ia32/ia32_spec.pl @@ -746,7 +746,7 @@ Rol => { out => [ "in_r1 !in_r2", "flags" ] }, ins => [ "val", "count" ], outs => [ "res", "flags" ], - emit => '. rol%M %SB1, %S0', + emit => '. rol%M %SB1, %DS0', units => [ "GP" ], latency => 1, mode => $mode_gp, diff --git a/ir/be/ia32/ia32_transform.c b/ir/be/ia32/ia32_transform.c index ebcdd6616..0b15ad7e2 100644 --- a/ir/be/ia32/ia32_transform.c +++ b/ir/be/ia32/ia32_transform.c @@ -5283,27 +5283,23 @@ static ir_node *gen_bswap(ir_node *node) ir_node *new_block = be_transform_node(block); ir_mode *mode = get_irn_mode(param); unsigned size = get_mode_size_bits(mode); - ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4; switch (size) { case 32: - if (ia32_cg_config.use_i486) { + if (ia32_cg_config.use_bswap) { /* swap available */ return new_bd_ia32_Bswap(dbgi, new_block, param); + } else { + ir_node *i8 = ia32_create_Immediate(NULL, 0, 8); + ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8); + ir_node *i16 = ia32_create_Immediate(NULL, 0, 16); + ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16); + ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8); + set_ia32_ls_mode(rol1, mode_Hu); + set_ia32_ls_mode(rol2, mode_Iu); + set_ia32_ls_mode(rol3, mode_Hu); + return rol3; } - s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24)); - s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8)); - - m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00)); - m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1); - - s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8)); - - m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000)); - m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3); - - s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24)); - return new_bd_ia32_Lea(dbgi, new_block, m4, s4); case 16: /* swap16 always available */