ia32: fix and optimize bswap sequence on i386
author Matthias Braun <matze@braunis.de>
Fri, 17 Feb 2012 15:53:56 +0000 (16:53 +0100)
committer Matthias Braun <matze@braunis.de>
Fri, 17 Feb 2012 16:00:01 +0000 (17:00 +0100)
ir/be/ia32/ia32_architecture.c
ir/be/ia32/ia32_architecture.h
ir/be/ia32/ia32_spec.pl
ir/be/ia32/ia32_transform.c

index 92990c2..6fdfb60 100644 (file)
@@ -108,7 +108,7 @@ enum cpu_arch_features {
  * CPU's.
  */
 typedef enum cpu_support {
-       cpu_generic     = arch_generic32,
+       cpu_generic             = arch_generic32,
 
        /* intel CPUs */
        cpu_i386                = arch_i386,
@@ -916,7 +916,7 @@ void ia32_setup_cg_config(void)
        c->use_sse_prefetch     = FLAGS(arch, (arch_feature_3DNowE | arch_feature_sse1));
        c->use_3dnow_prefetch   = FLAGS(arch, arch_feature_3DNow);
        c->use_popcnt           = FLAGS(arch, arch_feature_popcnt);
-       c->use_i486             = (arch & arch_mask) >= arch_i486;
+       c->use_bswap            = (arch & arch_mask) >= arch_i486;
        c->optimize_cc          = opt_cc;
        c->use_unsafe_floatconv = opt_unsafe_floatconv;
        c->emit_machcode        = emit_machcode;
index 4668d8b..04e3cf3 100644 (file)
@@ -80,7 +80,7 @@ typedef struct {
        /** use SSE4.2 or SSE4a popcnt instruction */
        unsigned use_popcnt:1;
        /** use i486 instructions */
-       unsigned use_i486:1;
+       unsigned use_bswap:1;
        /** optimize calling convention where possible */
        unsigned optimize_cc:1;
        /**
index 3355198..ac32a75 100644 (file)
@@ -746,7 +746,7 @@ Rol => {
                       out => [ "in_r1 !in_r2", "flags" ] },
        ins       => [ "val", "count" ],
        outs      => [ "res", "flags" ],
-       emit      => '. rol%M %SB1, %S0',
+       emit      => '. rol%M %SB1, %DS0',
        units     => [ "GP" ],
        latency   => 1,
        mode      => $mode_gp,
index ebcdd66..0b15ad7 100644 (file)
@@ -5283,27 +5283,23 @@ static ir_node *gen_bswap(ir_node *node)
        ir_node *new_block = be_transform_node(block);
        ir_mode *mode      = get_irn_mode(param);
        unsigned size      = get_mode_size_bits(mode);
-       ir_node  *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
 
        switch (size) {
        case 32:
-               if (ia32_cg_config.use_i486) {
+               if (ia32_cg_config.use_bswap) {
                        /* swap available */
                        return new_bd_ia32_Bswap(dbgi, new_block, param);
+               } else {
+                       ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
+                       ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
+                       ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
+                       ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
+                       ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
+                       set_ia32_ls_mode(rol1, mode_Hu);
+                       set_ia32_ls_mode(rol2, mode_Iu);
+                       set_ia32_ls_mode(rol3, mode_Hu);
+                       return rol3;
                }
-               s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
-               s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
-
-               m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
-               m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
-
-               s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
-
-               m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
-               m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
-
-               s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
-               return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
 
        case 16:
                /* swap16 always available */