becopyilp: Do not advertise the switch to dump the solution, because this is not...
[libfirm] / ir / lower / lower_dw.c
index 5adc340..78e48d3 100644 (file)
@@ -1,20 +1,6 @@
 /*
- * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
- *
  * This file is part of libFirm.
- *
- * This file may be distributed and/or modified under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation and appearing in the file LICENSE.GPL included in the
- * packaging of this file.
- *
- * Licensees holding valid libFirm Professional Edition licenses may use
- * this file in accordance with the libFirm Commercial License.
- * Agreement provided with the Software.
- *
- * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
- * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE.
+ * Copyright (C) 2012 University of Karlsruhe.
  */
 
 /**
@@ -22,7 +8,6 @@
  * @brief   Lower double word operations, i.e. 64bit -> 32bit, 32bit -> 16bit etc.
  * @date    8.10.2004
  * @author  Michael Beck
- * @version $Id$
  */
 #include "config.h"
 
 #include <stdbool.h>
 #include <assert.h>
 
+#include "be.h"
 #include "error.h"
 #include "lowering.h"
 #include "irnode_t.h"
+#include "irnodeset.h"
 #include "irgraph_t.h"
 #include "irmode_t.h"
 #include "iropt_t.h"
@@ -46,6 +33,7 @@
 #include "irgwalk.h"
 #include "ircons.h"
 #include "irflag.h"
+#include "iroptimize.h"
 #include "irtools.h"
 #include "debug.h"
 #include "set.h"
 #include "irdump.h"
 #include "array_t.h"
 #include "irpass_t.h"
-
-typedef struct lower_env_t lower_env_t;
-
-/**
- * The type of a lower function.
- *
- * @param node   the node to be lowered
- * @param env    the lower environment
- */
-typedef void (*lower_func)(ir_node *node, ir_mode *mode, lower_env_t *env);
+#include "lower_dw.h"
 
 /** A map from (op, imode, omode) to Intrinsic functions entities. */
 static set *intrinsic_fkt;
@@ -74,8 +53,14 @@ static set *conv_types;
 /** A map from a method type to its lowered type. */
 static pmap *lowered_type;
 
+/** A map from a builtin type to its lower and higher type. */
+static pmap *lowered_builtin_type_high;
+static pmap *lowered_builtin_type_low;
+
 /** The types for the binop and unop intrinsics. */
-static ir_type *binop_tp_u, *binop_tp_s, *unop_tp_u, *unop_tp_s, *shiftop_tp_u, *shiftop_tp_s, *tp_s, *tp_u;
+static ir_type *binop_tp_u, *binop_tp_s, *unop_tp_u, *unop_tp_s, *tp_s, *tp_u;
+
+static ir_nodeset_t created_mux_nodes;
 
 /** the debug handle */
 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
@@ -99,15 +84,6 @@ typedef struct conv_tp_entry {
        ir_type       *mtd;   /**< the associated method type of this (imode, omode) pair */
 } conv_tp_entry_t;
 
-/**
- * Every double word node will be replaced,
- * we need some store to hold the replacement:
- */
-typedef struct node_entry_t {
-       ir_node *low_word;    /**< the low word */
-       ir_node *high_word;   /**< the high word */
-} node_entry_t;
-
 enum lower_flags {
        MUST_BE_LOWERED = 1,  /**< graph must be lowered */
        CF_CHANGED      = 2,  /**< control flow was changed */
@@ -116,32 +92,32 @@ enum lower_flags {
 /**
  * The lower environment.
  */
-struct lower_env_t {
-       node_entry_t **entries;       /**< entries per node */
+typedef struct lower_dw_env_t {
+       lower64_entry_t **entries;     /**< per-node lowering records, indexed by get_irn_idx() of the node */
        ir_graph      *irg;
-       struct obstack obst;          /**< an obstack holding the temporary data */
-       ir_type   *l_mtp;              /**< lowered method type of the current method */
-       ir_tarval *tv_mode_bytes;     /**< a tarval containing the number of bytes in the lowered modes */
-       ir_tarval *tv_mode_bits;      /**< a tarval containing the number of bits in the lowered modes */
+       struct obstack obst;           /**< an obstack holding the temporary data */
+       ir_tarval *tv_mode_bytes;      /**< a tarval containing the number of bytes in the lowered modes */
        pdeq      *waitq;              /**< a wait queue of all nodes that must be handled later */
        ir_node  **lowered_phis;       /**< list of lowered phis */
-       pmap      *proj_2_block;       /**< a map from ProjX to its destination blocks */
        ir_mode   *high_signed;        /**< doubleword signed type */
        ir_mode   *high_unsigned;      /**< doubleword unsigned type */
        ir_mode   *low_signed;         /**< word signed type */
        ir_mode   *low_unsigned;       /**< word unsigned type */
        ident     *first_id;           /**< .l for little and .h for big endian */
        ident     *next_id;            /**< .h for little and .l for big endian */
-       const lwrdw_param_t *params;  /**< transformation parameter */
-       unsigned flags;               /**< some flags */
-       unsigned n_entries;           /**< number of entries */
-       ir_type  *value_param_tp;     /**< the old value param type */
-};
+       const lwrdw_param_t *params;   /**< transformation parameter */
+       unsigned flags;                /**< bitset of enum lower_flags values */
+       unsigned n_entries;            /**< number of slots in the entries array */
+} lower_dw_env_t;
+
+static lower_dw_env_t *env; /* file-wide lowering environment; the functions in this file read it directly instead of taking an env parameter */
+
+static void lower_node(ir_node *node); /* forward declaration */
 
 /**
  * Create a method type for a Conv emulation from imode to omode.
  */
-static ir_type *get_conv_type(ir_mode *imode, ir_mode *omode, lower_env_t *env)
+static ir_type *get_conv_type(ir_mode *imode, ir_mode *omode)
 {
        conv_tp_entry_t key, *entry;
        ir_type *mtd;
@@ -150,7 +126,7 @@ static ir_type *get_conv_type(ir_mode *imode, ir_mode *omode, lower_env_t *env)
        key.omode = omode;
        key.mtd   = NULL;
 
-       entry = (conv_tp_entry_t*)set_insert(conv_types, &key, sizeof(key), HASH_PTR(imode) ^ HASH_PTR(omode));
+       entry = set_insert(conv_tp_entry_t, conv_types, &key, sizeof(key), hash_ptr(imode) ^ hash_ptr(omode));
        if (! entry->mtd) {
                int n_param = 1, n_res = 1;
 
@@ -165,8 +141,13 @@ static ir_type *get_conv_type(ir_mode *imode, ir_mode *omode, lower_env_t *env)
                /* set param types and result types */
                n_param = 0;
                if (imode == env->high_signed) {
-                       set_method_param_type(mtd, n_param++, tp_u);
-                       set_method_param_type(mtd, n_param++, tp_s);
+                       if (env->params->little_endian) {
+                               set_method_param_type(mtd, n_param++, tp_u);
+                               set_method_param_type(mtd, n_param++, tp_s);
+                       } else {
+                               set_method_param_type(mtd, n_param++, tp_s);
+                               set_method_param_type(mtd, n_param++, tp_u);
+                       }
                } else if (imode == env->high_unsigned) {
                        set_method_param_type(mtd, n_param++, tp_u);
                        set_method_param_type(mtd, n_param++, tp_u);
@@ -177,8 +158,13 @@ static ir_type *get_conv_type(ir_mode *imode, ir_mode *omode, lower_env_t *env)
 
                n_res = 0;
                if (omode == env->high_signed) {
-                       set_method_res_type(mtd, n_res++, tp_u);
-                       set_method_res_type(mtd, n_res++, tp_s);
+                       if (env->params->little_endian) {
+                               set_method_res_type(mtd, n_res++, tp_u);
+                               set_method_res_type(mtd, n_res++, tp_s);
+                       } else {
+                               set_method_res_type(mtd, n_res++, tp_s);
+                               set_method_res_type(mtd, n_res++, tp_u);
+                       }
                } else if (omode == env->high_unsigned) {
                        set_method_res_type(mtd, n_res++, tp_u);
                        set_method_res_type(mtd, n_res++, tp_u);
@@ -200,19 +186,23 @@ static ir_type *get_conv_type(ir_mode *imode, ir_mode *omode, lower_env_t *env)
  */
 static void add_block_cf_input_nr(ir_node *block, int nr, ir_node *cf)
 {
-       int i, arity = get_irn_arity(block);
-       ir_node **in, *phi;
+       int i, arity = get_Block_n_cfgpreds(block);
+       ir_node **in;
 
        assert(nr < arity);
 
        NEW_ARR_A(ir_node *, in, arity + 1);
        for (i = 0; i < arity; ++i)
-               in[i] = get_irn_n(block, i);
+               in[i] = get_Block_cfgpred(block, i);
        in[i] = cf;
 
        set_irn_in(block, i + 1, in);
 
-       for (phi = get_Block_phis(block); phi != NULL; phi = get_Phi_next(phi)) {
+       foreach_out_edge(block, edge) {
+               ir_node *phi = get_edge_src_irn(edge);
+               if (!is_Phi(phi))
+                       continue;
+
                for (i = 0; i < arity; ++i)
                        in[i] = get_irn_n(phi, i);
                in[i] = in[nr];
@@ -227,11 +217,11 @@ static void add_block_cf_input_nr(ir_node *block, int nr, ir_node *cf)
  */
 static void add_block_cf_input(ir_node *block, ir_node *tmpl, ir_node *cf)
 {
-       int i, arity = get_irn_arity(block);
+       int i, arity = get_Block_n_cfgpreds(block);
        int nr = 0;
 
        for (i = 0; i < arity; ++i) {
-               if (get_irn_n(block, i) == tmpl) {
+               if (get_Block_cfgpred(block, i) == tmpl) {
                        nr = i;
                        break;
                }
@@ -265,23 +255,22 @@ static ir_mode *get_irn_op_mode(ir_node *node)
  * Walker, prepare the node links and determine which nodes need to be lowered
  * at all.
  */
-static void prepare_links(lower_env_t *env, ir_node *node)
+static void prepare_links(ir_node *node)
 {
-       ir_mode      *mode = get_irn_op_mode(node);
-       node_entry_t *link;
-       int           i;
+       ir_mode         *mode = get_irn_op_mode(node);
+       lower64_entry_t *link;
 
        if (mode == env->high_signed || mode == env->high_unsigned) {
                unsigned idx = get_irn_idx(node);
                /* ok, found a node that will be lowered */
-               link = OALLOCZ(&env->obst, node_entry_t);
+               link = OALLOCZ(&env->obst, lower64_entry_t);
 
                if (idx >= env->n_entries) {
                        /* enlarge: this happens only for Rotl nodes which is RARELY */
                        unsigned old   = env->n_entries;
                        unsigned n_idx = idx + (idx >> 3);
 
-                       ARR_RESIZE(node_entry_t *, env->entries, n_idx);
+                       ARR_RESIZE(lower64_entry_t *, env->entries, n_idx);
                        memset(&env->entries[old], 0, (n_idx - old) * sizeof(env->entries[0]));
                        env->n_entries = n_idx;
                }
@@ -297,49 +286,53 @@ static void prepare_links(lower_env_t *env, ir_node *node)
                        env->flags |= MUST_BE_LOWERED;
                }
                return;
-       }
-
-       if (is_Proj(node)) {
-               /* link all Proj nodes to its predecessor:
-                  Note that Tuple Proj's and its Projs are linked either. */
-               ir_node *pred = get_Proj_pred(node);
-
-               set_irn_link(node, get_irn_link(pred));
-               set_irn_link(pred, node);
-       } else if (is_Phi(node)) {
-               /* link all Phi nodes to its block */
-               ir_node *block = get_nodes_block(node);
-               add_Block_phi(block, node);
-       } else if (is_Block(node)) {
-               /* fill the Proj -> Block map */
-               for (i = get_Block_n_cfgpreds(node) - 1; i >= 0; --i) {
-                       ir_node *pred = get_Block_cfgpred(node, i);
-
-                       if (is_Proj(pred))
-                               pmap_insert(env->proj_2_block, pred, node);
+       } else if (is_Call(node)) {
+               /* Special case:  If the result of the Call is never used, we won't
+                * find a Proj with a mode that potentially triggers MUST_BE_LOWERED
+                * to be set.  Thus, if we see a call, we check its result types and
+                * decide whether MUST_BE_LOWERED has to be set.
+                */
+               ir_type *tp = get_Call_type(node);
+               size_t   n_res, i;
+
+               n_res = get_method_n_ress(tp);
+               for (i = 0; i < n_res; ++i) {
+                       ir_type *rtp = get_method_res_type(tp, i);
+
+                       if (is_Primitive_type(rtp)) {
+                               ir_mode *rmode = get_type_mode(rtp);
+
+                               if (rmode == env->high_signed || rmode == env->high_unsigned) {
+                                       env->flags |= MUST_BE_LOWERED;
+                               }
+                       }
                }
        }
 }
 
-static node_entry_t *get_node_entry(lower_env_t *env, ir_node *node)
+lower64_entry_t *get_node_entry(ir_node *node) /* returns env->entries[get_irn_idx(node)]; asserts that the index is below env->n_entries */
 {
        unsigned idx = get_irn_idx(node);
        assert(idx < env->n_entries);
        return env->entries[idx];
 }
 
-static void set_lowered(lower_env_t *env, ir_node *old,
-                        ir_node *new_low, ir_node *new_high)
+void ir_set_dw_lowered(ir_node *old, ir_node *new_low, ir_node *new_high) /* stores new_low/new_high as the low/high word recorded for old */
 {
-       node_entry_t *entry = get_node_entry(env, old);
+       lower64_entry_t *entry = get_node_entry(old); /* record found via old's node index */
        entry->low_word  = new_low;
        entry->high_word = new_high;
 }
 
+ir_mode *ir_get_low_unsigned_mode(void)
+{
+       return env->low_unsigned; /* the "word unsigned type" field of the file-wide lowering environment */
+}
+
 /**
  * Translate a Constant: create two.
  */
-static void lower_Const(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Const(ir_node *node, ir_mode *mode)
 {
        ir_graph  *irg      = get_irn_irg(node);
        dbg_info  *dbg      = get_irn_dbg_info(node);
@@ -347,23 +340,25 @@ static void lower_Const(ir_node *node, ir_mode *mode, lower_env_t *env)
        ir_tarval *tv       = get_Const_tarval(node);
        ir_tarval *tv_l     = tarval_convert_to(tv, low_mode);
        ir_node   *res_low  = new_rd_Const(dbg, irg, tv_l);
-       ir_tarval *tv_shrs  = tarval_shrs(tv, env->tv_mode_bits);
+       ir_tarval *tv_shrs  = tarval_shrs_unsigned(tv, get_mode_size_bits(low_mode));
        ir_tarval *tv_h     = tarval_convert_to(tv_shrs, mode);
        ir_node   *res_high = new_rd_Const(dbg, irg, tv_h);
 
-       set_lowered(env, node, res_low, res_high);
+       ir_set_dw_lowered(node, res_low, res_high);
 }
 
 /**
  * Translate a Load: create two.
  */
-static void lower_Load(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Load(ir_node *node, ir_mode *mode)
 {
        ir_mode    *low_mode = env->low_unsigned;
        ir_graph   *irg = get_irn_irg(node);
        ir_node    *adr = get_Load_ptr(node);
        ir_node    *mem = get_Load_mem(node);
-       ir_node    *low, *high, *proj;
+       ir_node    *low;
+       ir_node    *high;
+       ir_node    *proj_m;
        dbg_info   *dbg;
        ir_node    *block = get_nodes_block(node);
        ir_cons_flags volatility = get_Load_volatility(node) == volatility_is_volatile
@@ -378,15 +373,16 @@ static void lower_Load(ir_node *node, ir_mode *mode, lower_env_t *env)
        }
 
        /* create two loads */
-       dbg  = get_irn_dbg_info(node);
-       low  = new_rd_Load(dbg, block, mem,  low,  low_mode, volatility);
-       proj = new_r_Proj(low, mode_M, pn_Load_M);
-       high = new_rd_Load(dbg, block, proj, high, mode, volatility);
-
-       set_lowered(env, node, low, high);
+       dbg    = get_irn_dbg_info(node);
+       low    = new_rd_Load(dbg, block, mem,  low,  low_mode, volatility);
+       proj_m = new_r_Proj(low, mode_M, pn_Load_M);
+       high   = new_rd_Load(dbg, block, proj_m, high, mode, volatility);
+
+       foreach_out_edge_safe(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               if (!is_Proj(proj))
+                       continue;
 
-       for (proj = (ir_node*)get_irn_link(node); proj;
-            proj = (ir_node*)get_irn_link(proj)) {
                switch (get_Proj_proj(proj)) {
                case pn_Load_M:         /* Memory result. */
                        /* put it to the second one */
@@ -399,7 +395,7 @@ static void lower_Load(ir_node *node, ir_mode *mode, lower_env_t *env)
                case pn_Load_res: {       /* Result of load operation. */
                        ir_node *res_low  = new_r_Proj(low,  low_mode, pn_Load_res);
                        ir_node *res_high = new_r_Proj(high, mode,     pn_Load_res);
-                       set_lowered(env, proj, res_low, res_high);
+                       ir_set_dw_lowered(proj, res_low, res_high);
                        break;
                }
                default:
@@ -414,14 +410,14 @@ static void lower_Load(ir_node *node, ir_mode *mode, lower_env_t *env)
 /**
  * Translate a Store: create two.
  */
-static void lower_Store(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_graph     *irg;
-       ir_node      *block, *adr, *mem;
-       ir_node      *low, *high, *proj;
-       dbg_info     *dbg;
-       ir_node            *value = get_Store_value(node);
-       const node_entry_t *entry = get_node_entry(env, value);
+static void lower_Store(ir_node *node, ir_mode *mode)
+{
+       ir_graph              *irg;
+       ir_node               *block, *adr, *mem;
+       ir_node               *low, *high, *proj_m;
+       dbg_info              *dbg;
+       ir_node               *value = get_Store_value(node);
+       const lower64_entry_t *entry = get_node_entry(value);
        ir_cons_flags volatility = get_Store_volatility(node) == volatility_is_volatile
                                   ? cons_volatile : cons_none;
        (void) mode;
@@ -448,15 +444,16 @@ static void lower_Store(ir_node *node, ir_mode *mode, lower_env_t *env)
        }
 
        /* create two Stores */
-       dbg = get_irn_dbg_info(node);
-       low  = new_rd_Store(dbg, block, mem, low,  entry->low_word, volatility);
-       proj = new_r_Proj(low, mode_M, pn_Store_M);
-       high = new_rd_Store(dbg, block, proj, high, entry->high_word, volatility);
-
-       set_lowered(env, node, low, high);
+       dbg    = get_irn_dbg_info(node);
+       low    = new_rd_Store(dbg, block, mem, low,  entry->low_word, volatility);
+       proj_m = new_r_Proj(low, mode_M, pn_Store_M);
+       high   = new_rd_Store(dbg, block, proj_m, high, entry->high_word, volatility);
+
+       foreach_out_edge_safe(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               if (!is_Proj(proj))
+                       continue;
 
-       for (proj = (ir_node*)get_irn_link(node); proj;
-            proj = (ir_node*)get_irn_link(proj)) {
                switch (get_Proj_proj(proj)) {
                case pn_Store_M:         /* Memory result. */
                        /* put it to the second one */
@@ -485,8 +482,7 @@ static void lower_Store(ir_node *node, ir_mode *mode, lower_env_t *env)
  * @param env     the lower environment
  */
 static ir_node *get_intrinsic_address(ir_type *method, ir_op *op,
-                                      ir_mode *imode, ir_mode *omode,
-                                      lower_env_t *env)
+                                      ir_mode *imode, ir_mode *omode)
 {
        symconst_symbol sym;
        ir_entity *ent;
@@ -497,8 +493,8 @@ static ir_node *get_intrinsic_address(ir_type *method, ir_op *op,
        key.omode = omode;
        key.ent   = NULL;
 
-       entry = (op_mode_entry_t*)set_insert(intrinsic_fkt, &key, sizeof(key),
-                               HASH_PTR(op) ^ HASH_PTR(imode) ^ (HASH_PTR(omode) << 8));
+       entry = set_insert(op_mode_entry_t, intrinsic_fkt, &key, sizeof(key),
+                               hash_ptr(op) ^ hash_ptr(imode) ^ (hash_ptr(omode) << 8));
        if (! entry->ent) {
                /* create a new one */
                ent = env->params->create_intrinsic(method, op, imode, omode, env->params->ctx);
@@ -517,49 +513,64 @@ static ir_node *get_intrinsic_address(ir_type *method, ir_op *op,
  *
  * Create an intrinsic Call.
  */
-static void lower_Div(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_node            *left        = get_Div_left(node);
-       ir_node            *right       = get_Div_right(node);
-       const node_entry_t *left_entry  = get_node_entry(env, left);
-       const node_entry_t *right_entry = get_node_entry(env, right);
-       ir_node            *block       = get_nodes_block(node);
-       dbg_info           *dbgi        = get_irn_dbg_info(node);
-       ir_type            *mtp
-               = mode_is_signed(mode) ? binop_tp_s : binop_tp_u;
-       ir_mode            *opmode = get_irn_op_mode(node);
-       ir_node            *addr
-               = get_intrinsic_address(mtp, get_irn_op(node), opmode, opmode, env);
-       ir_node  *in[4] = {
-               left_entry->low_word, left_entry->high_word,
-               right_entry->low_word, right_entry->high_word };
-       ir_node            *call
-               = new_rd_Call(dbgi, block, get_Div_mem(node), addr, 4, in, mtp);
-       ir_node            *resproj = new_r_Proj(call, mode_T, pn_Call_T_result);
-       ir_node            *proj;
+static void lower_Div(ir_node *node, ir_mode *mode)
+{
+       ir_node  *left   = get_Div_left(node);
+       ir_node  *right  = get_Div_right(node);
+       ir_node  *block  = get_nodes_block(node);
+       dbg_info *dbgi   = get_irn_dbg_info(node);
+       ir_type  *mtp    = mode_is_signed(mode) ? binop_tp_s : binop_tp_u;
+       ir_mode  *opmode = get_irn_op_mode(node);
+       ir_node  *addr   = get_intrinsic_address(mtp, get_irn_op(node), opmode, opmode);
+       ir_node  *in[4];
+       ir_node  *call;
+       ir_node  *resproj;
 
+       if (env->params->little_endian) {
+               in[0] = get_lowered_low(left);
+               in[1] = get_lowered_high(left);
+               in[2] = get_lowered_low(right);
+               in[3] = get_lowered_high(right);
+       } else {
+               in[0] = get_lowered_high(left);
+               in[1] = get_lowered_low(left);
+               in[2] = get_lowered_high(right);
+               in[3] = get_lowered_low(right);
+       }
+       call    = new_rd_Call(dbgi, block, get_Div_mem(node), addr, 4, in, mtp);
+       resproj = new_r_Proj(call, mode_T, pn_Call_T_result);
        set_irn_pinned(call, get_irn_pinned(node));
 
-       for (proj = (ir_node*)get_irn_link(node); proj;
-            proj = (ir_node*)get_irn_link(proj)) {
+       foreach_out_edge_safe(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               if (!is_Proj(proj))
+                       continue;
+
                switch (get_Proj_proj(proj)) {
                case pn_Div_M:         /* Memory result. */
                        /* reroute to the call */
                        set_Proj_pred(proj, call);
                        set_Proj_proj(proj, pn_Call_M);
                        break;
-               case pn_Div_X_except:  /* Execution result if exception occurred. */
-                       /* reroute to the call */
+               case pn_Div_X_regular:
+                       set_Proj_pred(proj, call);
+                       set_Proj_proj(proj, pn_Call_X_regular);
+                       break;
+               case pn_Div_X_except:
                        set_Proj_pred(proj, call);
                        set_Proj_proj(proj, pn_Call_X_except);
                        break;
-               case pn_Div_res: {
-                       /* Result of computation. */
-                       ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
-                       ir_node *res_high = new_r_Proj(resproj, mode,              1);
-                       set_lowered(env, proj, res_low, res_high);
+               case pn_Div_res:
+                       if (env->params->little_endian) {
+                               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
+                               ir_node *res_high = new_r_Proj(resproj, mode,              1);
+                               ir_set_dw_lowered(proj, res_low, res_high);
+                       } else {
+                               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 1);
+                               ir_node *res_high = new_r_Proj(resproj, mode,              0);
+                               ir_set_dw_lowered(proj, res_low, res_high);
+                       }
                        break;
-               }
                default:
                        assert(0 && "unexpected Proj number");
                }
@@ -574,49 +585,64 @@ static void lower_Div(ir_node *node, ir_mode *mode, lower_env_t *env)
  *
  * Create an intrinsic Call.
  */
-static void lower_Mod(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_node            *left        = get_Mod_left(node);
-       ir_node            *right       = get_Mod_right(node);
-       const node_entry_t *left_entry  = get_node_entry(env, left);
-       const node_entry_t *right_entry = get_node_entry(env, right);
-       ir_node            *in[4]       = {
-               left_entry->low_word, left_entry->high_word,
-               right_entry->low_word, right_entry->high_word
-       };
-       dbg_info           *dbgi        = get_irn_dbg_info(node);
-       ir_node            *block       = get_nodes_block(node);
-       ir_type            *mtp
-               = mode_is_signed(mode) ? binop_tp_s : binop_tp_u;
-       ir_mode            *opmode = get_irn_op_mode(node);
-       ir_node            *addr
-               = get_intrinsic_address(mtp, get_irn_op(node), opmode, opmode, env);
-       ir_node            *call
-               = new_rd_Call(dbgi, block, get_Mod_mem(node), addr, 4, in, mtp);
-       ir_node            *resproj = new_r_Proj(call, mode_T, pn_Call_T_result);
-       ir_node            *proj;
+static void lower_Mod(ir_node *node, ir_mode *mode)
+{
+       ir_node  *left   = get_Mod_left(node);
+       ir_node  *right  = get_Mod_right(node);
+       dbg_info *dbgi   = get_irn_dbg_info(node);
+       ir_node  *block  = get_nodes_block(node);
+       ir_type  *mtp    = mode_is_signed(mode) ? binop_tp_s : binop_tp_u;
+       ir_mode  *opmode = get_irn_op_mode(node);
+       ir_node  *addr   = get_intrinsic_address(mtp, get_irn_op(node), opmode, opmode);
+       ir_node  *in[4];
+       ir_node  *call;
+       ir_node  *resproj;
+
+       if (env->params->little_endian) {
+               in[0] = get_lowered_low(left);
+               in[1] = get_lowered_high(left);
+               in[2] = get_lowered_low(right);
+               in[3] = get_lowered_high(right);
+       } else {
+               in[0] = get_lowered_high(left);
+               in[1] = get_lowered_low(left);
+               in[2] = get_lowered_high(right);
+               in[3] = get_lowered_low(right);
+       }
+       call    = new_rd_Call(dbgi, block, get_Mod_mem(node), addr, 4, in, mtp);
+       resproj = new_r_Proj(call, mode_T, pn_Call_T_result);
        set_irn_pinned(call, get_irn_pinned(node));
 
-       for (proj = (ir_node*)get_irn_link(node); proj;
-            proj = (ir_node*)get_irn_link(proj)) {
+       foreach_out_edge_safe(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               if (!is_Proj(proj))
+                       continue;
+
                switch (get_Proj_proj(proj)) {
                case pn_Mod_M:         /* Memory result. */
                        /* reroute to the call */
                        set_Proj_pred(proj, call);
                        set_Proj_proj(proj, pn_Call_M);
                        break;
-               case pn_Mod_X_except:  /* Execution result if exception occurred. */
-                       /* reroute to the call */
+               case pn_Mod_X_regular: /* regular control-flow exit of the Mod (was mistakenly spelled pn_Div_X_regular): reroute to the call */
+                       set_Proj_pred(proj, call);
+                       set_Proj_proj(proj, pn_Call_X_regular);
+                       break;
+               case pn_Mod_X_except:
                        set_Proj_pred(proj, call);
                        set_Proj_proj(proj, pn_Call_X_except);
                        break;
-               case pn_Mod_res: {
-                       /* Result of computation. */
-                       ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
-                       ir_node *res_high = new_r_Proj(resproj, mode,              1);
-                       set_lowered(env, proj, res_low, res_high);
+               case pn_Mod_res:
+                       if (env->params->little_endian) {
+                               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
+                               ir_node *res_high = new_r_Proj(resproj, mode,              1);
+                               ir_set_dw_lowered(proj, res_low, res_high);
+                       } else {
+                               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 1);
+                               ir_node *res_high = new_r_Proj(resproj, mode,              0);
+                               ir_set_dw_lowered(proj, res_low, res_high);
+                       }
                        break;
-               }
                default:
                        assert(0 && "unexpected Proj number");
                }
@@ -631,227 +657,383 @@ static void lower_Mod(ir_node *node, ir_mode *mode, lower_env_t *env)
  *
  * Create an intrinsic Call.
  */
-static void lower_binop(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_node            *left        = get_binop_left(node);
-       ir_node            *right       = get_binop_right(node);
-       const node_entry_t *left_entry  = get_node_entry(env, left);
-       const node_entry_t *right_entry = get_node_entry(env, right);
-       ir_node            *in[4]       = {
-               left_entry->low_word, left_entry->high_word,
-               right_entry->low_word, right_entry->high_word
-       };
-       dbg_info           *dbgi        = get_irn_dbg_info(node);
-       ir_node            *block       = get_nodes_block(node);
-       ir_graph           *irg         = get_irn_irg(block);
-       ir_type            *mtp
-               = mode_is_signed(mode) ? binop_tp_s : binop_tp_u;
-       ir_node            *addr
-               = get_intrinsic_address(mtp, get_irn_op(node), mode, mode, env);
-       ir_node            *call
-               = new_rd_Call(dbgi, block, get_irg_no_mem(irg), addr, 4, in, mtp);
-       ir_node            *resproj  = new_r_Proj(call, mode_T, pn_Call_T_result);
-       ir_node            *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
-       ir_node            *res_high = new_r_Proj(resproj, mode,              1);
+static void lower_binop(ir_node *node, ir_mode *mode)
+{
+       ir_node  *left  = get_binop_left(node);
+       ir_node  *right = get_binop_right(node);
+       dbg_info *dbgi  = get_irn_dbg_info(node);
+       ir_node  *block = get_nodes_block(node);
+       ir_graph *irg   = get_irn_irg(block);
+       ir_type  *mtp   = mode_is_signed(mode) ? binop_tp_s : binop_tp_u;
+       ir_node  *addr  = get_intrinsic_address(mtp, get_irn_op(node), mode, mode);
+       ir_node  *in[4];
+       ir_node  *call;
+       ir_node  *resproj;
+
+       if (env->params->little_endian) {
+               in[0] = get_lowered_low(left);
+               in[1] = get_lowered_high(left);
+               in[2] = get_lowered_low(right);
+               in[3] = get_lowered_high(right);
+       } else {
+               in[0] = get_lowered_high(left);
+               in[1] = get_lowered_low(left);
+               in[2] = get_lowered_high(right);
+               in[3] = get_lowered_low(right);
+       }
+       call    = new_rd_Call(dbgi, block, get_irg_no_mem(irg), addr, 4, in, mtp);
+       resproj = new_r_Proj(call, mode_T, pn_Call_T_result);
        set_irn_pinned(call, get_irn_pinned(node));
-       set_lowered(env, node, res_low, res_high);
+
+       if (env->params->little_endian) {
+               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
+               ir_node *res_high = new_r_Proj(resproj, mode,              1);
+               ir_set_dw_lowered(node, res_low, res_high);
+       } else {
+               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 1);
+               ir_node *res_high = new_r_Proj(resproj, mode,              0);
+               ir_set_dw_lowered(node, res_low, res_high);
+       }
+}
+
+static ir_node *create_conv(ir_node *block, ir_node *node, ir_mode *dest_mode)
+{
+       if (get_irn_mode(node) == dest_mode)
+               return node;
+       return new_r_Conv(block, node, dest_mode);
 }
 
 /**
- * Translate a Shiftop.
- *
- * Create an intrinsic Call.
+ * Moves node and all predecessors of node from from_bl to to_bl.
+ * Does not move predecessors of Phi nodes (or block nodes).
  */
-static void lower_Shiftop(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_node            *block      = get_nodes_block(node);
-       ir_node            *left       = get_binop_left(node);
-       const node_entry_t *left_entry = get_node_entry(env, left);
-       ir_node            *right      = get_binop_right(node);
-       ir_node            *in[3]      = {
-               left_entry->low_word, left_entry->high_word,
-               /* it should be safe to conv to low_unsigned */
-               new_r_Conv(block, right, env->low_unsigned)
-       };
-       dbg_info           *dbgi       = get_irn_dbg_info(node);
-       ir_graph           *irg        = get_irn_irg(block);
-       ir_type            *mtp
-               = mode_is_signed(mode) ? shiftop_tp_s : shiftop_tp_u;
-       ir_node            *addr
-               = get_intrinsic_address(mtp, get_irn_op(node), mode, mode, env);
-       ir_node            *call
-               = new_rd_Call(dbgi, block, get_irg_no_mem(irg), addr, 3, in, mtp);
-       ir_node            *resproj  = new_r_Proj(call, mode_T, pn_Call_T_result);
-       ir_node            *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
-       ir_node            *res_high = new_r_Proj(resproj, mode,              1);
+static void move(ir_node *node, ir_node *from_bl, ir_node *to_bl)
+{
+       int i, arity;
+
+       /* move this node */
+       set_nodes_block(node, to_bl);
+
+       /* move its Projs */
+       if (get_irn_mode(node) == mode_T) {
+               foreach_out_edge(node, edge) {
+                       ir_node *proj = get_edge_src_irn(edge);
+                       if (!is_Proj(proj))
+                               continue;
+                       move(proj, from_bl, to_bl);
+               }
+       }
 
-       set_irn_pinned(call, get_irn_pinned(node));
-       set_lowered(env, node, res_low, res_high);
+       /* We must not move predecessors of Phi nodes, even if they are in
+        * from_bl, because these are values from an earlier loop iteration
+        * and are not predecessors of node here.
+        */
+       if (is_Phi(node))
+               return;
+
+       /* recursion ... */
+       arity = get_irn_arity(node);
+       for (i = 0; i < arity; i++) {
+               ir_node *pred      = get_irn_n(node, i);
+               ir_mode *pred_mode = get_irn_mode(pred);
+               if (get_nodes_block(pred) == from_bl)
+                       move(pred, from_bl, to_bl);
+               if (pred_mode == env->high_signed || pred_mode == env->high_unsigned) {
+                       ir_node *pred_low  = get_lowered_low(pred);
+                       ir_node *pred_high = get_lowered_high(pred);
+                       if (get_nodes_block(pred_low) == from_bl)
+                               move(pred_low, from_bl, to_bl);
+                       if (pred_high != NULL && get_nodes_block(pred_high) == from_bl)
+                               move(pred_high, from_bl, to_bl);
+               }
+       }
 }
 
 /**
- * Translate a Shr and handle special cases.
+ * We need a custom version of part_block_edges because during transformation
+ * not all data dependencies are explicit yet: the users of a lowered node
+ * may not have been lowered themselves.
+ * We fix this by making move() look for such implicit dependencies.
+ * Additionally we have to keep the proj_2_block map updated.
  */
-static void lower_Shr(ir_node *node, ir_mode *mode, lower_env_t *env)
+static ir_node *part_block_dw(ir_node *node)
 {
-       ir_graph *irg   = get_irn_irg(node);
-       ir_node  *right = get_Shr_right(node);
+       ir_graph *irg        = get_irn_irg(node);
+       ir_node  *old_block  = get_nodes_block(node);
+       int       n_cfgpreds = get_Block_n_cfgpreds(old_block);
+       ir_node **cfgpreds   = get_Block_cfgpred_arr(old_block);
+       ir_node  *new_block  = new_r_Block(irg, n_cfgpreds, cfgpreds);
+
+       /* old_block has no predecessors anymore for now */
+       set_irn_in(old_block, 0, NULL);
+
+       /* move node and its predecessors to new_block */
+       move(node, old_block, new_block);
+
+       /* move Phi nodes to new_block */
+       foreach_out_edge_safe(old_block, edge) {
+               ir_node *phi = get_edge_src_irn(edge);
+               if (!is_Phi(phi))
+                       continue;
+               set_nodes_block(phi, new_block);
+       }
+       return old_block;
+}
 
-       if (get_mode_arithmetic(mode) == irma_twos_complement && is_Const(right)) {
-               ir_tarval *tv = get_Const_tarval(right);
+typedef ir_node* (*new_rd_shr_func)(dbg_info *dbgi, ir_node *block,
+                                    ir_node *left, ir_node *right,
+                                    ir_mode *mode);
 
-               if (tarval_is_long(tv) &&
-                   get_tarval_long(tv) >= (long)get_mode_size_bits(mode)) {
-                       ir_node *block        = get_nodes_block(node);
-                       ir_node *left         = get_Shr_left(node);
-                       ir_mode *low_unsigned = env->low_unsigned;
-                       long shf_cnt = get_tarval_long(tv) - get_mode_size_bits(mode);
-                       const node_entry_t *left_entry = get_node_entry(env, left);
-                       ir_node *res_low;
-                       ir_node *res_high;
+static void lower_shr_helper(ir_node *node, ir_mode *mode,
+                             new_rd_shr_func new_rd_shrs)
+{
+       ir_node  *right         = get_binop_right(node);
+       ir_node  *left          = get_binop_left(node);
+       ir_mode  *shr_mode      = get_irn_mode(node);
+       unsigned  modulo_shift  = get_mode_modulo_shift(shr_mode);
+       ir_mode  *low_unsigned  = env->low_unsigned;
+       unsigned  modulo_shift2 = get_mode_modulo_shift(mode);
+       ir_graph *irg           = get_irn_irg(node);
+       ir_node  *left_low      = get_lowered_low(left);
+       ir_node  *left_high     = get_lowered_high(left);
+       dbg_info *dbgi          = get_irn_dbg_info(node);
+       ir_node  *lower_block;
+       ir_node  *block;
+       ir_node  *cnst;
+       ir_node  *andn;
+       ir_node  *cmp;
+       ir_node  *cond;
+       ir_node  *proj_true;
+       ir_node  *proj_false;
+       ir_node  *phi_low;
+       ir_node  *phi_high;
+       ir_node  *lower_in[2];
+       ir_node  *phi_low_in[2];
+       ir_node  *phi_high_in[2];
+
+       /* this version is optimized for modulo shift architectures
+        * (and can't handle anything else) */
+       if (modulo_shift != get_mode_size_bits(shr_mode)
+                       || modulo_shift2<<1 != modulo_shift) {
+               panic("Shr lowering only implemented for modulo shift shr operations");
+       }
+       if (!is_po2(modulo_shift) || !is_po2(modulo_shift2)) {
+               panic("Shr lowering only implemented for power-of-2 modes");
+       }
+       /* without two's complement the -x instead of (bit_width-x) trick won't work */
+       if (get_mode_arithmetic(shr_mode) != irma_twos_complement) {
+               panic("Shr lowering only implemented for two-complement modes");
+       }
 
-                       left = left_entry->high_word;
+       block = get_nodes_block(node);
 
-                       /* convert high word into low_unsigned mode if necessary */
-                       if (get_irn_mode(left) != low_unsigned)
-                               left = new_r_Conv(block, left, low_unsigned);
+       /* if the right operand is a 64bit value, we're only interested in the
+        * lower word */
+       if (get_irn_mode(right) == env->high_unsigned) {
+               right = get_lowered_low(right);
+       } else {
+               /* shift should never have signed mode on the right */
+               assert(get_irn_mode(right) != env->high_signed);
+               right = create_conv(block, right, low_unsigned);
+       }
 
-                       if (shf_cnt > 0) {
-                               ir_node *c = new_r_Const_long(irg, low_unsigned, shf_cnt);
-                               res_low = new_r_Shr(block, left, c, low_unsigned);
-                       } else {
-                               res_low = left;
-                       }
-                       res_high = new_r_Const(irg, get_mode_null(mode));
-                       set_lowered(env, node, res_low, res_high);
+       lower_block = part_block_dw(node);
+       env->flags |= CF_CHANGED;
+       block = get_nodes_block(node);
 
-                       return;
+       /* add a Cmp to test if the highest bit of the (modulo) shift amount is
+        * set <=> whether we shift by at least half the double-word width */
+       cnst       = new_r_Const_long(irg, low_unsigned, modulo_shift2);
+       andn       = new_r_And(block, right, cnst, low_unsigned);
+       cnst       = new_r_Const(irg, get_mode_null(low_unsigned));
+       cmp        = new_rd_Cmp(dbgi, block, andn, cnst, ir_relation_equal);
+       cond       = new_rd_Cond(dbgi, block, cmp);
+       proj_true  = new_r_Proj(cond, mode_X, pn_Cond_true);
+       proj_false = new_r_Proj(cond, mode_X, pn_Cond_false);
+
+       /* the true block => shift_width < 1 word */
+       {
+               /* In theory the low value (for 64bit shifts) is:
+                *    Or(High << (32-x), Low >> x)
+                * In practice High << (32-x) will fail when x is zero (since we have
+                * modulo shift and 32 will be 0). So instead we use:
+                *    Or(High<<1<<~x, Low >> x)
+                */
+               ir_node *in[1]        = { proj_true };
+               ir_node *block_true   = new_r_Block(irg, ARRAY_SIZE(in), in);
+               ir_node *res_high     = new_rd_shrs(dbgi, block_true, left_high,
+                                                   right, mode);
+               ir_node *shift_low    = new_rd_Shr(dbgi, block_true, left_low, right,
+                                                  low_unsigned);
+               ir_node *not_shiftval = new_rd_Not(dbgi, block_true, right,
+                                                  low_unsigned);
+               ir_node *conv         = create_conv(block_true, left_high,
+                                                   low_unsigned);
+               ir_node *one          = new_r_Const(irg, get_mode_one(low_unsigned));
+               ir_node *carry0       = new_rd_Shl(dbgi, block_true, conv, one,
+                                                  low_unsigned);
+               ir_node *carry1       = new_rd_Shl(dbgi, block_true, carry0,
+                                                  not_shiftval, low_unsigned);
+               ir_node *res_low      = new_rd_Or(dbgi, block_true, shift_low, carry1,
+                                                 low_unsigned);
+               lower_in[0]           = new_r_Jmp(block_true);
+               phi_low_in[0]         = res_low;
+               phi_high_in[0]        = res_high;
+       }
+
+       /* the false block => shift_width >= 1 word */
+       {
+               ir_node *in[1]       = { proj_false };
+               ir_node *block_false = new_r_Block(irg, ARRAY_SIZE(in), in);
+               ir_node *conv        = create_conv(block_false, left_high, low_unsigned);
+               ir_node *res_low     = new_rd_shrs(dbgi, block_false, conv, right,
+                                                  low_unsigned);
+               int      cnsti       = modulo_shift2-1;
+               ir_node *cnst2       = new_r_Const_long(irg, low_unsigned, cnsti);
+               ir_node *res_high;
+               if (new_rd_shrs == new_rd_Shrs) {
+                       res_high = new_rd_shrs(dbgi, block_false, left_high, cnst2, mode);
+               } else {
+                       res_high = new_r_Const(irg, get_mode_null(mode));
                }
-       }
-       lower_Shiftop(node, mode, env);
+               lower_in[1]          = new_r_Jmp(block_false);
+               phi_low_in[1]        = res_low;
+               phi_high_in[1]       = res_high;
+       }
+
+       /* patch lower block */
+       set_irn_in(lower_block, ARRAY_SIZE(lower_in), lower_in);
+       phi_low  = new_r_Phi(lower_block, ARRAY_SIZE(phi_low_in), phi_low_in,
+                            low_unsigned);
+       phi_high = new_r_Phi(lower_block, ARRAY_SIZE(phi_high_in), phi_high_in,
+                            mode);
+       ir_set_dw_lowered(node, phi_low, phi_high);
 }
 
-/**
- * Translate a Shl and handle special cases.
- */
-static void lower_Shl(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_graph *irg   = get_irn_irg(node);
-       ir_node  *right = get_Shl_right(node);
-
-       if (get_mode_arithmetic(mode) == irma_twos_complement && is_Const(right)) {
-               ir_tarval *tv = get_Const_tarval(right);
-
-               if (tarval_is_long(tv)) {
-                       long value = get_tarval_long(tv);
-                   if (value >= (long)get_mode_size_bits(mode)) {
-                               /* simple case: shift above the lower word */
-                               ir_mode *mode_l;
-                               ir_node *block = get_nodes_block(node);
-                               ir_node *left = get_Shl_left(node);
-                               ir_node *c;
-                               long shf_cnt = get_tarval_long(tv) - get_mode_size_bits(mode);
-                               const node_entry_t *left_entry = get_node_entry(env, left);
-                               ir_node  *res_low;
-                               ir_node  *res_high;
-
-                               left = left_entry->low_word;
-                               left = new_r_Conv(block, left, mode);
-
-                               mode_l = env->low_unsigned;
-                               if (shf_cnt > 0) {
-                                       c        = new_r_Const_long(irg, mode_l, shf_cnt);
-                                       res_high = new_r_Shl(block, left, c, mode);
-                               } else {
-                                       res_high = left;
-                               }
-                               res_low = new_r_Const(irg, get_mode_null(mode_l));
-                               set_lowered(env, node, res_low, res_high);
-
-                               return;
-                       }
-                       if (value == 1) {
-                               /* left << 1 == left + left */
-                               ir_node            *left        = get_binop_left(node);
-                               const node_entry_t *left_entry  = get_node_entry(env, left);
-                               ir_node            *in[4]       = {
-                                       left_entry->low_word, left_entry->high_word,
-                                       left_entry->low_word, left_entry->high_word,
-                               };
-                               dbg_info           *dbgi        = get_irn_dbg_info(node);
-                               ir_node            *block       = get_nodes_block(node);
-                               ir_graph           *irg         = get_irn_irg(block);
-                               ir_type            *mtp
-                                       = mode_is_signed(mode) ? binop_tp_s : binop_tp_u;
-                               ir_node            *addr
-                                       = get_intrinsic_address(mtp, op_Add, mode, mode, env);
-                               ir_node            *call
-                                       = new_rd_Call(dbgi, block, get_irg_no_mem(irg), addr, 4, in, mtp);
-                               ir_node            *resproj  = new_r_Proj(call, mode_T, pn_Call_T_result);
-                               ir_node            *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
-                               ir_node            *res_high = new_r_Proj(resproj, mode,              1);
-                               set_irn_pinned(call, get_irn_pinned(node));
-                               set_lowered(env, node, res_low, res_high);
-
-                               return;
-                       }
-               }
-       }
-       lower_Shiftop(node, mode, env);
+static void lower_Shr(ir_node *node, ir_mode *mode)
+{
+       lower_shr_helper(node, mode, new_rd_Shr);
 }
 
-/**
- * Translate a Shrs and handle special cases.
- */
-static void lower_Shrs(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_graph *irg   = get_irn_irg(node);
-       ir_node  *right = get_Shrs_right(node);
-
-       if (get_mode_arithmetic(mode) == irma_twos_complement && is_Const(right)) {
-               ir_tarval *tv = get_Const_tarval(right);
-
-               if (tarval_is_long(tv) &&
-                   get_tarval_long(tv) >= (long)get_mode_size_bits(mode)) {
-                       ir_node *block         = get_nodes_block(node);
-                       ir_node *left          = get_Shrs_left(node);
-                       ir_mode *low_unsigned  = env->low_unsigned;
-                       long     shf_cnt       = get_tarval_long(tv) - get_mode_size_bits(mode);
-                       const node_entry_t *left_entry = get_node_entry(env, left);
-                       ir_node *left_unsigned = left;
-                       ir_node *res_low;
-                       ir_node *res_high;
-                       ir_node *c;
-
-                       left = left_entry->high_word;
-
-                       /* convert high word into low_unsigned mode if necessary */
-                       if (get_irn_mode(left_unsigned) != low_unsigned)
-                               left_unsigned = new_r_Conv(block, left, low_unsigned);
-
-                       if (shf_cnt > 0) {
-                               c       = new_r_Const_long(irg, low_unsigned, shf_cnt);
-                               res_low = new_r_Shrs(block, left_unsigned, c, low_unsigned);
-                       } else {
-                               res_low = left_unsigned;
-                       }
+static void lower_Shrs(ir_node *node, ir_mode *mode)
+{
+       lower_shr_helper(node, mode, new_rd_Shrs);
+}
 
-                       c        = new_r_Const(irg, get_mode_all_one(low_unsigned));
-                       res_high = new_r_Shrs(block, left, c, mode);
-                       set_lowered(env, node, res_low, res_high);
-                       return;
-               }
+static void lower_Shl(ir_node *node, ir_mode *mode)
+{
+       ir_node  *right         = get_binop_right(node);
+       ir_node  *left          = get_binop_left(node);
+       ir_mode  *shr_mode      = get_irn_mode(node);
+       unsigned  modulo_shift  = get_mode_modulo_shift(shr_mode);
+       ir_mode  *low_unsigned  = env->low_unsigned;
+       unsigned  modulo_shift2 = get_mode_modulo_shift(mode);
+       ir_graph *irg           = get_irn_irg(node);
+       ir_node  *left_low      = get_lowered_low(left);
+       ir_node  *left_high     = get_lowered_high(left);
+       dbg_info *dbgi          = get_irn_dbg_info(node);
+       ir_node  *lower_block   = get_nodes_block(node);
+       ir_node  *block;
+       ir_node  *cnst;
+       ir_node  *andn;
+       ir_node  *cmp;
+       ir_node  *cond;
+       ir_node  *proj_true;
+       ir_node  *proj_false;
+       ir_node  *phi_low;
+       ir_node  *phi_high;
+       ir_node  *lower_in[2];
+       ir_node  *phi_low_in[2];
+       ir_node  *phi_high_in[2];
+
+       /* this version is optimized for modulo shift architectures
+        * (and can't handle anything else) */
+       if (modulo_shift != get_mode_size_bits(shr_mode)
+                       || modulo_shift2<<1 != modulo_shift) {
+               panic("Shl lowering only implemented for modulo shift shl operations");
+       }
+       if (!is_po2(modulo_shift) || !is_po2(modulo_shift2)) {
+               panic("Shl lowering only implemented for power-of-2 modes");
+       }
+       /* without two's complement the -x instead of (bit_width-x) trick won't work */
+       if (get_mode_arithmetic(shr_mode) != irma_twos_complement) {
+               panic("Shl lowering only implemented for two-complement modes");
+       }
+
+       /* if the right operand is a 64bit value, we're only interested in the
+        * lower word */
+       if (get_irn_mode(right) == env->high_unsigned) {
+               right = get_lowered_low(right);
+       } else {
+               /* shift should never have signed mode on the right */
+               assert(get_irn_mode(right) != env->high_signed);
+               right = create_conv(lower_block, right, low_unsigned);
        }
-       lower_Shiftop(node, mode, env);
+
+       part_block_dw(node);
+       env->flags |= CF_CHANGED;
+       block = get_nodes_block(node);
+
+       /* add a Cmp to test if the highest bit of the (modulo) shift amount is
+        * set <=> whether we shift by at least half the double-word width */
+       cnst       = new_r_Const_long(irg, low_unsigned, modulo_shift2);
+       andn       = new_r_And(block, right, cnst, low_unsigned);
+       cnst       = new_r_Const(irg, get_mode_null(low_unsigned));
+       cmp        = new_rd_Cmp(dbgi, block, andn, cnst, ir_relation_equal);
+       cond       = new_rd_Cond(dbgi, block, cmp);
+       proj_true  = new_r_Proj(cond, mode_X, pn_Cond_true);
+       proj_false = new_r_Proj(cond, mode_X, pn_Cond_false);
+
+       /* the true block => shift_width < 1 word */
+       {
+               ir_node *in[1]        = { proj_true };
+               ir_node *block_true   = new_r_Block(irg, ARRAY_SIZE(in), in);
+
+               ir_node *res_low      = new_rd_Shl(dbgi, block_true, left_low,
+                                                  right, low_unsigned);
+               ir_node *shift_high   = new_rd_Shl(dbgi, block_true, left_high, right,
+                                                  mode);
+               ir_node *not_shiftval = new_rd_Not(dbgi, block_true, right,
+                                                  low_unsigned);
+               ir_node *conv         = create_conv(block_true, left_low, mode);
+               ir_node *one          = new_r_Const(irg, get_mode_one(low_unsigned));
+               ir_node *carry0       = new_rd_Shr(dbgi, block_true, conv, one, mode);
+               ir_node *carry1       = new_rd_Shr(dbgi, block_true, carry0,
+                                                  not_shiftval, mode);
+               ir_node *res_high     = new_rd_Or(dbgi, block_true, shift_high, carry1,
+                                                 mode);
+               lower_in[0]           = new_r_Jmp(block_true);
+               phi_low_in[0]         = res_low;
+               phi_high_in[0]        = res_high;
+       }
+
+       /* the false block => shift_width >= 1 word */
+       {
+               ir_node *in[1]       = { proj_false };
+               ir_node *block_false = new_r_Block(irg, ARRAY_SIZE(in), in);
+               ir_node *res_low     = new_r_Const(irg, get_mode_null(low_unsigned));
+               ir_node *conv        = create_conv(block_false, left_low, mode);
+               ir_node *res_high    = new_rd_Shl(dbgi, block_false, conv, right, mode);
+               lower_in[1]          = new_r_Jmp(block_false);
+               phi_low_in[1]        = res_low;
+               phi_high_in[1]       = res_high;
+       }
+
+       /* patch lower block */
+       set_irn_in(lower_block, ARRAY_SIZE(lower_in), lower_in);
+       phi_low  = new_r_Phi(lower_block, ARRAY_SIZE(phi_low_in), phi_low_in,
+                            low_unsigned);
+       phi_high = new_r_Phi(lower_block, ARRAY_SIZE(phi_high_in), phi_high_in,
+                            mode);
+       ir_set_dw_lowered(node, phi_low, phi_high);
 }
 
 /**
  * Rebuild Rotl nodes into Or(Shl, Shr) and prepare all nodes.
  */
-static void prepare_links_and_handle_rotl(ir_node *node, void *env)
+static void prepare_links_and_handle_rotl(ir_node *node, void *data)
 {
-       lower_env_t *lenv = (lower_env_t*)env;
-
+       (void) data;
        if (is_Rotl(node)) {
                ir_mode  *mode = get_irn_op_mode(node);
                ir_node  *right;
@@ -861,8 +1043,8 @@ static void prepare_links_and_handle_rotl(ir_node *node, void *env)
                dbg_info *dbg;
                optimization_state_t state;
 
-               if (mode != lenv->high_signed && mode != lenv->high_unsigned) {
-                       prepare_links(lenv, node);
+               if (mode != env->high_signed && mode != env->high_unsigned) {
+                       prepare_links(node);
                        return;
                }
 
@@ -888,15 +1070,15 @@ static void prepare_links_and_handle_rotl(ir_node *node, void *env)
                exchange(node, ornode);
 
                /* do lowering on the new nodes */
-               prepare_links(lenv, shl);
-               prepare_links(lenv, c);
-               prepare_links(lenv, sub);
-               prepare_links(lenv, shr);
-               prepare_links(lenv, ornode);
+               prepare_links(shl);
+               prepare_links(c);
+               prepare_links(sub);
+               prepare_links(shr);
+               prepare_links(ornode);
                return;
        }
 
-       prepare_links(lenv, node);
+       prepare_links(node);
 }
 
 /**
@@ -904,24 +1086,40 @@ static void prepare_links_and_handle_rotl(ir_node *node, void *env)
  *
  * Create an intrinsic Call.
  */
-static void lower_Unop(ir_node *node, ir_mode *mode, lower_env_t *env)
-{
-       ir_node            *op       = get_unop_op(node);
-       const node_entry_t *op_entry = get_node_entry(env, op);
-       ir_node            *in[2]    = { op_entry->low_word, op_entry->high_word };
-       dbg_info           *dbgi     = get_irn_dbg_info(node);
-       ir_node            *block    = get_nodes_block(node);
-       ir_graph           *irg      = get_irn_irg(block);
-       ir_type            *mtp      = mode_is_signed(mode) ? unop_tp_s : unop_tp_u;
-       ir_op              *irop     = get_irn_op(node);
-       ir_node            *addr     = get_intrinsic_address(mtp, irop, mode, mode, env);
-       ir_node            *nomem    = get_irg_no_mem(irg);
-       ir_node            *call     = new_rd_Call(dbgi, block, nomem, addr, 2, in, mtp);
-       ir_node            *resproj  = new_r_Proj(call, mode_T, pn_Call_T_result);
-       ir_node            *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
-       ir_node            *res_high = new_r_Proj(resproj, mode,              1);
+static void lower_unop(ir_node *node, ir_mode *mode)
+{
+       ir_node  *op       = get_unop_op(node);
+       dbg_info *dbgi     = get_irn_dbg_info(node);
+       ir_node  *block    = get_nodes_block(node);
+       ir_graph *irg      = get_irn_irg(block);
+       ir_type  *mtp      = mode_is_signed(mode) ? unop_tp_s : unop_tp_u;
+       ir_op    *irop     = get_irn_op(node);
+       ir_node  *addr     = get_intrinsic_address(mtp, irop, mode, mode);
+       ir_node  *nomem    = get_irg_no_mem(irg);
+       ir_node  *in[2];
+       ir_node  *call;
+       ir_node  *resproj;
+
+       if (env->params->little_endian) {
+               in[0] = get_lowered_low(op);
+               in[1] = get_lowered_high(op);
+       } else {
+               in[0] = get_lowered_high(op);
+               in[1] = get_lowered_low(op);
+       }
+       call    = new_rd_Call(dbgi, block, nomem, addr, 2, in, mtp);
+       resproj = new_r_Proj(call, mode_T, pn_Call_T_result);
        set_irn_pinned(call, get_irn_pinned(node));
-       set_lowered(env, node, res_low, res_high);
+
+       if (env->params->little_endian) {
+               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 0);
+               ir_node *res_high = new_r_Proj(resproj, mode,              1);
+               ir_set_dw_lowered(node, res_low, res_high);
+       } else {
+               ir_node *res_low  = new_r_Proj(resproj, env->low_unsigned, 1);
+               ir_node *res_high = new_r_Proj(resproj, mode,              0);
+               ir_set_dw_lowered(node, res_low, res_high);
+       }
 }
 
 /**
@@ -929,37 +1127,37 @@ static void lower_Unop(ir_node *node, ir_mode *mode, lower_env_t *env)
  *
  * Create two logical binops.
  */
-static void lower_binop_logical(ir_node *node, ir_mode *mode, lower_env_t *env,
+static void lower_binop_logical(ir_node *node, ir_mode *mode,
                                                                ir_node *(*constr_rd)(dbg_info *db, ir_node *block, ir_node *op1, ir_node *op2, ir_mode *mode) )
 {
-       ir_node            *left        = get_binop_left(node);
-       ir_node            *right       = get_binop_right(node);
-       const node_entry_t *left_entry  = get_node_entry(env, left);
-       const node_entry_t *right_entry = get_node_entry(env, right);
-       dbg_info           *dbgi        = get_irn_dbg_info(node);
-       ir_node            *block       = get_nodes_block(node);
-       ir_node            *res_low
+       /* constr_rd is the node constructor of the concrete operation; it is
+        * applied once to the low words (in env->low_unsigned) and once to
+        * the high words (in mode) of both operands */
+       ir_node               *left        = get_binop_left(node);
+       ir_node               *right       = get_binop_right(node);
+       const lower64_entry_t *left_entry  = get_node_entry(left);
+       const lower64_entry_t *right_entry = get_node_entry(right);
+       dbg_info              *dbgi        = get_irn_dbg_info(node);
+       ir_node               *block       = get_nodes_block(node);
+       ir_node               *res_low
                 = constr_rd(dbgi, block, left_entry->low_word, right_entry->low_word,
                             env->low_unsigned);
-       ir_node            *res_high
+       ir_node               *res_high
                 = constr_rd(dbgi, block, left_entry->high_word, right_entry->high_word,
                             mode);
-       set_lowered(env, node, res_low, res_high);
+       /* register both halves as the lowered form of node */
+       ir_set_dw_lowered(node, res_low, res_high);
 }
 
+/**
+ * Translate an And: delegates to lower_binop_logical() with the And
+ * constructor.
+ */
-static void lower_And(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_And(ir_node *node, ir_mode *mode)
 {
-       lower_binop_logical(node, mode, env, new_rd_And);
+       lower_binop_logical(node, mode, new_rd_And);
 }
 
+/**
+ * Translate an Or: delegates to lower_binop_logical() with the Or
+ * constructor.
+ */
-static void lower_Or(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Or(ir_node *node, ir_mode *mode)
 {
-       lower_binop_logical(node, mode, env, new_rd_Or);
+       lower_binop_logical(node, mode, new_rd_Or);
 }
 
+/**
+ * Translate an Eor: delegates to lower_binop_logical() with the Eor
+ * constructor.
+ */
-static void lower_Eor(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Eor(ir_node *node, ir_mode *mode)
 {
-       lower_binop_logical(node, mode, env, new_rd_Eor);
+       lower_binop_logical(node, mode, new_rd_Eor);
 }
 
 /**
@@ -967,30 +1165,88 @@ static void lower_Eor(ir_node *node, ir_mode *mode, lower_env_t *env)
  *
  * Create two logical Nots.
  */
-static void lower_Not(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Not(ir_node *node, ir_mode *mode)
 {
-       ir_node            *op       = get_Not_op(node);
-       const node_entry_t *op_entry = get_node_entry(env, op);
-       dbg_info           *dbgi     = get_irn_dbg_info(node);
-       ir_node            *block    = get_nodes_block(node);
-       ir_node            *res_low
+       /* both halves are complemented independently: the low word in
+        * env->low_unsigned, the high word in mode */
+       ir_node               *op       = get_Not_op(node);
+       const lower64_entry_t *op_entry = get_node_entry(op);
+       dbg_info              *dbgi     = get_irn_dbg_info(node);
+       ir_node               *block    = get_nodes_block(node);
+       ir_node               *res_low
                 = new_rd_Not(dbgi, block, op_entry->low_word, env->low_unsigned);
-       ir_node            *res_high
+       ir_node               *res_high
                 = new_rd_Not(dbgi, block, op_entry->high_word, mode);
-       set_lowered(env, node, res_low, res_high);
+       /* register both halves as the lowered form of node */
+       ir_set_dw_lowered(node, res_low, res_high);
+}
+
+/**
+ * Translate a Proj.
+ *
+ * Only a Proj with a double-word mode whose predecessor is a Tuple is
+ * handled: the lowered words of the selected Tuple operand are registered
+ * for the Proj. Any other Proj is left untouched here.
+ *
+ * @param node     the Proj node
+ * @param op_mode  unused
+ */
+static void lower_Proj(ir_node *node, ir_mode *op_mode)
+{
+       ir_mode *const proj_mode = get_irn_mode(node);
+       (void)op_mode;
+
+       if (proj_mode == env->high_signed || proj_mode == env->high_unsigned) {
+               /* skip tuples */
+               ir_node *const tuple = get_Proj_pred(node);
+
+               if (is_Tuple(tuple)) {
+                       const long                   num      = get_Proj_proj(node);
+                       ir_node               *const selected = get_irn_n(tuple, num);
+                       const lower64_entry_t *const words    = get_node_entry(selected);
+
+                       ir_set_dw_lowered(node, words->low_word, words->high_word);
+               }
+       }
+}
+
+/**
+ * Check whether a Cmp can be lowered like an (in)equality test.
+ *
+ * This is the case for the relations "equal" and "less_greater", and
+ * additionally for an unsigned "greater" compare against a null constant,
+ * because unsigned x > 0 holds exactly when x != 0.
+ *
+ * @param node  the Cmp node; its operands must not have a float mode
+ * @return true if the Cmp is such an (in)equality test
+ */
+static bool is_equality_cmp(const ir_node *node)
+{
+       ir_relation relation = get_Cmp_relation(node);
+       ir_node    *left     = get_Cmp_left(node);
+       ir_node    *right    = get_Cmp_right(node);
+       ir_mode    *mode     = get_irn_mode(left);
+
+       /* this probably makes no sense if unordered is involved */
+       assert(!mode_is_float(mode));
+
+       if (relation == ir_relation_equal || relation == ir_relation_less_greater)
+               return true;
+
+       /* The only remaining candidate is unsigned "x > 0". A signed compare
+        * against 0 has no further equality form: the former check for
+        * less_greater in the signed case could never succeed, as that
+        * relation has already been accepted above. */
+       if (relation != ir_relation_greater || mode_is_signed(mode))
+               return false;
+       return is_Const(right) && is_Const_null(right);
+}
+
+/**
+ * Return the node at the other end of the single out edge of a control
+ * flow operation.
+ *
+ * @param cfop  the control flow node; it must have exactly one out edge
+ * @return the source node of that out edge
+ */
+static ir_node *get_cfop_destination(const ir_node *cfop)
+{
+       const ir_edge_t *const out_edge = get_irn_out_edge_first(cfop);
+       ir_node               *destination;
+
+       /* we should only have 1 destination */
+       assert(get_irn_n_edges(cfop) == 1);
+
+       destination = get_edge_src_irn(out_edge);
+       return destination;
+}
+
+/**
+ * Translate a Switch: a selector with a double-word mode is rejected with a
+ * panic, any other selector is lowered.
+ *
+ * @param node       the Switch node
+ * @param high_mode  unused
+ */
+static void lower_Switch(ir_node *node, ir_mode *high_mode)
+{
+       ir_node *const sel        = get_Switch_selector(node);
+       ir_mode *const sel_mode   = get_irn_mode(sel);
+       const bool     is_dw_mode = sel_mode == env->high_signed
+                                || sel_mode == env->high_unsigned;
+       (void)high_mode;
+
+       if (is_dw_mode) {
+               /* we can't really handle Switch with 64bit offsets */
+               panic("Switch with 64bit jumptable not supported");
+       }
+       lower_node(sel);
 }
 
 /**
  * Translate a Cond.
  */
-static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Cond(ir_node *node, ir_mode *high_mode)
 {
        ir_node *left, *right, *block;
        ir_node *sel = get_Cond_selector(node);
-       ir_mode *m = get_irn_mode(sel);
        ir_mode *cmp_mode;
-       const node_entry_t *lentry, *rentry;
-       ir_node  *proj, *projT = NULL, *projF = NULL;
+       const lower64_entry_t *lentry, *rentry;
+       ir_node  *projT = NULL, *projF = NULL;
        ir_node  *new_bl, *irn;
        ir_node  *projHF, *projHT;
        ir_node  *dst_blk;
@@ -998,32 +1254,33 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
        ir_graph *irg;
        dbg_info *dbg;
 
-       (void) mode;
+       (void) high_mode;
 
-       if (m != mode_b) {
-               if (m == env->high_signed || m == env->high_unsigned) {
-                       /* bad we can't really handle Switch with 64bit offsets */
-                       panic("Cond with 64bit jumptable not supported");
-               }
+       if (!is_Cmp(sel)) {
+               lower_node(sel);
                return;
        }
 
-       if (!is_Cmp(sel))
-               return;
-
        left     = get_Cmp_left(sel);
        cmp_mode = get_irn_mode(left);
-       if (cmp_mode != env->high_signed && cmp_mode != env->high_unsigned)
+       if (cmp_mode != env->high_signed && cmp_mode != env->high_unsigned) {
+               lower_node(sel);
                return;
+       }
 
        right  = get_Cmp_right(sel);
-       lentry = get_node_entry(env, left);
-       rentry = get_node_entry(env, right);
+       lower_node(left);
+       lower_node(right);
+       lentry = get_node_entry(left);
+       rentry = get_node_entry(right);
 
        /* all right, build the code */
-       for (proj = (ir_node*)get_irn_link(node); proj;
-            proj = (ir_node*)get_irn_link(proj)) {
-               long proj_nr = get_Proj_proj(proj);
+       foreach_out_edge_safe(node, edge) {
+               ir_node *proj    = get_edge_src_irn(edge);
+               long     proj_nr;
+               if (!is_Proj(proj))
+                       continue;
+               proj_nr = get_Proj_proj(proj);
 
                if (proj_nr == pn_Cond_true) {
                        assert(projT == NULL && "more than one Proj(true)");
@@ -1043,25 +1300,25 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
        dbg      = get_irn_dbg_info(sel);
        relation = get_Cmp_relation(sel);
 
-       if (is_Const(right) && is_Const_null(right)) {
-               if (relation == ir_relation_equal || relation == ir_relation_less_greater) {
-                       /* x ==/!= 0 ==> or(low,high) ==/!= 0 */
-                       ir_mode *mode   = env->low_unsigned;
-                       ir_node *low    = new_r_Conv(block, lentry->low_word, mode);
-                       ir_node *high   = new_r_Conv(block, lentry->high_word, mode);
-                       ir_node *ornode = new_rd_Or(dbg, block, low, high, mode);
-                       ir_node *cmp    = new_rd_Cmp(dbg, block, ornode, new_r_Const_long(irg, mode, 0), relation);
-                       set_Cond_selector(node, cmp);
-                       return;
-               }
+       if (is_equality_cmp(sel)) {
+               /* x ==/!= y ==> or(x_low^y_low,x_high^y_high) ==/!= 0 */
+               ir_mode *mode       = env->low_unsigned;
+               ir_node *low_left   = new_rd_Conv(dbg, block, lentry->low_word, mode);
+               ir_node *high_left  = new_rd_Conv(dbg, block, lentry->high_word, mode);
+               ir_node *low_right  = new_rd_Conv(dbg, block, rentry->low_word, mode);
+               ir_node *high_right = new_rd_Conv(dbg, block, rentry->high_word, mode);
+               ir_node *xor_low    = new_rd_Eor(dbg, block, low_left, low_right, mode);
+               ir_node *xor_high   = new_rd_Eor(dbg, block, high_left, high_right, mode);
+               ir_node *ornode = new_rd_Or(dbg, block, xor_low, xor_high, mode);
+               ir_node *cmp    = new_rd_Cmp(dbg, block, ornode, new_r_Const(irg, get_mode_null(mode)), relation);
+               set_Cond_selector(node, cmp);
+               return;
        }
 
        if (relation == ir_relation_equal) {
+               ir_node *proj;
                /* simple case:a == b <==> a_h == b_h && a_l == b_l */
-               pmap_entry *entry = pmap_find(env->proj_2_block, projF);
-
-               assert(entry);
-               dst_blk = (ir_node*)entry->value;
+               dst_blk = get_cfop_destination(projF);
 
                irn = new_rd_Cmp(dbg, block, lentry->high_word, rentry->high_word,
                                 ir_relation_equal);
@@ -1091,11 +1348,9 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
                mark_irn_visited(proj);
                exchange(projT, proj);
        } else if (relation == ir_relation_less_greater) {
+               ir_node *proj;
                /* simple case:a != b <==> a_h != b_h || a_l != b_l */
-               pmap_entry *entry = pmap_find(env->proj_2_block, projT);
-
-               assert(entry);
-               dst_blk = (ir_node*)entry->value;
+               dst_blk = get_cfop_destination(projT);
 
                irn = new_rd_Cmp(dbg, block, lentry->high_word, rentry->high_word,
                                 ir_relation_less_greater);
@@ -1125,17 +1380,13 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
                mark_irn_visited(proj);
                exchange(projF, proj);
        } else {
+               ir_node *proj;
                /* a rel b <==> a_h REL b_h || (a_h == b_h && a_l rel b_l) */
                ir_node *dstT, *dstF, *newbl_eq, *newbl_l;
-               pmap_entry *entry;
+               ir_node *projEqF;
 
-               entry = pmap_find(env->proj_2_block, projT);
-               assert(entry);
-               dstT = (ir_node*)entry->value;
-
-               entry = pmap_find(env->proj_2_block, projF);
-               assert(entry);
-               dstF = (ir_node*)entry->value;
+               dstT = get_cfop_destination(projT);
+               dstF = get_cfop_destination(projF);
 
                irn = new_rd_Cmp(dbg, block, lentry->high_word, rentry->high_word,
                                 relation & ~ir_relation_equal);
@@ -1144,8 +1395,6 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
 
                projHT = new_r_Proj(irn, mode_X, pn_Cond_true);
                mark_irn_visited(projHT);
-               exchange(projT, projHT);
-               projT = projHT;
 
                projHF = new_r_Proj(irn, mode_X, pn_Cond_false);
                mark_irn_visited(projHF);
@@ -1156,10 +1405,8 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
                                 ir_relation_equal);
                irn = new_rd_Cond(dbg, newbl_eq, irn);
 
-               proj = new_r_Proj(irn, mode_X, pn_Cond_false);
-               mark_irn_visited(proj);
-               exchange(projF, proj);
-               projF = proj;
+               projEqF = new_r_Proj(irn, mode_X, pn_Cond_false);
+               mark_irn_visited(projEqF);
 
                proj = new_r_Proj(irn, mode_X, pn_Cond_true);
                mark_irn_visited(proj);
@@ -1179,6 +1426,9 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
                proj = new_r_Proj(irn, mode_X, pn_Cond_false);
                mark_irn_visited(proj);
                add_block_cf_input(dstF, projF, proj);
+
+               exchange(projT, projHT);
+               exchange(projF, projEqF);
        }
 
        /* we have changed the control flow */
@@ -1188,7 +1438,7 @@ static void lower_Cond(ir_node *node, ir_mode *mode, lower_env_t *env)
 /**
  * Translate a Conv to higher_signed
  */
-static void lower_Conv_to_Ll(ir_node *node, lower_env_t *env)
+static void lower_Conv_to_Ll(ir_node *node)
 {
        ir_mode  *omode        = get_irn_mode(node);
        ir_node  *op           = get_Conv_op(node);
@@ -1206,7 +1456,7 @@ static void lower_Conv_to_Ll(ir_node *node, lower_env_t *env)
        if (mode_is_int(imode) || mode_is_reference(imode)) {
                if (imode == env->high_signed || imode == env->high_unsigned) {
                        /* a Conv from Lu to Ls or Ls to Lu */
-                       const node_entry_t *op_entry = get_node_entry(env, op);
+                       const lower64_entry_t *op_entry = get_node_entry(op);
                        res_low  = op_entry->low_word;
                        res_high = new_rd_Conv(dbg, block, op_entry->high_word, low_signed);
                } else {
@@ -1231,30 +1481,35 @@ static void lower_Conv_to_Ll(ir_node *node, lower_env_t *env)
                res_high = new_r_Const(irg, get_mode_null(low_signed));
        } else {
                ir_node *irn, *call;
-               ir_type *mtp = get_conv_type(imode, omode, env);
+               ir_type *mtp = get_conv_type(imode, omode);
 
-               irn = get_intrinsic_address(mtp, get_irn_op(node), imode, omode, env);
+               irn = get_intrinsic_address(mtp, get_irn_op(node), imode, omode);
                call = new_rd_Call(dbg, block, get_irg_no_mem(irg), irn, 1, &op, mtp);
                set_irn_pinned(call, get_irn_pinned(node));
                irn = new_r_Proj(call, mode_T, pn_Call_T_result);
 
-               res_low  = new_r_Proj(irn, low_unsigned, 0);
-               res_high = new_r_Proj(irn, low_signed, 1);
+               if (env->params->little_endian) {
+                       res_low  = new_r_Proj(irn, low_unsigned, 0);
+                       res_high = new_r_Proj(irn, low_signed, 1);
+               } else {
+                       res_low  = new_r_Proj(irn, low_unsigned, 1);
+                       res_high = new_r_Proj(irn, low_signed,   0);
+               }
        }
-       set_lowered(env, node, res_low, res_high);
+       ir_set_dw_lowered(node, res_low, res_high);
 }
 
 /**
  * Translate a Conv from higher_unsigned
  */
-static void lower_Conv_from_Ll(ir_node *node, lower_env_t *env)
+static void lower_Conv_from_Ll(ir_node *node)
 {
-       ir_node            *op    = get_Conv_op(node);
-       ir_mode            *omode = get_irn_mode(node);
-       ir_node            *block = get_nodes_block(node);
-       dbg_info           *dbg   = get_irn_dbg_info(node);
-       ir_graph           *irg   = get_irn_irg(node);
-       const node_entry_t *entry = get_node_entry(env, op);
+       ir_node               *op    = get_Conv_op(node);
+       ir_mode               *omode = get_irn_mode(node);
+       ir_node               *block = get_nodes_block(node);
+       dbg_info              *dbg   = get_irn_dbg_info(node);
+       ir_graph              *irg   = get_irn_irg(node);
+       const lower64_entry_t *entry = get_node_entry(op);
 
        if (mode_is_int(omode) || mode_is_reference(omode)) {
                op = entry->low_word;
@@ -1273,71 +1528,92 @@ static void lower_Conv_from_Ll(ir_node *node, lower_env_t *env)
        } else {
                ir_node *irn, *call, *in[2];
                ir_mode *imode = get_irn_mode(op);
-               ir_type *mtp   = get_conv_type(imode, omode, env);
+               ir_type *mtp   = get_conv_type(imode, omode);
+               ir_node *res;
 
-               irn   = get_intrinsic_address(mtp, get_irn_op(node), imode, omode, env);
-               in[0] = entry->low_word;
-               in[1] = entry->high_word;
+               irn   = get_intrinsic_address(mtp, get_irn_op(node), imode, omode);
+               if (env->params->little_endian) {
+                       in[0] = entry->low_word;
+                       in[1] = entry->high_word;
+               } else {
+                       in[0] = entry->high_word;
+                       in[1] = entry->low_word;
+               }
 
                call = new_rd_Call(dbg, block, get_irg_no_mem(irg), irn, 2, in, mtp);
                set_irn_pinned(call, get_irn_pinned(node));
                irn = new_r_Proj(call, mode_T, pn_Call_T_result);
+               res = new_r_Proj(irn, omode, 0);
 
-               exchange(node, new_r_Proj(irn, omode, 0));
+               exchange(node, res);
        }
 }
 
 /**
  * lower Cmp
  */
-static void lower_Cmp(ir_node *cmp, ir_mode *m, lower_env_t *env)
+static void lower_Cmp(ir_node *cmp, ir_mode *m)
 {
-       ir_node  *l    = get_Cmp_left(cmp);
-       ir_mode  *mode = get_irn_mode(l);
+       ir_node  *l        = get_Cmp_left(cmp);
+       ir_mode  *cmp_mode = get_irn_mode(l);
        ir_node  *r, *low, *high, *t, *res;
        ir_relation relation;
-       ir_node  *blk;
-       dbg_info *db;
-       const node_entry_t *lentry;
-       const node_entry_t *rentry;
+       ir_node  *block;
+       dbg_info *dbg;
+       const lower64_entry_t *lentry;
+       const lower64_entry_t *rentry;
        (void) m;
 
+       /* only a Cmp whose operands have a double-word mode is rewritten */
-       if (mode != env->high_signed && mode != env->high_unsigned) {
+       if (cmp_mode != env->high_signed && cmp_mode != env->high_unsigned)
                return;
-       }
 
        r        = get_Cmp_right(cmp);
-       lentry   = get_node_entry(env, l);
-       rentry   = get_node_entry(env, r);
+       lentry   = get_node_entry(l);
+       rentry   = get_node_entry(r);
        relation = get_Cmp_relation(cmp);
-       blk      = get_nodes_block(cmp);
-       db       = get_irn_dbg_info(cmp);
+       block    = get_nodes_block(cmp);
+       dbg      = get_irn_dbg_info(cmp);
+
+       /* easy case for (in)equality tests:
+        * x ==/!= y ==> or(x_low^y_low, x_high^y_high) ==/!= 0
+        * (see lower_Cond for details) */
+       if (is_equality_cmp(cmp)) {
+               ir_graph *irg        = get_irn_irg(cmp);
+               ir_mode  *mode       = env->low_unsigned;
+               ir_node  *low_left   = new_rd_Conv(dbg, block, lentry->low_word, mode);
+               ir_node  *high_left  = new_rd_Conv(dbg, block, lentry->high_word, mode);
+               ir_node  *low_right  = new_rd_Conv(dbg, block, rentry->low_word, mode);
+               ir_node  *high_right = new_rd_Conv(dbg, block, rentry->high_word, mode);
+               ir_node  *xor_low    = new_rd_Eor(dbg, block, low_left, low_right, mode);
+               ir_node  *xor_high   = new_rd_Eor(dbg, block, high_left, high_right, mode);
+               ir_node  *ornode     = new_rd_Or(dbg, block, xor_low, xor_high, mode);
+               ir_node  *new_cmp    = new_rd_Cmp(dbg, block, ornode, new_r_Const(irg, get_mode_null(mode)), relation);
+               exchange(cmp, new_cmp);
+               return;
+       }
 
+       /* NOTE(review): is_equality_cmp() accepts ir_relation_equal and
+        * ir_relation_less_greater, so the first two branches below look
+        * unreachable at this point - confirm before removing them */
        if (relation == ir_relation_equal) {
                /* simple case:a == b <==> a_h == b_h && a_l == b_l */
-               low  = new_rd_Cmp(db, blk, lentry->low_word, rentry->low_word,
+               low  = new_rd_Cmp(dbg, block, lentry->low_word, rentry->low_word,
                                  relation);
-               high = new_rd_Cmp(db, blk, lentry->high_word, rentry->high_word,
+               high = new_rd_Cmp(dbg, block, lentry->high_word, rentry->high_word,
                                  relation);
-               res  = new_rd_And(db, blk, low, high, mode_b);
+               res  = new_rd_And(dbg, block, low, high, mode_b);
        } else if (relation == ir_relation_less_greater) {
                /* simple case:a != b <==> a_h != b_h || a_l != b_l */
-               low  = new_rd_Cmp(db, blk, lentry->low_word, rentry->low_word,
+               low  = new_rd_Cmp(dbg, block, lentry->low_word, rentry->low_word,
                                  relation);
-               high = new_rd_Cmp(db, blk, lentry->high_word, rentry->high_word,
+               high = new_rd_Cmp(dbg, block, lentry->high_word, rentry->high_word,
                                  relation);
-               res = new_rd_Or(db, blk, low, high, mode_b);
+               res = new_rd_Or(dbg, block, low, high, mode_b);
        } else {
-               low  = new_rd_Cmp(db, blk, lentry->low_word, rentry->low_word,
+               /* a rel b <==> a_h REL b_h || (a_h == b_h && a_l rel b_l) */
+               ir_node *high1 = new_rd_Cmp(dbg, block, lentry->high_word,
+                       rentry->high_word, relation & ~ir_relation_equal);
+               low  = new_rd_Cmp(dbg, block, lentry->low_word, rentry->low_word,
                                  relation);
-               high = new_rd_Cmp(db, blk, lentry->high_word, rentry->high_word,
+               high = new_rd_Cmp(dbg, block, lentry->high_word, rentry->high_word,
                                  ir_relation_equal);
-
-               /* a rel b <==> a_h REL b_h || (a_h == b_h && a_l rel b_l) */
-               ir_node *high1 = new_rd_Cmp(db, blk, lentry->high_word,
-                                           rentry->high_word, relation & ~ir_relation_equal);
-               t = new_rd_And(db, blk, low, high, mode_b);
-               res = new_rd_Or(db, blk, high1, t, mode_b);
+               t = new_rd_And(dbg, block, low, high, mode_b);
+               res = new_rd_Or(dbg, block, high1, t, mode_b);
        }
+       /* the original Cmp is replaced by the mode_b combination built above */
        exchange(cmp, res);
 }
@@ -1345,40 +1621,73 @@ static void lower_Cmp(ir_node *cmp, ir_mode *m, lower_env_t *env)
 /**
  * Translate a Conv.
  */
-static void lower_Conv(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Conv(ir_node *node, ir_mode *mode)
 {
+       /* the mode handed in is ignored: the result mode of the Conv itself
+        * decides which direction is lowered */
        mode = get_irn_mode(node);
 
        if (mode == env->high_signed || mode == env->high_unsigned) {
-               lower_Conv_to_Ll(node, env);
+               lower_Conv_to_Ll(node);
        } else {
+               /* the result is not double-word: only a Conv with a
+                * double-word operand is lowered, anything else is left alone */
                ir_mode *op_mode = get_irn_mode(get_Conv_op(node));
 
-               if (op_mode == env->high_signed || op_mode == env->high_unsigned) {
-                       lower_Conv_from_Ll(node, env);
+               if (op_mode == env->high_signed || op_mode == env->high_unsigned) {
+                       lower_Conv_from_Ll(node);
                }
        }
 }
 
-/**
- * Remember the new argument index of this value type entity in the lowered
- * method type.
- *
- * @param ent  the entity
- * @param pos  the argument index of this entity
- */
-static inline void set_entity_arg_idx(ir_entity *ent, int pos)
+/**
+ * Renumber the parameter entities on the frame type of a graph.
+ *
+ * Every primitive parameter of orig_mtp with a double-word mode advances the
+ * running parameter index by two instead of one, so the entities of all
+ * later parameters receive correspondingly shifted parameter numbers. The
+ * entity of such a double-word parameter additionally gets its
+ * doubleword_low_mode set to env->low_unsigned.
+ *
+ * @param irg       the graph whose frame type holds the parameter entities
+ * @param orig_mtp  the method type the current parameter numbers refer to
+ */
+static void fix_parameter_entities(ir_graph *irg, ir_type *orig_mtp)
 {
-       set_entity_link(ent, INT_TO_PTR(pos));
-}
+       size_t      orig_n_params      = get_method_n_params(orig_mtp);
+       ir_entity **parameter_entities;
 
-/**
- * Retrieve the argument index of a value type entity.
- *
- * @param ent  the entity
- */
-static int get_entity_arg_idx(const ir_entity *ent) {
-       return (int)PTR_TO_INT(get_entity_link(ent));
+       parameter_entities = ALLOCANZ(ir_entity*, orig_n_params);
+
+       ir_type *frame_type = get_irg_frame_type(irg);
+       size_t   n          = get_compound_n_members(frame_type);
+       size_t   i;
+       size_t   n_param;
+
+       /* collect parameter entities */
+       for (i = 0; i < n; ++i) {
+               ir_entity *entity = get_compound_member(frame_type, i);
+               size_t     p;
+               if (!is_parameter_entity(entity))
+                       continue;
+               p = get_entity_parameter_number(entity);
+               /* the va_start entity has no slot in the table and is skipped */
+               if (p == IR_VA_START_PARAMETER_NUMBER)
+                       continue;
+               assert(p < orig_n_params);
+               assert(parameter_entities[p] == NULL);
+               parameter_entities[p] = entity;
+       }
+
+       /* adjust indices */
+       n_param = 0;
+       for (i = 0; i < orig_n_params; ++i, ++n_param) {
+               ir_entity *entity = parameter_entities[i];
+               ir_type   *tp;
+
+               /* a parameter without an entity on the frame still advances
+                * the index, it just has nothing to renumber */
+               if (entity != NULL)
+                       set_entity_parameter_number(entity, n_param);
+
+               tp = get_method_param_type(orig_mtp, i);
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
+                       if (mode == env->high_signed || mode == env->high_unsigned) {
+                               /* a double-word parameter takes one extra index */
+                               ++n_param;
+                               /* note that we do not change the type of the parameter
+                                * entities, as calling convention fixup later still needs to
+                                * know which is/was a lowered doubleword.
+                                * So we just mark/remember it for later */
+                               if (entity != NULL) {
+                                       assert(entity->attr.parameter.doubleword_low_mode == NULL);
+                                       entity->attr.parameter.doubleword_low_mode
+                                               = env->low_unsigned;
+                               }
+                       }
+               }
+       }
 }
 
 /**
@@ -1389,322 +1698,310 @@ static int get_entity_arg_idx(const ir_entity *ent) {
  *
  * @return the lowered type
+ *
+ * @note The new version returns the entry cached in the lowered_type map if
+ *       there is one, and returns @p mtp itself when the type was already
+ *       visited or has no doubleword parameter or result.
  */
-static ir_type *lower_mtp(lower_env_t *env, ir_type *mtp)
+static ir_type *lower_mtp(ir_type *mtp)
 {
-       pmap_entry *entry;
-       ident      *lid;
-       ir_type    *res, *value_type;
-
-       if (is_lowered_type(mtp))
+       ir_type *res;
+       size_t   i;
+       size_t   orig_n_params;
+       size_t   orig_n_res;
+       size_t   n_param;
+       size_t   n_res;
+       bool     must_be_lowered;
+
+       res = pmap_get(ir_type, lowered_type, mtp); /* reuse an earlier lowering of this type */
+       if (res != NULL)
+               return res;
+       if (type_visited(mtp)) /* seen before without a cached lowering: hand it back as is */
                return mtp;
+       mark_type_visited(mtp);
 
-       entry = pmap_find(lowered_type, mtp);
-       if (! entry) {
-               int i, orig_n_params, orig_n_res, n_param, n_res;
+       orig_n_params   = get_method_n_params(mtp);
+       orig_n_res      = get_method_n_ress(mtp);
+       n_param         = orig_n_params;
+       n_res           = orig_n_res;
+       must_be_lowered = false;
 
-               /* count new number of params */
-               n_param = orig_n_params = get_method_n_params(mtp);
-               for (i = orig_n_params - 1; i >= 0; --i) {
-                       ir_type *tp = get_method_param_type(mtp, i);
+       /* count new number of params */
+       for (i = orig_n_params; i > 0;) { /* counts down; the decrement is in the body */
+               ir_type *tp = get_method_param_type(mtp, --i);
 
-                       if (is_Primitive_type(tp)) {
-                               ir_mode *mode = get_type_mode(tp);
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
 
-                               if (mode == env->high_signed ||
-                                       mode == env->high_unsigned)
-                                       ++n_param;
+                       if (mode == env->high_signed || mode == env->high_unsigned) {
+                               ++n_param; /* a doubleword needs one extra slot */
+                               must_be_lowered = true;
                        }
                }
+       }
 
-               /* count new number of results */
-               n_res = orig_n_res = get_method_n_ress(mtp);
-               for (i = orig_n_res - 1; i >= 0; --i) {
-                       ir_type *tp = get_method_res_type(mtp, i);
+       /* count new number of results */
+       for (i = orig_n_res; i > 0;) {
+               ir_type *tp = get_method_res_type(mtp, --i);
 
-                       if (is_Primitive_type(tp)) {
-                               ir_mode *mode = get_type_mode(tp);
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
 
-                               if (mode == env->high_signed ||
-                                       mode == env->high_unsigned)
-                                       ++n_res;
+                       if (mode == env->high_signed || mode == env->high_unsigned) {
+                               ++n_res;
+                               must_be_lowered = true;
                        }
                }
+       }
+       if (!must_be_lowered) { /* no doubleword anywhere: keep the type and clear its link */
+               set_type_link(mtp, NULL);
+               return mtp;
+       }
 
-               res = new_type_method(n_param, n_res);
+       res = new_d_type_method(n_param, n_res, get_type_dbg_info(mtp));
 
-               /* set param types and result types */
-               for (i = n_param = 0; i < orig_n_params; ++i) {
-                       ir_type *tp = get_method_param_type(mtp, i);
+       /* set param types and result types */
+       for (i = n_param = 0; i < orig_n_params; ++i) {
+               ir_type *tp = get_method_param_type(mtp, i);
 
-                       if (is_Primitive_type(tp)) {
-                               ir_mode *mode = get_type_mode(tp);
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
 
-                               if (mode == env->high_signed) {
+                       if (mode == env->high_signed) {
+                               if (env->params->little_endian) { /* word order depends on endianness, as in lower_Return and lower_Start */
                                        set_method_param_type(res, n_param++, tp_u);
                                        set_method_param_type(res, n_param++, tp_s);
-                               } else if (mode == env->high_unsigned) {
-                                       set_method_param_type(res, n_param++, tp_u);
-                                       set_method_param_type(res, n_param++, tp_u);
                                } else {
-                                       set_method_param_type(res, n_param++, tp);
+                                       set_method_param_type(res, n_param++, tp_s);
+                                       set_method_param_type(res, n_param++, tp_u);
                                }
+                       } else if (mode == env->high_unsigned) {
+                               set_method_param_type(res, n_param++, tp_u);
+                               set_method_param_type(res, n_param++, tp_u);
                        } else {
-                               set_method_param_type(res, n_param++, tp);
+                               set_method_param_type(res, n_param, tp);
+                               ++n_param;
                        }
+               } else {
+                       set_method_param_type(res, n_param, tp);
+                       ++n_param;
                }
-               for (i = n_res = 0; i < orig_n_res; ++i) {
-                       ir_type *tp = get_method_res_type(mtp, i);
+       }
+       for (i = n_res = 0; i < orig_n_res; ++i) {
+               ir_type *tp = get_method_res_type(mtp, i);
 
-                       if (is_Primitive_type(tp)) {
-                               ir_mode *mode = get_type_mode(tp);
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
 
-                               if (mode == env->high_signed) {
+                       if (mode == env->high_signed) {
+                               if (env->params->little_endian) {
                                        set_method_res_type(res, n_res++, tp_u);
                                        set_method_res_type(res, n_res++, tp_s);
-                               } else if (mode == env->high_unsigned) {
-                                       set_method_res_type(res, n_res++, tp_u);
-                                       set_method_res_type(res, n_res++, tp_u);
                                } else {
-                                       set_method_res_type(res, n_res++, tp);
+                                       set_method_res_type(res, n_res++, tp_s);
+                                       set_method_res_type(res, n_res++, tp_u);
                                }
+                       } else if (mode == env->high_unsigned) {
+                               set_method_res_type(res, n_res++, tp_u);
+                               set_method_res_type(res, n_res++, tp_u);
                        } else {
                                set_method_res_type(res, n_res++, tp);
                        }
+               } else {
+                       set_method_res_type(res, n_res++, tp);
                }
-               set_lowered_type(mtp, res);
-               pmap_insert(lowered_type, mtp, res);
-
-               value_type = get_method_value_param_type(mtp);
-               if (value_type != NULL) {
-                       /* this creates a new value parameter type */
-                       (void)get_method_value_param_ent(res, 0);
-
-                       /* set new param positions for all entities of the value type */
-                       for (i = n_param = 0; i < orig_n_params; ++i) {
-                               ir_type   *tp  = get_method_param_type(mtp, i);
-                               ident     *id  = get_method_param_ident(mtp, i);
-                               ir_entity *ent = get_method_value_param_ent(mtp, i);
-
-                               set_entity_arg_idx(ent, n_param);
-                               if (is_Primitive_type(tp)) {
-                                       ir_mode *mode = get_type_mode(tp);
-
-                                       if (mode == env->high_signed || mode == env->high_unsigned) {
-                                               if (id != NULL) {
-                                                       lid = id_mangle(id, env->first_id);
-                                                       set_method_param_ident(res, n_param, lid);
-                                                       set_entity_ident(get_method_value_param_ent(res, n_param), lid);
-                                                       lid = id_mangle(id, env->next_id);
-                                                       set_method_param_ident(res, n_param + 1, lid);
-                                                       set_entity_ident(get_method_value_param_ent(res, n_param + 1), lid);
-                                               }
-                                               n_param += 2;
-                                               continue;
-                                       }
-                               }
-                               if (id != NULL) {
-                                       set_method_param_ident(res, n_param, id);
-                                       set_entity_ident(get_method_value_param_ent(res, n_param), id);
-                               }
-                               ++n_param;
-                       }
-
-                       set_lowered_type(value_type, get_method_value_param_type(res));
-               }
-       } else {
-               res = (ir_type*)entry->value;
        }
+
+       set_method_variadicity(res, get_method_variadicity(mtp)); /* carry the method attributes over */
+       set_method_calling_convention(res, get_method_calling_convention(mtp));
+       set_method_additional_properties(res, get_method_additional_properties(mtp));
+
+       set_higher_type(res, mtp);
+       set_type_link(res, mtp); /* lower_Start reads the type link to get the original type */
+
+       mark_type_visited(res);
+       pmap_insert(lowered_type, mtp, res); /* cache for the lookup at the top */
        return res;
 }
 
 /**
  * Translate a Return.
+ *
+ * Leaves the node untouched when no result has a doubleword mode. Otherwise
+ * each doubleword result is replaced by its two lowered words, ordered
+ * according to env->params->little_endian.
  */
-static void lower_Return(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Return(ir_node *node, ir_mode *mode)
 {
-       ir_graph  *irg = get_irn_irg(node);
-       ir_entity *ent = get_irg_entity(irg);
-       ir_type   *mtp = get_entity_type(ent);
        ir_node  **in;
-       int        i, j, n;
+       size_t     i, j, n;
        int        need_conv = 0;
        (void) mode;
 
        /* check if this return must be lowered */
        for (i = 0, n = get_Return_n_ress(node); i < n; ++i) {
-               ir_node *pred = get_Return_res(node, i);
-               ir_mode *mode = get_irn_op_mode(pred);
+               ir_node *pred  = get_Return_res(node, i);
+               ir_mode *rmode = get_irn_op_mode(pred);
 
-               if (mode == env->high_signed || mode == env->high_unsigned)
+               if (rmode == env->high_signed || rmode == env->high_unsigned)
                        need_conv = 1;
        }
        if (! need_conv)
                return;
 
-       ent = get_irg_entity(irg);
-       mtp = get_entity_type(ent);
-
-       mtp = lower_mtp(env, mtp);
-       set_entity_type(ent, mtp);
+       ir_graph  *irg = get_irn_irg(node);
+       ir_entity *ent = get_irg_entity(irg);
+       ir_type   *mtp = get_entity_type(ent); /* NOTE(review): no longer lowered here; presumably lowered beforehand - confirm */
 
        /* create a new in array */
        NEW_ARR_A(ir_node *, in, get_method_n_ress(mtp) + 1);
-       in[0] = get_Return_mem(node);
+       j = 0;
+       in[j++] = get_Return_mem(node); /* slot 0 holds the memory input */
 
-       for (j = i = 0, n = get_Return_n_ress(node); i < n; ++i) {
+       for (i = 0, n = get_Return_n_ress(node); i < n; ++i) {
                ir_node *pred      = get_Return_res(node, i);
                ir_mode *pred_mode = get_irn_mode(pred);
 
                if (pred_mode == env->high_signed || pred_mode == env->high_unsigned) {
-                       const node_entry_t *entry = get_node_entry(env, pred);
-                       in[++j] = entry->low_word;
-                       in[++j] = entry->high_word;
+                       const lower64_entry_t *entry = get_node_entry(pred);
+                       if (env->params->little_endian) {
+                               in[j++] = entry->low_word;
+                               in[j++] = entry->high_word;
+                       } else {
+                               in[j++] = entry->high_word;
+                               in[j++] = entry->low_word;
+                       }
                } else {
-                       in[++j] = pred;
+                       in[j++] = pred;
                }
        }
+       assert(j == get_method_n_ress(mtp)+1); /* the in array must match the result count of mtp plus memory */
 
-       set_irn_in(node, j+1, in);
+       set_irn_in(node, j, in);
 }
 
 /**
  * Translate the parameters.
+ *
+ * Renumbers the Projs of the argument tuple of the Start node. For every
+ * doubleword argument Proj, two word-sized Projs are created and registered
+ * as its lowered form.
  */
-static void lower_Start(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Start(ir_node *node, ir_mode *high_mode)
 {
-       ir_graph  *irg = get_irn_irg(node);
-       ir_entity *ent = get_irg_entity(irg);
-       ir_type   *tp  = get_entity_type(ent);
-       ir_type   *mtp;
+       ir_graph  *irg      = get_irn_irg(node);
+       ir_entity *ent      = get_irg_entity(irg);
+       ir_type   *mtp      = get_entity_type(ent);
+       ir_type   *orig_mtp = (ir_type*)get_type_link(mtp); /* lower_mtp() stores the original type in the link */
+       ir_node   *args;
        long      *new_projs;
-       int       i, j, n_params, rem;
-       ir_node   *proj, *args;
-       (void) mode;
-
-       if (is_lowered_type(tp)) {
-               mtp = get_associated_type(tp);
-       } else {
-               mtp = tp;
-       }
-       assert(! is_lowered_type(mtp));
+       size_t    i, j, n_params;
+       (void) high_mode;
 
-       n_params = get_method_n_params(mtp);
-       if (n_params <= 0)
+       /* if type link is NULL then the type was not lowered, hence no changes
+        * at Start necessary */
+       if (orig_mtp == NULL)
                return;
 
+       n_params = get_method_n_params(orig_mtp);
+
        NEW_ARR_A(long, new_projs, n_params);
 
-       /* first check if we have parameters that must be fixed */
+       /* Calculate mapping of proj numbers in new_projs */
        for (i = j = 0; i < n_params; ++i, ++j) { /* j gains one extra step per doubleword parameter */
-               ir_type *tp = get_method_param_type(mtp, i);
+               ir_type *ptp = get_method_param_type(orig_mtp, i);
 
                new_projs[i] = j;
-               if (is_Primitive_type(tp)) {
-                       ir_mode *mode = get_type_mode(tp);
-
-                       if (mode == env->high_signed ||
-                               mode == env->high_unsigned)
+               if (is_Primitive_type(ptp)) {
+                       ir_mode *amode = get_type_mode(ptp);
+                       if (amode == env->high_signed || amode == env->high_unsigned)
                                ++j;
                }
        }
-       if (i == j)
+
+       /* find args Proj */
+       args = NULL;
+       foreach_out_edge(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               if (!is_Proj(proj))
+                       continue;
+               if (get_Proj_proj(proj) == pn_Start_T_args) {
+                       args = proj;
+                       break;
+               }
+       }
+       if (args == NULL) /* no argument tuple Proj found: nothing to fix */
                return;
 
-       mtp = lower_mtp(env, mtp);
-       set_entity_type(ent, mtp);
-
-       /* switch off optimization for new Proj nodes or they might be CSE'ed
-          with not patched one's */
-       rem = get_optimize();
-       set_optimize(0);
-
-       /* ok, fix all Proj's and create new ones */
-       args = get_irg_args(irg);
-       for (proj = (ir_node*)get_irn_link(node); proj;
-            proj = (ir_node*)get_irn_link(proj)) {
-               ir_node *pred = get_Proj_pred(proj);
-               long proj_nr;
-               ir_mode *mode;
-               ir_mode *mode_l;
+       /* fix all Proj's and create new ones */
+       foreach_out_edge_safe(args, edge) {
+               ir_node *proj   = get_edge_src_irn(edge);
+               ir_mode *mode   = get_irn_mode(proj);
+               ir_mode *mode_l = env->low_unsigned;
+               ir_node *pred;
+               long     proj_nr;
                ir_mode *mode_h;
                ir_node *res_low;
                ir_node *res_high;
+               int      old_cse;
                dbg_info *dbg;
 
-               /* do not visit this node again */
-               mark_irn_visited(proj);
-
-               if (pred != args)
+               if (!is_Proj(proj))
                        continue;
-
+               pred    = get_Proj_pred(proj);
                proj_nr = get_Proj_proj(proj);
-               set_Proj_proj(proj, new_projs[proj_nr]);
 
-               mode = get_irn_mode(proj);
-               mode_l = env->low_unsigned;
                if (mode == env->high_signed) {
                        mode_h = env->low_signed;
                } else if (mode == env->high_unsigned) {
                        mode_h = env->low_unsigned;
                } else {
+                       long new_pn = new_projs[proj_nr]; /* not a doubleword: only renumber the Proj */
+                       set_Proj_proj(proj, new_pn);
                        continue;
                }
 
+               /* Switch off CSE or we might get an already existing Proj. */
+               old_cse = get_opt_cse();
+               set_opt_cse(0);
                dbg = get_irn_dbg_info(proj);
-               res_low  = new_rd_Proj(dbg, args, mode_l, new_projs[proj_nr]);
-               res_high = new_rd_Proj(dbg, args, mode_h, new_projs[proj_nr] + 1);
-               set_lowered(env, proj, res_low, res_high);
+               if (env->params->little_endian) {
+                       res_low  = new_rd_Proj(dbg, pred, mode_l, new_projs[proj_nr]);
+                       res_high = new_rd_Proj(dbg, pred, mode_h, new_projs[proj_nr] + 1);
+               } else {
+                       res_high = new_rd_Proj(dbg, pred, mode_h, new_projs[proj_nr]);
+                       res_low  = new_rd_Proj(dbg, pred, mode_l, new_projs[proj_nr] + 1);
+               }
+               set_opt_cse(old_cse); /* restore the previous CSE setting */
+               ir_set_dw_lowered(proj, res_low, res_high);
        }
-       set_optimize(rem);
 }
 
 /**
  * Translate a Call.
+ *
+ * The visible code lowers the call type, passes each doubleword argument as
+ * two word-sized inputs ordered by env->params->little_endian, and renumbers
+ * or splits the Projs of the result tuple.
  */
-static void lower_Call(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Call(ir_node *node, ir_mode *mode)
 {
        ir_type  *tp = get_Call_type(node);
-       ir_type  *call_tp;
-       ir_node  **in, *proj, *results;
-       int      n_params, n_res;
+       ir_node  **in;
+       size_t   n_params, n_res;
        bool     need_lower = false;
-       int      i, j;
+       size_t   i, j;
+       size_t   p;
        long     *res_numbers = NULL;
+       ir_node  *resproj;
        (void) mode;
 
-       if (is_lowered_type(tp)) {
-               call_tp = get_associated_type(tp);
-       } else {
-               call_tp = tp;
-       }
-
-       assert(! is_lowered_type(call_tp));
-
-       n_params = get_method_n_params(call_tp);
-       for (i = 0; i < n_params; ++i) {
-               ir_type *tp = get_method_param_type(call_tp, i);
-
-               if (is_Primitive_type(tp)) {
-                       ir_mode *mode = get_type_mode(tp);
+       n_params = get_method_n_params(tp);
+       for (p = 0; p < n_params; ++p) {
+               ir_type *ptp = get_method_param_type(tp, p);
 
-                       if (mode == env->high_signed || mode == env->high_unsigned) {
+               if (is_Primitive_type(ptp)) {
+                       ir_mode *pmode = get_type_mode(ptp);
+                       if (pmode == env->high_signed || pmode == env->high_unsigned) {
                                need_lower = true;
                                break;
                        }
                }
        }
-       n_res = get_method_n_ress(call_tp);
+       n_res = get_method_n_ress(tp);
        if (n_res > 0) {
                NEW_ARR_A(long, res_numbers, n_res);
 
                for (i = j = 0; i < n_res; ++i, ++j) {
-                       ir_type *tp = get_method_res_type(call_tp, i);
+                       ir_type *ptp = get_method_res_type(tp, i);
 
                        res_numbers[i] = j; /* new Proj number for original result i */
-                       if (is_Primitive_type(tp)) {
-                               ir_mode *mode = get_type_mode(tp);
-
-                               if (mode == env->high_signed || mode == env->high_unsigned) {
+                       if (is_Primitive_type(ptp)) {
+                               ir_mode *rmode = get_type_mode(ptp);
+                               if (rmode == env->high_signed || rmode == env->high_unsigned) {
                                        need_lower = true;
                                        ++j;
                                }
@@ -1716,10 +2013,10 @@ static void lower_Call(ir_node *node, ir_mode *mode, lower_env_t *env)
                return;
 
        /* let's lower it */
-       call_tp = lower_mtp(env, call_tp);
-       set_Call_type(node, call_tp);
+       tp = lower_mtp(tp);
+       set_Call_type(node, tp);
 
-       NEW_ARR_A(ir_node *, in, get_method_n_params(call_tp) + 2);
+       NEW_ARR_A(ir_node *, in, get_method_n_params(tp) + 2); /* two extra slots: memory and call pointer */
 
        in[0] = get_Call_mem(node);
        in[1] = get_Call_ptr(node);
@@ -1729,9 +2026,14 @@ static void lower_Call(ir_node *node, ir_mode *mode, lower_env_t *env)
                ir_mode *pred_mode = get_irn_mode(pred);
 
                if (pred_mode == env->high_signed || pred_mode == env->high_unsigned) {
-                       const node_entry_t *pred_entry = get_node_entry(env, pred);
-                       in[j++] = pred_entry->low_word;
-                       in[j++] = pred_entry->high_word;
+                       const lower64_entry_t *pred_entry = get_node_entry(pred);
+                       if (env->params->little_endian) {
+                               in[j++] = pred_entry->low_word;
+                               in[j++] = pred_entry->high_word;
+                       } else {
+                               in[j++] = pred_entry->high_word;
+                               in[j++] = pred_entry->low_word;
+                       }
                } else {
                        in[j++] = pred;
                }
@@ -1739,70 +2041,81 @@ static void lower_Call(ir_node *node, ir_mode *mode, lower_env_t *env)
 
        set_irn_in(node, j, in);
 
-       /* fix the results */
-       results = NULL;
-       for (proj = (ir_node*)get_irn_link(node); proj;
-            proj = (ir_node*)get_irn_link(proj)) {
-               long proj_nr = get_Proj_proj(proj);
-
-               if (proj_nr == pn_Call_T_result && get_Proj_pred(proj) == node) {
-                       /* found the result proj */
-                       results = proj;
+       /* find results T */
+       resproj = NULL;
+       foreach_out_edge(node, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               if (!is_Proj(proj))
+                       continue;
+               if (get_Proj_proj(proj) == pn_Call_T_result) {
+                       resproj = proj;
                        break;
                }
        }
+       if (resproj == NULL) /* no result tuple Proj found: nothing left to fix */
+               return;
 
-       if (results != NULL) {    /* there are results */
-               int rem = get_optimize();
-
-               /* switch off optimization for new Proj nodes or they might be CSE'ed
-                  with not patched one's */
-               set_optimize(0);
-               for (i = j = 0, proj = (ir_node*)get_irn_link(results); proj;
-                    proj = (ir_node*)get_irn_link(proj), ++i, ++j) {
-                       if (get_Proj_pred(proj) == results) {
-                               long      proj_nr   = get_Proj_proj(proj);
-                               ir_mode  *proj_mode = get_irn_mode(proj);
-                               ir_mode  *mode_l;
-                               ir_mode  *mode_h;
-                               ir_node  *res_low;
-                               ir_node  *res_high;
-                               dbg_info *dbg;
-
-                               /* found a result */
-                               mark_irn_visited(proj);
-
-                               set_Proj_proj(proj, res_numbers[proj_nr]);
-
-                               mode_l = env->low_unsigned;
-                               if (proj_mode == env->high_signed) {
-                                       mode_h = env->low_signed;
-                               } else if (proj_mode == env->high_unsigned) {
-                                       mode_h = env->low_unsigned;
-                               } else {
-                                       continue;
-                               }
+       /* fix the results */
+       foreach_out_edge_safe(resproj, edge) {
+               ir_node  *proj      = get_edge_src_irn(edge);
+               ir_mode  *proj_mode = get_irn_mode(proj);
+               ir_mode  *mode_l    = env->low_unsigned;
+               ir_node  *pred;
+               long      proj_nr;
+               ir_mode  *mode_h;
+               ir_node  *res_low;
+               ir_node  *res_high;
+               dbg_info *dbg;
 
-                               dbg      = get_irn_dbg_info(proj);
-                               res_low  = new_rd_Proj(dbg, results, mode_l, res_numbers[proj_nr]);
-                               res_high = new_rd_Proj(dbg, results, mode_h, res_numbers[proj_nr] + 1);
-                               set_lowered(env, proj, res_low, res_high);
-                       }
+               if (!is_Proj(proj))
+                       continue;
+               pred    = get_Proj_pred(proj);
+               proj_nr = get_Proj_proj(proj);
+
+               if (proj_mode == env->high_signed) {
+                       mode_h = env->low_signed;
+               } else if (proj_mode == env->high_unsigned) {
+                       mode_h = env->low_unsigned;
+               } else {
+                       long new_nr = res_numbers[proj_nr]; /* not a doubleword: only renumber the Proj */
+                       set_Proj_proj(proj, new_nr);
+                       continue;
+               }
+
+               dbg = get_irn_dbg_info(proj); /* NOTE(review): unlike lower_Start, CSE is not switched off here - confirm this is intended */
+               if (env->params->little_endian) {
+                       res_low  = new_rd_Proj(dbg, pred, mode_l, res_numbers[proj_nr]);
+                       res_high = new_rd_Proj(dbg, pred, mode_h, res_numbers[proj_nr] + 1);
+               } else {
+                       res_high = new_rd_Proj(dbg, pred, mode_h, res_numbers[proj_nr]);
+                       res_low  = new_rd_Proj(dbg, pred, mode_l, res_numbers[proj_nr] + 1);
                }
-               set_optimize(rem);
+               ir_set_dw_lowered(proj, res_low, res_high);
        }
 }
 
 /**
  * Translate an Unknown into two.
+ *
+ * The low word becomes an Unknown in env->low_unsigned, the high word an
+ * Unknown in @p mode; both are registered as the lowered form of @p node.
  */
-static void lower_Unknown(ir_node *node, ir_mode *mode, lower_env_t *env)
+static void lower_Unknown(ir_node *node, ir_mode *mode)
 {
        ir_mode  *low_mode = env->low_unsigned;
        ir_graph *irg      = get_irn_irg(node);
        ir_node  *res_low  = new_r_Unknown(irg, low_mode);
        ir_node  *res_high = new_r_Unknown(irg, mode);
-       set_lowered(env, node, res_low, res_high);
+       ir_set_dw_lowered(node, res_low, res_high);
+}
+
+/**
+ * Translate a Bad into two.
+ *
+ * The low word becomes a Bad in env->low_unsigned, the high word a Bad in
+ * @p mode; both are registered as the lowered form of @p node.
+ */
+static void lower_Bad(ir_node *node, ir_mode *mode)
+{
+       ir_mode  *low_mode = env->low_unsigned;
+       ir_graph *irg      = get_irn_irg(node);
+       ir_node  *res_low  = new_r_Bad(irg, low_mode);
+       ir_node  *res_high = new_r_Bad(irg, mode);
+       ir_set_dw_lowered(node, res_low, res_high);
 }
 
 /**
@@ -1810,7 +2123,7 @@ static void lower_Unknown(ir_node *node, ir_mode *mode, lower_env_t *env)
  *
  * First step: just create two templates
  */
-static void lower_Phi(lower_env_t *env, ir_node *phi)
+static void lower_Phi(ir_node *phi)
 {
        ir_mode  *mode = get_irn_mode(phi);
        int       i;
@@ -1855,31 +2168,24 @@ static void lower_Phi(lower_env_t *env, ir_node *phi)
        phi_l = new_rd_Phi(dbg, block, arity, in_l, mode_l); /* Phi for the low words */
        phi_h = new_rd_Phi(dbg, block, arity, in_h, mode_h); /* Phi for the high words */
 
-       set_lowered(env, phi, phi_l, phi_h);
+       ir_set_dw_lowered(phi, phi_l, phi_h);
 
        /* remember that we need to fixup the predecessors later */
        ARR_APP1(ir_node*, env->lowered_phis, phi); /* presumably consumed by fixup_phi() - confirm */
-
-       /* Don't forget to link the new Phi nodes into the block.
-        * Beware that some Phis might be optimized away. */
-       if (is_Phi(phi_l))
-               add_Block_phi(block, phi_l);
-       if (is_Phi(phi_h))
-               add_Block_phi(block, phi_h);
 }
 
-static void fixup_phi(lower_env_t *env, ir_node *phi)
+static void fixup_phi(ir_node *phi)
 {
-       const node_entry_t *entry = get_node_entry(env, phi);
-       ir_node            *phi_l = entry->low_word;
-       ir_node            *phi_h = entry->high_word;
-       int                 arity = get_Phi_n_preds(phi);
-       int                 i;
+       const lower64_entry_t *entry = get_node_entry(phi);
+       ir_node               *phi_l = entry->low_word;
+       ir_node               *phi_h = entry->high_word;
+       int                    arity = get_Phi_n_preds(phi);
+       int                    i;
 
        /* exchange phi predecessors which are lowered by now */
        for (i = 0; i < arity; ++i) {
-               ir_node            *pred       = get_Phi_pred(phi, i);
-               const node_entry_t *pred_entry = get_node_entry(env, pred);
+               ir_node               *pred       = get_Phi_pred(phi, i);
+               const lower64_entry_t *pred_entry = get_node_entry(pred);
 
                set_Phi_pred(phi_l, i, pred_entry->low_word);
                set_Phi_pred(phi_h, i, pred_entry->high_word);
@@ -1889,77 +2195,540 @@ static void fixup_phi(lower_env_t *env, ir_node *phi)
 /**
  * Translate a Mux.
  */
-static void lower_Mux(ir_node *mux, ir_mode *mode, lower_env_t *env)
-{
-       ir_node            *truen       = get_Mux_true(mux);
-       ir_node            *falsen      = get_Mux_false(mux);
-       ir_node            *sel         = get_Mux_sel(mux);
-       const node_entry_t *true_entry  = get_node_entry(env, truen);
-       const node_entry_t *false_entry = get_node_entry(env, falsen);
-       ir_node            *true_l      = true_entry->low_word;
-       ir_node            *true_h      = true_entry->high_word;
-       ir_node            *false_l     = false_entry->low_word;
-       ir_node            *false_h     = false_entry->high_word;
-       dbg_info           *dbgi        = get_irn_dbg_info(mux);
-       ir_node            *block       = get_nodes_block(mux);
-       ir_node            *res_low
+static void lower_Mux(ir_node *mux, ir_mode *mode)
+{
+       ir_node               *truen       = get_Mux_true(mux);
+       ir_node               *falsen      = get_Mux_false(mux);
+       ir_node               *sel         = get_Mux_sel(mux);
+       const lower64_entry_t *true_entry  = get_node_entry(truen);
+       const lower64_entry_t *false_entry = get_node_entry(falsen);
+       ir_node               *true_l      = true_entry->low_word;
+       ir_node               *true_h      = true_entry->high_word;
+       ir_node               *false_l     = false_entry->low_word;
+       ir_node               *false_h     = false_entry->high_word;
+       dbg_info              *dbgi        = get_irn_dbg_info(mux);
+       ir_node               *block       = get_nodes_block(mux);
+       ir_node               *res_low
                = new_rd_Mux(dbgi, block, sel, false_l, true_l, env->low_unsigned);
-       ir_node            *res_high
+       ir_node               *res_high
                = new_rd_Mux(dbgi, block, sel, false_h, true_h, mode);
-       set_lowered(env, mux, res_low, res_high);
+       ir_set_dw_lowered(mux, res_low, res_high);
 }
 
 /**
  * Translate an ASM node.
  */
-static void lower_ASM(ir_node *asmn, ir_mode *mode, lower_env_t *env)
+static void lower_ASM(ir_node *asmn, ir_mode *mode)
 {
-       ir_mode *his = env->high_signed;
-       ir_mode *hiu = env->high_unsigned;
-       int      i;
-       ir_node *n;
+       ir_mode           *high_signed        = env->high_signed;
+       ir_mode           *high_unsigned      = env->high_unsigned;
+       int                n_outs             = get_ASM_n_output_constraints(asmn);
+       ir_asm_constraint *output_constraints = get_ASM_output_constraints(asmn);
+       ir_asm_constraint *input_constraints  = get_ASM_input_constraints(asmn);
+       unsigned           n_64bit_outs       = 0;
 
        (void)mode;
 
-       for (i = get_irn_arity(asmn) - 1; i >= 0; --i) {
-               ir_mode *op_mode = get_irn_mode(get_irn_n(asmn, i));
-               if (op_mode == his || op_mode == hiu) {
-                       panic("lowering ASM unimplemented");
+       for (int i = get_irn_arity(asmn) - 1; i >= 0; --i) {
+               ir_node *op      = get_irn_n(asmn, i);
+               ir_mode *op_mode = get_irn_mode(op);
+               if (op_mode == high_signed || op_mode == high_unsigned) {
+                       panic("lowering ASM 64bit input unimplemented");
                }
        }
 
-       for (n = asmn;;) {
-               ir_mode *proj_mode;
+       for (int o = 0; o < n_outs; ++o) {
+               const ir_asm_constraint *constraint = &output_constraints[o];
+               if (constraint->mode == high_signed || constraint->mode == high_unsigned) {
+                       const char *constr = get_id_str(constraint->constraint);
+                       ++n_64bit_outs;
+                       /* TODO: How to do this architecture neutral? This is very
+                        * i386 specific... */
+                       if (constr[0] != '=' || constr[1] != 'A') {
+                               panic("lowering ASM 64bit output only supports '=A' currently");
+                       }
+               }
+       }
 
-               n = (ir_node*)get_irn_link(n);
-               if (n == NULL)
-                       break;
+       if (n_64bit_outs == 0)
+               return;
+
+       dbg_info          *dbgi       = get_irn_dbg_info(asmn);
+       ir_node           *block      = get_nodes_block(asmn);
+       ir_node           *mem        = get_ASM_mem(asmn);
+       int                new_n_outs = 0;
+       int                n_clobber  = get_ASM_n_clobbers(asmn);
+       long              *proj_map   = ALLOCAN(long, n_outs);
+       ident            **clobbers   = get_ASM_clobbers(asmn);
+       ident             *asm_text   = get_ASM_text(asmn);
+       ir_asm_constraint *new_outputs
+               = ALLOCAN(ir_asm_constraint, n_outs+n_64bit_outs);
+       ir_node           *new_asm;
+
+       for (int o = 0; o < n_outs; ++o) {
+               const ir_asm_constraint *constraint = &output_constraints[o];
+               if (constraint->mode == high_signed || constraint->mode == high_unsigned) {
+                       new_outputs[new_n_outs].pos        = constraint->pos;
+                       new_outputs[new_n_outs].constraint = new_id_from_str("=a");
+                       new_outputs[new_n_outs].mode       = env->low_unsigned;
+                       proj_map[o] = new_n_outs;
+                       ++new_n_outs;
+                       new_outputs[new_n_outs].pos        = constraint->pos;
+                       new_outputs[new_n_outs].constraint = new_id_from_str("=d");
+                       if (constraint->mode == high_signed)
+                               new_outputs[new_n_outs].mode = env->low_signed;
+                       else
+                               new_outputs[new_n_outs].mode = env->low_unsigned;
+                       ++new_n_outs;
+               } else {
+                       new_outputs[new_n_outs] = *constraint;
+                       proj_map[o] = new_n_outs;
+                       ++new_n_outs;
+               }
+       }
+       assert(new_n_outs == n_outs+(int)n_64bit_outs);
+
+       int       n_inputs = get_ASM_n_inputs(asmn);
+       ir_node **new_ins  = ALLOCAN(ir_node*, n_inputs);
+       for (int i = 0; i < n_inputs; ++i)
+               new_ins[i] = get_ASM_input(asmn, i);
+
+       new_asm = new_rd_ASM(dbgi, block, mem, n_inputs, new_ins, input_constraints,
+                                                new_n_outs, new_outputs, n_clobber, clobbers,
+                                                asm_text);
+
+       foreach_out_edge_safe(asmn, edge) {
+               ir_node *proj      = get_edge_src_irn(edge);
+               ir_mode *proj_mode = get_irn_mode(proj);
+               long     pn;
 
-               proj_mode = get_irn_mode(n);
-               if (proj_mode == his || proj_mode == hiu) {
-                       panic("lowering ASM unimplemented");
+               if (!is_Proj(proj))
+                       continue;
+               pn = get_Proj_proj(proj);
+
+               if (pn < n_outs)
+                       pn = proj_map[pn];
+               else
+                       pn = new_n_outs + pn - n_outs;
+
+               if (proj_mode == high_signed || proj_mode == high_unsigned) {
+                       ir_mode *high_mode
+                               = proj_mode == high_signed ? env->low_signed : env->low_unsigned;
+                       ir_node *np_low  = new_r_Proj(new_asm, env->low_unsigned, pn);
+                       ir_node *np_high = new_r_Proj(new_asm, high_mode, pn+1);
+                       ir_set_dw_lowered(proj, np_low, np_high);
+               } else {
+                       ir_node *np = new_r_Proj(new_asm, proj_mode, pn);
+                       exchange(proj, np);
                }
        }
 }
 
 /**
- * Translate a Sel node.
+ * Lower the builtin type to its higher part.
+ *
+ * @param mtp  the builtin type to lower
+ *
+ * @return the lowered type
  */
-static void lower_Sel(ir_node *sel, ir_mode *mode, lower_env_t *env)
+static ir_type *lower_Builtin_type_high(ir_type *mtp)
 {
+       ir_type *res;
+       size_t   i;
+       size_t   n_params;
+       size_t   n_results;
+       bool     must_be_lowered;
+
+       res = pmap_get(ir_type, lowered_builtin_type_high, mtp); /* already lowered? */
+       if (res != NULL)
+               return res;
+
+       n_params        = get_method_n_params(mtp);
+       n_results       = get_method_n_ress(mtp);
+       must_be_lowered = false;
+
+       /* scan the parameters (last to first) for a double word primitive */
+       for (i = n_params; i > 0;) {
+               ir_type *tp = get_method_param_type(mtp, --i);
+
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
+
+                       if (mode == env->high_signed || mode == env->high_unsigned) {
+                               must_be_lowered = true;
+                               break;
+                       }
+               }
+       }
+
+       if (!must_be_lowered) { /* no double word parameter: mtp is returned unchanged */
+               set_type_link(mtp, NULL);
+               return mtp;
+       }
+
+       res = new_d_type_method(n_params, n_results, get_type_dbg_info(mtp));
+
+       /* copy param and result types, replacing double word modes by tp_s/tp_u */
+       for (i = 0; i < n_params; ++i) {
+               ir_type *tp = get_method_param_type(mtp, i);
+
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
+
+                       if (mode == env->high_signed) {
+                               if (env->params->little_endian) { /* NOTE(review): signedness of the high word depends on endianness here -- confirm */
+                                       set_method_param_type(res, i, tp_u);
+                               } else {
+                                       set_method_param_type(res, i, tp_s);
+                               }
+                       } else if (mode == env->high_unsigned) {
+                               set_method_param_type(res, i, tp_u);
+                       } else {
+                               set_method_param_type(res, i, tp);
+                       }
+               } else {
+                       set_method_param_type(res, i, tp);
+               }
+       }
+       for (i = 0; i < n_results; ++i) { /* n_results must stay intact: res has that many result slots to fill */
+               ir_type *tp = get_method_res_type(mtp, i);
+
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
+
+                       if (mode == env->high_signed) {
+                               if (env->params->little_endian) {
+                                       set_method_res_type(res, i, tp_u);
+                               } else {
+                                       set_method_res_type(res, i, tp_s);
+                               }
+                       } else if (mode == env->high_unsigned) {
+                               set_method_res_type(res, i, tp_u);
+                       } else {
+                               set_method_res_type(res, i, tp);
+                       }
+               } else {
+                       set_method_res_type(res, i, tp);
+               }
+       }
+
+       set_method_variadicity(res, get_method_variadicity(mtp));
+       set_method_calling_convention(res, get_method_calling_convention(mtp));
+       set_method_additional_properties(res, get_method_additional_properties(mtp));
+
+       pmap_insert(lowered_builtin_type_high, mtp, res); /* remember the lowering for later calls */
+       return res;
+}
+
+/**
+ * Lower the builtin type to its lower part.
+ *
+ * @param mtp  the builtin type to lower
+ *
+ * @return the lowered type
+ */
+static ir_type *lower_Builtin_type_low(ir_type *mtp)
+{
+       ir_type *res;
+       size_t   i;
+       size_t   n_params;
+       size_t   n_results;
+       bool     must_be_lowered;
+
+       res = pmap_get(ir_type, lowered_builtin_type_low, mtp); /* already lowered? */
+       if (res != NULL)
+               return res;
+
+       n_params        = get_method_n_params(mtp);
+       n_results       = get_method_n_ress(mtp);
+       must_be_lowered = false;
+
+       /* scan the parameters (last to first) for a double word primitive */
+       for (i = n_params; i > 0;) {
+               ir_type *tp = get_method_param_type(mtp, --i);
+
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
+
+                       if (mode == env->high_signed || mode == env->high_unsigned) {
+                               must_be_lowered = true;
+                               break;
+                       }
+               }
+       }
+
+       if (!must_be_lowered) { /* no double word parameter: mtp is returned unchanged */
+               set_type_link(mtp, NULL);
+               return mtp;
+       }
+
+       res = new_d_type_method(n_params, n_results, get_type_dbg_info(mtp));
+
+       /* copy param and result types, replacing double word modes by tp_s/tp_u */
+       for (i = 0; i < n_params; ++i) {
+               ir_type *tp = get_method_param_type(mtp, i);
+
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
+
+                       if (mode == env->high_signed) {
+                               if (env->params->little_endian) { /* NOTE(review): signedness of the low word depends on endianness here -- confirm */
+                                       set_method_param_type(res, i, tp_s);
+                               } else {
+                                       set_method_param_type(res, i, tp_u);
+                               }
+                       } else if (mode == env->high_unsigned) {
+                               set_method_param_type(res, i, tp_u);
+                       } else {
+                               set_method_param_type(res, i, tp);
+                       }
+               } else {
+                       set_method_param_type(res, i, tp);
+               }
+       }
+       for (i = 0; i < n_results; ++i) { /* same replacement rule, applied to the result types */
+               ir_type *tp = get_method_res_type(mtp, i);
+
+               if (is_Primitive_type(tp)) {
+                       ir_mode *mode = get_type_mode(tp);
+
+                       if (mode == env->high_signed) {
+                               if (env->params->little_endian) {
+                                       set_method_res_type(res, i, tp_s);
+                               } else {
+                                       set_method_res_type(res, i, tp_u);
+                               }
+                       } else if (mode == env->high_unsigned) {
+                               set_method_res_type(res, i, tp_u);
+                       } else {
+                               set_method_res_type(res, i, tp);
+                       }
+               } else {
+                       set_method_res_type(res, i, tp);
+               }
+       }
+
+       set_method_variadicity(res, get_method_variadicity(mtp));
+       set_method_calling_convention(res, get_method_calling_convention(mtp));
+       set_method_additional_properties(res, get_method_additional_properties(mtp));
+
+       pmap_insert(lowered_builtin_type_low, mtp, res); /* remember the lowering for later calls */
+       return res;
+}
+
+/**
+ * lowers a builtin which reduces a 64bit value to a simple summary value
+ * (popcount, ffs, ...)
+ */
+static void lower_reduce_builtin(ir_node *builtin, ir_mode *mode)
+{
+       ir_builtin_kind  kind         = get_Builtin_kind(builtin);
+       ir_node         *operand      = get_Builtin_param(builtin, 0);
+       ir_mode         *operand_mode = get_irn_mode(operand);
+       if (operand_mode != env->high_signed && operand_mode != env->high_unsigned)
+               return;
+
+       {
+       arch_allow_ifconv_func  allow_ifconv      = be_get_backend_param()->allow_ifconv;
+       int                     arity             = get_irn_arity(builtin);
+       dbg_info               *dbgi              = get_irn_dbg_info(builtin);
+       ir_graph               *irg               = get_irn_irg(builtin);
+       ir_type                *type              = get_Builtin_type(builtin);
+       ir_type                *lowered_type_high = lower_Builtin_type_high(type);
+       ir_type                *lowered_type_low  = lower_Builtin_type_low(type);
+       ir_type                *result_type       = get_method_res_type(lowered_type_low, 0);
+       ir_mode                *result_mode       = get_type_mode(result_type);
+       ir_node                *block             = get_nodes_block(builtin);
+       ir_node                *mem               = get_Builtin_mem(builtin);
+       const lower64_entry_t  *entry             = get_node_entry(operand);
+       ir_mode                *high_mode         = get_irn_mode(entry->high_word);
+       ir_node                *in_high[1]        = {entry->high_word};
+       ir_node                *in_low[1]         = {entry->low_word};
+       ir_node                *res;
+
+       assert(is_NoMem(mem));
+       assert(arity == 2);
+
+       switch (kind) {
+       case ir_bk_ffs: {
+               ir_node               *number_of_bits = new_r_Const_long(irg, result_mode, get_mode_size_bits(env->low_unsigned));
+               ir_node               *zero_high      = new_rd_Const(dbgi, irg, get_mode_null(high_mode));
+               ir_node               *zero_unsigned  = new_rd_Const(dbgi, irg, get_mode_null(env->low_unsigned));
+               ir_node               *zero_result    = new_rd_Const(dbgi, irg, get_mode_null(result_mode));
+               ir_node               *cmp_low        = new_rd_Cmp(dbgi, block, entry->low_word, zero_unsigned, ir_relation_equal);
+               ir_node               *cmp_high       = new_rd_Cmp(dbgi, block, entry->high_word, zero_high, ir_relation_equal);
+               ir_node               *ffs_high       = new_rd_Builtin(dbgi, block, mem, 1, in_high, kind, lowered_type_high);
+               ir_node               *high_proj      = new_r_Proj(ffs_high, result_mode, pn_Builtin_max+1);
+               ir_node               *high           = new_rd_Add(dbgi, block, high_proj, number_of_bits, result_mode);
+               ir_node               *ffs_low        = new_rd_Builtin(dbgi, block, mem, 1, in_low, kind, lowered_type_low);
+               ir_node               *low            = new_r_Proj(ffs_low, result_mode, pn_Builtin_max+1);
+               ir_node               *mux_high       = new_rd_Mux(dbgi, block, cmp_high, high, zero_result, result_mode);
+
+               if (! allow_ifconv(cmp_high, high, zero_result))
+                       ir_nodeset_insert(&created_mux_nodes, mux_high);
+
+               res = new_rd_Mux(dbgi, block, cmp_low, low, mux_high, result_mode);
+
+               if (! allow_ifconv(cmp_low, low, mux_high))
+                       ir_nodeset_insert(&created_mux_nodes, res);
+               break;
+       }
+       case ir_bk_clz: {
+               ir_node               *zero           = new_rd_Const(dbgi, irg, get_mode_null(high_mode));
+               ir_node               *cmp_high       = new_rd_Cmp(dbgi, block, entry->high_word, zero, ir_relation_equal);
+               ir_node               *clz_high       = new_rd_Builtin(dbgi, block, mem, 1, in_high, kind, lowered_type_high);
+               ir_node               *high           = new_r_Proj(clz_high, result_mode, pn_Builtin_max+1);
+               ir_node               *clz_low        = new_rd_Builtin(dbgi, block, mem, 1, in_low, kind, lowered_type_low);
+               ir_node               *low_proj       = new_r_Proj(clz_low, result_mode, pn_Builtin_max+1);
+               ir_node               *number_of_bits = new_r_Const_long(irg, result_mode, get_mode_size_bits(mode));
+               ir_node               *low            = new_rd_Add(dbgi, block, low_proj, number_of_bits, result_mode);
+
+               res = new_rd_Mux(dbgi, block, cmp_high, high, low, result_mode);
+
+               if (! allow_ifconv(cmp_high, high, low))
+                       ir_nodeset_insert(&created_mux_nodes, res);
+               break;
+       }
+       case ir_bk_ctz: {
+               ir_node               *zero_unsigned  = new_rd_Const(dbgi, irg, get_mode_null(env->low_unsigned));
+               ir_node               *cmp_low        = new_rd_Cmp(dbgi, block, entry->low_word, zero_unsigned, ir_relation_equal);
+               ir_node               *ffs_high       = new_rd_Builtin(dbgi, block, mem, 1, in_high, kind, lowered_type_high);
+               ir_node               *high_proj      = new_r_Proj(ffs_high, result_mode, pn_Builtin_max+1);
+               ir_node               *number_of_bits = new_r_Const_long(irg, result_mode, get_mode_size_bits(env->low_unsigned));
+               ir_node               *high           = new_rd_Add(dbgi, block, high_proj, number_of_bits, result_mode);
+               ir_node               *ffs_low        = new_rd_Builtin(dbgi, block, mem, 1, in_low, kind, lowered_type_low);
+               ir_node               *low            = new_r_Proj(ffs_low, result_mode, pn_Builtin_max+1);
+
+               res = new_rd_Mux(dbgi, block, cmp_low, low, high, result_mode);
+
+               if (! allow_ifconv(cmp_low, low, high))
+                       ir_nodeset_insert(&created_mux_nodes, res);
+               break;
+       }
+       case ir_bk_popcount: {
+               ir_node               *popcount_high = new_rd_Builtin(dbgi, block, mem, 1, in_high, kind, lowered_type_high);
+               ir_node               *popcount_low  = new_rd_Builtin(dbgi, block, mem, 1, in_low, kind, lowered_type_low);
+               ir_node               *high          = new_r_Proj(popcount_high, result_mode, pn_Builtin_max+1);
+               ir_node               *low           = new_r_Proj(popcount_low, result_mode, pn_Builtin_max+1);
+
+               res = new_rd_Add(dbgi, block, high, low, result_mode);
+               break;
+       }
+       case ir_bk_parity: {
+               ir_node  *parity_high;
+               ir_node  *parity_low;
+               ir_node  *high;
+               ir_node  *low;
+
+               assert(arity == 2);
+
+               parity_high = new_rd_Builtin(dbgi, block, mem, 1, in_high, kind, lowered_type_high);
+               high        = new_r_Proj(parity_high, result_mode, pn_Builtin_max+1);
+               parity_low  = new_rd_Builtin(dbgi, block, mem, 1, in_low, kind, lowered_type_low);
+               low         = new_r_Proj(parity_low, result_mode, pn_Builtin_max+1);
+               res         = new_rd_Eor(dbgi, block, high, low, result_mode);
+               break;
+       }
+       default:
+               panic("unexpected builtin");
+       }
+
+       ir_node *const in[] = {
+               [pn_Builtin_M]       = mem,
+               [pn_Builtin_max + 1] = res,
+       };
+       turn_into_tuple(builtin, ARRAY_SIZE(in), in);
+       }
+}
+
+/**
+ * lowers builtins performing arithmetic (bswap)
+ */
+static void lower_arithmetic_builtin(ir_node *builtin, ir_mode *mode)
+{
+       ir_builtin_kind  kind         = get_Builtin_kind(builtin);
+       ir_node         *operand      = get_Builtin_param(builtin, 0);
+       ir_mode         *operand_mode = get_irn_mode(operand);
        (void) mode;
+       if (operand_mode != env->high_signed && operand_mode != env->high_unsigned)
+               return;
 
-       /* we must only lower value parameter Sels if we change the
-          value parameter type. */
-       if (env->value_param_tp != NULL) {
-               ir_entity *ent = get_Sel_entity(sel);
-           if (get_entity_owner(ent) == env->value_param_tp) {
-                       int pos = get_entity_arg_idx(ent);
+       {
+       dbg_info              *dbgi              = get_irn_dbg_info(builtin);
+       ir_type               *type              = get_Builtin_type(builtin);
+       ir_type               *lowered_type_high = lower_Builtin_type_high(type);
+       ir_type               *lowered_type_low  = lower_Builtin_type_low(type);
+       ir_node               *block             = get_nodes_block(builtin);
+       ir_node               *mem               = get_Builtin_mem(builtin);
+       const lower64_entry_t *entry             = get_node_entry(operand);
+       ir_mode               *mode_high         = get_irn_mode(entry->high_word);
+       ir_node               *res_high;
+       ir_node               *res_low;
+
+       switch (kind) {
+       case ir_bk_bswap: {
+               ir_node               *in_high[1] = { entry->high_word };
+               ir_node               *in_low[1]  = { entry->low_word };
+               ir_node               *swap_high  = new_rd_Builtin(dbgi, block, mem, 1, in_high, kind, lowered_type_high);
+               ir_node               *swap_low   = new_rd_Builtin(dbgi, block, mem, 1, in_low, kind, lowered_type_low);
+               ir_node               *high       = new_r_Proj(swap_high, mode_high, pn_Builtin_max+1);
+               ir_node               *low        = new_r_Proj(swap_low, env->low_unsigned, pn_Builtin_max+1);
+               if (mode_high == env->low_signed) {
+                       res_high = new_rd_Conv(dbgi, block, low, env->low_signed);
+                       res_low  = new_rd_Conv(dbgi, block, high, env->low_unsigned);
+               } else {
+                       res_high = low;
+                       res_low  = high;
+               }
+               break;
+       }
+       default:
+               panic("unexpected builtin");
+       }
+
+       /* search result Proj */
+       foreach_out_edge_safe(builtin, edge) {
+               ir_node *proj = get_edge_src_irn(edge);
+               if (!is_Proj(proj))
+                       continue;
 
-                       ent = get_method_value_param_ent(env->l_mtp, pos);
-                       set_Sel_entity(sel, ent);
+               if (get_Proj_proj(proj) == pn_Builtin_max+1) {
+                       ir_set_dw_lowered(proj, res_low, res_high);
                }
        }
+       }
+}
+
+/**
+ * Lower double word builtins.
+ */
+static void lower_Builtin(ir_node *builtin, ir_mode *mode)
+{
+       ir_builtin_kind kind = get_Builtin_kind(builtin); /* dispatch purely on the builtin kind */
+
+       switch (kind) {
+       case ir_bk_trap:
+       case ir_bk_debugbreak:
+       case ir_bk_return_address:
+       case ir_bk_frame_address:
+       case ir_bk_prefetch:
+       case ir_bk_inport:
+       case ir_bk_outport:
+       case ir_bk_inner_trampoline:
+               /* Nothing to do: these kinds are left untouched by this function. */
+               return;
+       case ir_bk_bswap:
+               lower_arithmetic_builtin(builtin, mode); /* result is split into low/high words */
+               return;
+       case ir_bk_ffs:
+       case ir_bk_clz:
+       case ir_bk_ctz:
+       case ir_bk_popcount:
+       case ir_bk_parity:
+               lower_reduce_builtin(builtin, mode); /* result is a single summary value */
+               return;
+       }
+       panic("unknown builtin"); /* reached only for a kind not listed in the switch */
 }
 
 /**
@@ -1969,11 +2738,13 @@ static bool always_lower(unsigned code)
 {
        switch (code) {
        case iro_ASM:
+       case iro_Builtin:
        case iro_Proj:
        case iro_Start:
        case iro_Call:
        case iro_Return:
        case iro_Cond:
+       case iro_Switch:
        case iro_Conv:
        case iro_Sel:
                return true;
@@ -2009,50 +2780,25 @@ static int cmp_conv_tp(const void *elt, const void *key, size_t size)
 /**
  * Enter a lowering function into an ir_op.
  */
-static void enter_lower_func(ir_op *op, lower_func func)
+void ir_register_dw_lower_function(ir_op *op, lower_dw_func func)
 {
        op->ops.generic = (op_func)func;
 }
 
-/**
- * Returns non-zero if a method type must be lowered.
- *
- * @param mtp  the method type
- */
-static bool mtp_must_be_lowered(lower_env_t *env, ir_type *mtp)
-{
-       int n_params = get_method_n_params(mtp);
-       int i;
-
-       /* first check if we have parameters that must be fixed */
-       for (i = 0; i < n_params; ++i) {
-               ir_type *tp = get_method_param_type(mtp, i);
-
-               if (is_Primitive_type(tp)) {
-                       ir_mode *mode = get_type_mode(tp);
-
-                       if (mode == env->high_signed ||
-                               mode == env->high_unsigned)
-                               return true;
-               }
-       }
-       return false;
-}
-
 /* Determine which modes need to be lowered */
-static void setup_modes(lower_env_t *env)
+static void setup_modes(void)
 {
        unsigned           size_bits           = env->params->doubleword_size;
        ir_mode           *doubleword_signed   = NULL;
        ir_mode           *doubleword_unsigned = NULL;
-       size_t             n_modes             = get_irp_n_modes();
+       size_t             n_modes             = ir_get_n_modes();
        ir_mode_arithmetic arithmetic;
        unsigned           modulo_shift;
        size_t             i;
 
        /* search for doubleword modes... */
        for (i = 0; i < n_modes; ++i) {
-               ir_mode *mode = get_irp_mode(i);
+               ir_mode *mode = ir_get_mode(i);
                if (!mode_is_int(mode))
                        continue;
                if (get_mode_size_bits(mode) != size_bits)
@@ -2106,13 +2852,13 @@ static void setup_modes(lower_env_t *env)
        /* produce lowered modes */
        env->high_signed   = doubleword_signed;
        env->high_unsigned = doubleword_unsigned;
-       env->low_signed    = new_ir_mode("WS", irms_int_number, size_bits, 1,
-                                        arithmetic, modulo_shift);
-       env->low_unsigned  = new_ir_mode("WU", irms_int_number, size_bits, 0,
-                                        arithmetic, modulo_shift);
+       env->low_signed    = new_int_mode("WS", arithmetic, size_bits, 1,
+                                         modulo_shift);
+       env->low_unsigned  = new_int_mode("WU", arithmetic, size_bits, 0,
+                                         modulo_shift);
 }
 
-static void enqueue_preds(lower_env_t *env, ir_node *node)
+static void enqueue_preds(ir_node *node)
 {
        int arity = get_irn_arity(node);
        int i;
@@ -2123,44 +2869,45 @@ static void enqueue_preds(lower_env_t *env, ir_node *node)
        }
 }
 
-static void lower_node(lower_env_t *env, ir_node *node)
+static void lower_node(ir_node *node)
 {
-       int           arity;
-       int           i;
-       lower_func    func;
-       ir_op        *op;
-       ir_mode      *mode;
-       unsigned      idx;
-       node_entry_t *entry;
-
-       if (irn_visited(node))
+       int              arity;
+       int              i;
+       lower_dw_func    func;
+       ir_op           *op;
+       ir_mode         *mode;
+       unsigned         idx;
+       lower64_entry_t *entry;
+
+       if (irn_visited_else_mark(node))
                return;
-       mark_irn_visited(node);
 
        /* cycles are always broken at Phi and Block nodes. So we don't need special
         * magic in all the other lower functions */
        if (is_Block(node)) {
-               enqueue_preds(env, node);
+               enqueue_preds(node);
                return;
        } else if (is_Phi(node)) {
-               lower_Phi(env, node);
+               lower_Phi(node);
                return;
        }
 
        /* depth-first: descend into operands */
        if (!is_Block(node)) {
                ir_node *block = get_nodes_block(node);
-               lower_node(env, block);
+               lower_node(block);
        }
 
-       arity = get_irn_arity(node);
-       for (i = 0; i < arity; ++i) {
-               ir_node *pred = get_irn_n(node, i);
-               lower_node(env, pred);
+       if (!is_Cond(node)) {
+               arity = get_irn_arity(node);
+               for (i = 0; i < arity; ++i) {
+                       ir_node *pred = get_irn_n(node, i);
+                       lower_node(pred);
+               }
        }
 
        op   = get_irn_op(node);
-       func = (lower_func) op->ops.generic;
+       func = (lower_dw_func) op->ops.generic;
        if (func == NULL)
                return;
 
@@ -2174,45 +2921,60 @@ static void lower_node(lower_env_t *env, ir_node *node)
                        mode = env->low_unsigned;
                }
                DB((dbg, LEVEL_1, "  %+F\n", node));
-               func(node, mode, env);
+               func(node, mode);
+       }
+}
+
+static void clear_node_and_phi_links(ir_node *node, void *data)
+{
+       (void) data;
+       if (get_irn_mode(node) == mode_T) {
+               set_irn_link(node, node);
+       } else {
+               set_irn_link(node, NULL);
        }
+       if (is_Block(node))
+               set_Block_phis(node, NULL);
+       else if (is_Phi(node))
+               set_Phi_next(node, NULL);
 }
 
-static void lower_irg(lower_env_t *env, ir_graph *irg)
+static void lower_irg(ir_graph *irg)
 {
        ir_entity *ent;
        ir_type   *mtp;
+       ir_type   *lowered_mtp;
        unsigned   n_idx;
 
        obstack_init(&env->obst);
 
+       /* just here for debugging */
+       current_ir_graph = irg;
+       assure_edges(irg);
+
        n_idx = get_irg_last_idx(irg);
        n_idx = n_idx + (n_idx >> 2);  /* add 25% */
        env->n_entries = n_idx;
-       env->entries   = NEW_ARR_F(node_entry_t*, n_idx);
-       memset(env->entries, 0, sizeof(env->entries[0]) * n_idx);
+       env->entries   = NEW_ARR_FZ(lower64_entry_t*, n_idx);
 
        env->irg            = irg;
-       env->l_mtp          = NULL;
        env->flags          = 0;
-       env->proj_2_block   = pmap_create();
-       env->value_param_tp = NULL;
 
        ent = get_irg_entity(irg);
        mtp = get_entity_type(ent);
+       lowered_mtp = lower_mtp(mtp);
 
-       if (mtp_must_be_lowered(env, mtp)) {
-               ir_type *ltp = lower_mtp(env, mtp);
+       if (lowered_mtp != mtp) {
+               set_entity_type(ent, lowered_mtp);
                env->flags |= MUST_BE_LOWERED;
-               set_entity_type(ent, ltp);
-               env->l_mtp = ltp;
-               env->value_param_tp = get_method_value_param_type(mtp);
+
+               fix_parameter_entities(irg, mtp);
        }
 
        /* first step: link all nodes and allocate data */
        ir_reserve_resources(irg, IR_RESOURCE_PHI_LIST | IR_RESOURCE_IRN_LINK);
-       irg_walk_graph(irg, firm_clear_node_and_phi_links,
-                      prepare_links_and_handle_rotl, env);
+       visit_all_identities(irg, clear_node_and_phi_links, NULL);
+       irg_walk_graph(irg, NULL, prepare_links_and_handle_rotl, env);
 
        if (env->flags & MUST_BE_LOWERED) {
                size_t i;
@@ -2225,51 +2987,94 @@ static void lower_irg(lower_env_t *env, ir_graph *irg)
                env->lowered_phis = NEW_ARR_F(ir_node*, 0);
                while (!pdeq_empty(env->waitq)) {
                        ir_node *node = (ir_node*)pdeq_getl(env->waitq);
-                       lower_node(env, node);
+                       lower_node(node);
                }
 
                /* we need to fixup phis */
                for (i = 0; i < ARR_LEN(env->lowered_phis); ++i) {
                        ir_node *phi = env->lowered_phis[i];
-                       fixup_phi(env, phi);
+                       fixup_phi(phi);
                }
                DEL_ARR_F(env->lowered_phis);
 
 
                ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
 
-               /* outs are invalid, we changed the graph */
-               set_irg_outs_inconsistent(irg);
-
                if (env->flags & CF_CHANGED) {
                        /* control flow changed, dominance info is invalid */
-                       set_irg_doms_inconsistent(irg);
-                       set_irg_extblk_inconsistent(irg);
-                       set_irg_loopinfo_inconsistent(irg);
+                       clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
                }
+               edges_deactivate(irg);
        }
 
        ir_free_resources(irg, IR_RESOURCE_PHI_LIST | IR_RESOURCE_IRN_LINK);
 
-       pmap_destroy(env->proj_2_block);
        DEL_ARR_F(env->entries);
        obstack_free(&env->obst, NULL);
 }
 
+static const lwrdw_param_t *param;
+
+void ir_prepare_dw_lowering(const lwrdw_param_t *new_param)
+{
+       assert(new_param != NULL);
+       FIRM_DBG_REGISTER(dbg, "firm.lower.dw");
+
+       param = new_param;
+
+       ir_clear_opcodes_generic_func();
+       ir_register_dw_lower_function(op_ASM,     lower_ASM);
+       ir_register_dw_lower_function(op_Add,     lower_binop);
+       ir_register_dw_lower_function(op_And,     lower_And);
+       ir_register_dw_lower_function(op_Bad,     lower_Bad);
+       ir_register_dw_lower_function(op_Builtin, lower_Builtin);
+       ir_register_dw_lower_function(op_Call,    lower_Call);
+       ir_register_dw_lower_function(op_Cmp,     lower_Cmp);
+       ir_register_dw_lower_function(op_Cond,    lower_Cond);
+       ir_register_dw_lower_function(op_Const,   lower_Const);
+       ir_register_dw_lower_function(op_Conv,    lower_Conv);
+       ir_register_dw_lower_function(op_Div,     lower_Div);
+       ir_register_dw_lower_function(op_Eor,     lower_Eor);
+       ir_register_dw_lower_function(op_Load,    lower_Load);
+       ir_register_dw_lower_function(op_Minus,   lower_unop);
+       ir_register_dw_lower_function(op_Mod,     lower_Mod);
+       ir_register_dw_lower_function(op_Mul,     lower_binop);
+       ir_register_dw_lower_function(op_Mux,     lower_Mux);
+       ir_register_dw_lower_function(op_Not,     lower_Not);
+       ir_register_dw_lower_function(op_Or,      lower_Or);
+       ir_register_dw_lower_function(op_Proj,    lower_Proj);
+       ir_register_dw_lower_function(op_Return,  lower_Return);
+       ir_register_dw_lower_function(op_Shl,     lower_Shl);
+       ir_register_dw_lower_function(op_Shr,     lower_Shr);
+       ir_register_dw_lower_function(op_Shrs,    lower_Shrs);
+       ir_register_dw_lower_function(op_Start,   lower_Start);
+       ir_register_dw_lower_function(op_Store,   lower_Store);
+       ir_register_dw_lower_function(op_Sub,     lower_binop);
+       ir_register_dw_lower_function(op_Switch,  lower_Switch);
+       ir_register_dw_lower_function(op_Unknown, lower_Unknown);
+}
+
+/**
+ * Callback to lower only the Mux nodes we created.
+ */
+static int lower_mux_cb(ir_node *mux)
+{
+       return ir_nodeset_contains(&created_mux_nodes, mux);
+}
+
 /*
  * Do the lowering.
  */
-void lower_dw_ops(const lwrdw_param_t *param)
+void ir_lower_dw_ops(void)
 {
-       lower_env_t lenv;
+       lower_dw_env_t lenv;
        size_t      i, n;
 
-       assert(param != NULL);
-       FIRM_DBG_REGISTER(dbg, "firm.lower.dw");
-
        memset(&lenv, 0, sizeof(lenv));
        lenv.params = param;
-       setup_modes(&lenv);
+       env = &lenv;
+
+       setup_modes();
 
        /* create the necessary maps */
        if (! intrinsic_fkt)
@@ -2278,6 +3083,10 @@ void lower_dw_ops(const lwrdw_param_t *param)
                conv_types = new_set(cmp_conv_tp, 16);
        if (! lowered_type)
                lowered_type = pmap_create();
+       if (! lowered_builtin_type_low)
+               lowered_builtin_type_low = pmap_create();
+       if (! lowered_builtin_type_high)
+               lowered_builtin_type_high = pmap_create();
 
        /* create a primitive unsigned and signed type */
        if (! tp_u)
@@ -2297,28 +3106,21 @@ void lower_dw_ops(const lwrdw_param_t *param)
        }
        if (! binop_tp_s) {
                binop_tp_s = new_type_method(4, 2);
-               set_method_param_type(binop_tp_s, 0, tp_u);
-               set_method_param_type(binop_tp_s, 1, tp_s);
-               set_method_param_type(binop_tp_s, 2, tp_u);
-               set_method_param_type(binop_tp_s, 3, tp_s);
-               set_method_res_type(binop_tp_s, 0, tp_u);
-               set_method_res_type(binop_tp_s, 1, tp_s);
-       }
-       if (! shiftop_tp_u) {
-               shiftop_tp_u = new_type_method(3, 2);
-               set_method_param_type(shiftop_tp_u, 0, tp_u);
-               set_method_param_type(shiftop_tp_u, 1, tp_u);
-               set_method_param_type(shiftop_tp_u, 2, tp_u);
-               set_method_res_type(shiftop_tp_u, 0, tp_u);
-               set_method_res_type(shiftop_tp_u, 1, tp_u);
-       }
-       if (! shiftop_tp_s) {
-               shiftop_tp_s = new_type_method(3, 2);
-               set_method_param_type(shiftop_tp_s, 0, tp_u);
-               set_method_param_type(shiftop_tp_s, 1, tp_s);
-               set_method_param_type(shiftop_tp_s, 2, tp_u);
-               set_method_res_type(shiftop_tp_s, 0, tp_u);
-               set_method_res_type(shiftop_tp_s, 1, tp_s);
+               if (env->params->little_endian) {
+                       set_method_param_type(binop_tp_s, 0, tp_u);
+                       set_method_param_type(binop_tp_s, 1, tp_s);
+                       set_method_param_type(binop_tp_s, 2, tp_u);
+                       set_method_param_type(binop_tp_s, 3, tp_s);
+                       set_method_res_type(binop_tp_s, 0, tp_u);
+                       set_method_res_type(binop_tp_s, 1, tp_s);
+               } else {
+                       set_method_param_type(binop_tp_s, 0, tp_s);
+                       set_method_param_type(binop_tp_s, 1, tp_u);
+                       set_method_param_type(binop_tp_s, 2, tp_s);
+                       set_method_param_type(binop_tp_s, 3, tp_u);
+                       set_method_res_type(binop_tp_s, 0, tp_s);
+                       set_method_res_type(binop_tp_s, 1, tp_u);
+               }
        }
        if (! unop_tp_u) {
                unop_tp_u = new_type_method(2, 2);
@@ -2329,52 +3131,43 @@ void lower_dw_ops(const lwrdw_param_t *param)
        }
        if (! unop_tp_s) {
                unop_tp_s = new_type_method(2, 2);
-               set_method_param_type(unop_tp_s, 0, tp_u);
-               set_method_param_type(unop_tp_s, 1, tp_s);
-               set_method_res_type(unop_tp_s, 0, tp_u);
-               set_method_res_type(unop_tp_s, 1, tp_s);
-       }
-
-       clear_irp_opcodes_generic_func();
-       enter_lower_func(op_ASM,     lower_ASM);
-       enter_lower_func(op_Add,     lower_binop);
-       enter_lower_func(op_And,     lower_And);
-       enter_lower_func(op_Call,    lower_Call);
-       enter_lower_func(op_Cmp,     lower_Cmp);
-       enter_lower_func(op_Cond,    lower_Cond);
-       enter_lower_func(op_Const,   lower_Const);
-       enter_lower_func(op_Conv,    lower_Conv);
-       enter_lower_func(op_Div,     lower_Div);
-       enter_lower_func(op_Eor,     lower_Eor);
-       enter_lower_func(op_Load,    lower_Load);
-       enter_lower_func(op_Minus,   lower_Unop);
-       enter_lower_func(op_Mod,     lower_Mod);
-       enter_lower_func(op_Mul,     lower_binop);
-       enter_lower_func(op_Mux,     lower_Mux);
-       enter_lower_func(op_Not,     lower_Not);
-       enter_lower_func(op_Or,      lower_Or);
-       enter_lower_func(op_Return,  lower_Return);
-       enter_lower_func(op_Sel,     lower_Sel);
-       enter_lower_func(op_Shl,     lower_Shl);
-       enter_lower_func(op_Shr,     lower_Shr);
-       enter_lower_func(op_Shrs,    lower_Shrs);
-       enter_lower_func(op_Start,   lower_Start);
-       enter_lower_func(op_Store,   lower_Store);
-       enter_lower_func(op_Sub,     lower_binop);
-       enter_lower_func(op_Unknown, lower_Unknown);
+               if (env->params->little_endian) {
+                       set_method_param_type(unop_tp_s, 0, tp_u);
+                       set_method_param_type(unop_tp_s, 1, tp_s);
+                       set_method_res_type(unop_tp_s, 0, tp_u);
+                       set_method_res_type(unop_tp_s, 1, tp_s);
+               } else {
+                       set_method_param_type(unop_tp_s, 0, tp_s);
+                       set_method_param_type(unop_tp_s, 1, tp_u);
+                       set_method_res_type(unop_tp_s, 0, tp_s);
+                       set_method_res_type(unop_tp_s, 1, tp_u);
+               }
+       }
 
        lenv.tv_mode_bytes = new_tarval_from_long(param->doubleword_size/(2*8), lenv.low_unsigned);
-       lenv.tv_mode_bits  = new_tarval_from_long(param->doubleword_size/2, lenv.low_unsigned);
        lenv.waitq         = new_pdeq();
        lenv.first_id      = new_id_from_chars(param->little_endian ? ".l" : ".h", 2);
        lenv.next_id       = new_id_from_chars(param->little_endian ? ".h" : ".l", 2);
 
+       irp_reserve_resources(irp, IRP_RESOURCE_TYPE_LINK | IRP_RESOURCE_TYPE_VISITED);
+       inc_master_type_visited();
        /* transform all graphs */
        for (i = 0, n = get_irp_n_irgs(); i < n; ++i) {
                ir_graph *irg = get_irp_irg(i);
-               lower_irg(&lenv, irg);
+
+               ir_nodeset_init(&created_mux_nodes);
+
+               lower_irg(irg);
+
+               if (ir_nodeset_size(&created_mux_nodes) > 0)
+                       lower_mux(irg, lower_mux_cb);
+
+               ir_nodeset_destroy(&created_mux_nodes);
        }
+       irp_free_resources(irp, IRP_RESOURCE_TYPE_LINK | IRP_RESOURCE_TYPE_VISITED);
        del_pdeq(lenv.waitq);
+
+       env = NULL;
 }
 
 /* Default implementation. */
@@ -2397,5 +3190,6 @@ ir_entity *def_create_intrinsic_fkt(ir_type *method, const ir_op *op,
 
        ent = new_entity(get_glob_type(), id, method);
        set_entity_ld_ident(ent, get_entity_ident(ent));
+       set_entity_visibility(ent, ir_visibility_external);
        return ent;
 }