nsz Git - libfirm/blob - ir/opt/loop.c

   1 /*
   2  * Copyright (C) 1995-2010 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @author   Christian Helmer
  23  * @brief    loop inversion and loop unrolling
  24  *
  25  * @version  $Id$
  26  */
  27
  28 #include "config.h"
  29
  30 #include "iroptimize.h"
  31 #include "opt_init.h"
  32 #include "irnode.h"
  33 #include "debug.h"
  34 #include "error.h"
  35
  36 #include "ircons.h"
  37 #include "irgopt.h"
  38 #include "irgmod.h"
  39 #include "irgwalk.h"
  40 #include "irouts.h"
  41 #include "iredges.h"
  42 #include "irtools.h"
  43 #include "array_t.h"
  44 #include "beutil.h"
  45 #include "irpass.h"
  46 #include "irdom.h"
  47
  48 #include "irbackedge_t.h"
  49 #include "irphase_t.h"
  50 #include "irloop_t.h"
  51
  52
  53 DEBUG_ONLY(static firm_dbg_module_t *dbg);
  54
  55 /* DBG print stats for every procedure.  */
  56 #define LOOP_OPT_STATS 0
  57
  58 /* DBG: Ignore node limits and process every possible loop. */
  59 #define LOOP_IGNORE_NODE_LIMITS 0
  60
  61 /**
  62  * Convenience macro for iterating over every phi node of the given block.
  63  * Requires phi list per block.
  64  */
  65 #define for_each_phi(block, phi) \
  66         for ((phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next((phi)))
  67
  68 #define for_each_phi_safe(head, phi, next) \
  69         for ((phi) = (head), (next) = (head) ? get_Phi_next((head)) : NULL; \
  70                         (phi) ; (phi) = (next), (next) = (next) ? get_Phi_next((next)) : NULL)
  71
  72 /* Currently processed loop. */
  73 static ir_loop *cur_loop;
  74
  75 /* Flag for kind of unrolling. */
  76 typedef enum {
  77         constant,
  78         invariant
  79 } unrolling_kind_flag;
  80
  81 /* Condition for performing visiting a node during copy_walk. */
  82 typedef unsigned walker_condition(ir_node *);
  83
  84 /* Node and position of a predecessor. */
  85 typedef struct entry_edge {
  86         ir_node *node;
  87         int pos;
  88         ir_node *pred;
  89 } entry_edge;
  90
  91 /* Node info for unrolling. */
  92 typedef struct unrolling_node_info {
  93         ir_node **copies;
  94         /*ir_node **ins;*/
  95 } unrolling_node_info;
  96
  97 /* Outs of the nodes head. */
  98 static entry_edge *cur_head_outs;
  99
 100 /* Information about the loop head */
 101 static ir_node *loop_head = NULL;
 102 static unsigned loop_head_valid = 1;
 103
 104 /* List of all inner loops, that are processed. */
 105 static ir_loop **loops;
 106
 107 #if LOOP_OPT_STATS
 108
 109 #define count_stats(val) (++val)
 110 #define print_stats() (do_print_stats())
 111 #define reset_stats() (do_reset_stats())
 112
 113 /* Stats */
 114 typedef struct loop_stats_t {
 115         unsigned loops;
 116         unsigned inverted;
 117         unsigned too_large;
 118         unsigned too_large_adapted;
 119         unsigned cc_limit_reached;
 120         unsigned calls_limit;
 121
 122         unsigned u_simple_counting_loop;
 123         unsigned constant_unroll;
 124         unsigned invariant_unroll;
 125
 126         unsigned unhandled;
 127 } loop_stats_t;
 128
 129 static loop_stats_t stats;
 130
 131 /* Set stats to sero */
 132 static void do_reset_stats(void)
 133 {
 134         memset(&stats, 0, sizeof(loop_stats_t));
 135 }
 136
 137 /* Print stats */
 138 static void do_print_stats(void)
 139 {
 140         DB((dbg, LEVEL_2, "---------------------------------------\n"));
 141         DB((dbg, LEVEL_2, "loops             :   %d\n",stats.loops));
 142         DB((dbg, LEVEL_2, "inverted          :   %d\n",stats.inverted));
 143         DB((dbg, LEVEL_2, "too_large         :   %d\n",stats.too_large));
 144         DB((dbg, LEVEL_2, "too_large_adapted :   %d\n",stats.too_large_adapted));
 145         DB((dbg, LEVEL_2, "cc_limit_reached  :   %d\n",stats.cc_limit_reached));
 146         DB((dbg, LEVEL_2, "calls_limit       :   %d\n",stats.calls_limit));
 147         DB((dbg, LEVEL_2, "u_simple_counting :   %d\n",stats.u_simple_counting_loop));
 148         DB((dbg, LEVEL_2, "constant_unroll   :   %d\n",stats.constant_unroll));
 149         DB((dbg, LEVEL_2, "invariant_unroll  :   %d\n",stats.invariant_unroll));
 150         DB((dbg, LEVEL_2, "=======================================\n"));
 151 }
 152 #else
 153 /* No stats */
 154 #define count_stats(val) ((void)0)
 155 #define print_stats() ((void)0)
 156 #define reset_stats() ((void)0)
 157
 158 #endif
 159
 160 /* Commandline parameters */
 161 typedef struct loop_opt_params_t {
 162         unsigned max_loop_size;     /* Maximum number of nodes */
 163         int      depth_adaption;    /* Loop nest depth adaption */
 164         unsigned allowed_calls;     /* Number of calls allowed */
 165         unsigned count_phi:1;       /* Count phi nodes */
 166         unsigned count_proj:1;      /* Count projections */
 167
 168         unsigned max_cc_size;       /* Maximum condition chain size */
 169
 170         unsigned allow_const_unrolling:1;
 171         unsigned allow_invar_unrolling:1;
 172
 173 } loop_opt_params_t;
 174
 175 static loop_opt_params_t opt_params;
 176
 177 /* Loop analysis informations */
 178 typedef struct loop_info_t {
 179         unsigned nodes;         /* node count */
 180         unsigned ld_st;         /* load and store nodes */
 181         unsigned calls;         /* number of calls */
 182         unsigned cf_outs;       /* number of cf edges which leave the loop */
 183         entry_edge cf_out;      /* single loop leaving cf edge */
 184         int be_src_pos;         /* position of the single own backedge in the head */
 185
 186         /* for inversion */
 187         unsigned cc_size;       /* nodes in the condition chain */
 188
 189         /* for unrolling */
 190         unsigned max_unroll;        /* Number of unrolls satisfying max_loop_size */
 191         unsigned exit_cond;         /* 1 if condition==true exits the loop.  */
 192         unsigned latest_value:1;    /* 1 if condition is checked against latest counter value */
 193         unsigned needs_backedge:1;  /* 0 if loop is completely unrolled */
 194         unsigned decreasing:1;      /* Step operation is_Sub, or step is<0 */
 195
 196         /* IV informations of a simple loop */
 197         ir_node *start_val;
 198         ir_node *step;
 199         ir_node *end_val;
 200         ir_node *iteration_phi;
 201         ir_node *add;
 202
 203         tarval *count_tar;                  /* Number of loop iterations */
 204
 205         ir_node *duff_cond;                 /* Duff mod */
 206         unrolling_kind_flag unroll_kind;    /* constant or invariant unrolling */
 207 } loop_info_t;
 208
 209 /* Information about the current loop */
 210 static loop_info_t loop_info;
 211
 212 /* Outs of the condition chain (loop inversion). */
 213 static ir_node **cc_blocks;
 214 /* df/cf edges with def in the condition chain */
 215 static entry_edge *cond_chain_entries;
 216 /* Array of df loops found in the condition chain. */
 217 static entry_edge *head_df_loop;
 218 /* Number of blocks in cc */
 219 static unsigned inversion_blocks_in_cc;
 220
 221
 222 /* Cf/df edges leaving the loop.
 223  * Called entries here, as they are used to enter the loop with walkers. */
 224 static entry_edge *loop_entries;
 225 /* Number of unrolls to perform */
 226 static int unroll_nr;
 227 /* Phase is used to keep copies of nodes. */
 228 static ir_phase *phase;
 229
 230 /* Loop operations.  */
 231 typedef enum loop_op_t {
 232         loop_op_inversion,
 233         loop_op_unrolling,
 234         loop_op_peeling
 235 } loop_op_t;
 236
 237 /* Saves which loop operation to do until after basic tests. */
 238 static loop_op_t loop_op;
 239
 240 /************************************************************************/
 241
 242 /* Returns the maximum nodes for the given nest depth */
 243 static unsigned get_max_nodes_adapted(unsigned depth)
 244 {
 245         int adapt_permil = opt_params.depth_adaption * depth;
 246         unsigned permil_change;
 247
 248         if (adapt_permil < -1000)
 249                 return 0;
 250
 251         permil_change = 1000 + adapt_permil;
 252         return (opt_params.max_loop_size * permil_change) / 1000;
 253 }
 254
 255 /* Reset nodes link. For use with a walker. */
 256 static void reset_link(ir_node *node, void *env)
 257 {
 258         (void)env;
 259         set_irn_link(node, NULL);
 260 }
 261
 262 /* Returns 0 if the node or block is not in cur_loop. */
 263 static unsigned is_in_loop(ir_node *node)
 264 {
 265         return (get_irn_loop(get_block(node)) == cur_loop);
 266 }
 267
 268 /* Returns 0 if the given edge is not a backedge
 269  * with its pred in the cur_loop. */
 270 static unsigned is_own_backedge(ir_node *n, int pos)
 271 {
 272         return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)));
 273 }
 274
 275 /* Finds loop head and some loop_info as calls or else if necessary. */
 276 static void get_loop_info(ir_node *node, void *env)
 277 {
 278         unsigned node_in_loop, pred_in_loop;
 279         int i, arity;
 280         (void)env;
 281
 282         arity = get_irn_arity(node);
 283         for (i = 0; i < arity; i++) {
 284                 ir_node *pred = get_irn_n(node, i);
 285
 286                 pred_in_loop = is_in_loop(pred);
 287                 node_in_loop = is_in_loop(node);
 288
 289                 /* collect some loop information */
 290                 if (node_in_loop) {
 291                         if (is_Phi(node) && opt_params.count_phi)
 292                                 ++loop_info.nodes;
 293                         else if (is_Proj(node) && opt_params.count_proj)
 294                                 ++loop_info.nodes;
 295                         else if (!is_Confirm(node) && !is_Const(node) && !is_SymConst(node))
 296                                 ++loop_info.nodes;
 297
 298                         if (is_Load(node) || is_Store(node))
 299                                 ++loop_info.ld_st;
 300
 301                         if (is_Call(node))
 302                                 ++loop_info.calls;
 303                 }
 304
 305                 /* Find the loops head/the blocks with cfpred outside of the loop */
 306                 if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) {
 307                         ir_node *cfgpred = get_Block_cfgpred(node, i);
 308
 309                         if (!is_in_loop(cfgpred)) {
 310                                 DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n",
 311                                                         node, pred));
 312                                 /* another head? We do not touch this. */
 313                                 if (loop_head && loop_head != node) {
 314                                         loop_head_valid = 0;
 315                                 } else {
 316                                         loop_head = node;
 317                                 }
 318                         }
 319                 }
 320         }
 321 }
 322
 323 /* Finds all edges with users outside of the loop
 324  * and definition inside the loop. */
 325 static void get_loop_entries(ir_node *node, void *env)
 326 {
 327         unsigned node_in_loop, pred_in_loop;
 328         int i, arity;
 329         (void) env;
 330
 331         arity = get_irn_arity(node);
 332         for (i = 0; i < arity; ++i) {
 333                 ir_node *pred = get_irn_n(node, i);
 334
 335                 pred_in_loop = is_in_loop(pred);
 336                 node_in_loop = is_in_loop(node);
 337
 338                 if (pred_in_loop && !node_in_loop) {
 339                         entry_edge entry;
 340                         entry.node = node;
 341                         entry.pos = i;
 342                         entry.pred = pred;
 343                         ARR_APP1(entry_edge, loop_entries, entry);
 344                         /* Count cf outs */
 345                         if (is_Block(node)) {
 346                                 ++loop_info.cf_outs;
 347                                 loop_info.cf_out = entry;
 348                         }
 349                 }
 350         }
 351 }
 352
 353 /* ssa */
 354 static ir_node *ssa_second_def;
 355 static ir_node *ssa_second_def_block;
 356
 357 /**
 358  * Walks the graph bottom up, searching for definitions and creates phis.
 359  */
 360 static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int first)
 361 {
 362         int i;
 363         int n_cfgpreds;
 364         ir_graph *irg;
 365         ir_node *phi;
 366         ir_node **in;
 367         ir_node  *dummy;
 368
 369         DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block));
 370
 371         /* Prevents creation of phi that would be bad anyway.
 372          * Dead and bad blocks. */
 373         if (get_irn_arity(block) < 1 || is_Bad(block)) {
 374                 ir_graph *irg = get_irn_irg(block);
 375                 DB((dbg, LEVEL_5, "ssa bad %N\n", block));
 376                 return new_r_Bad(irg);
 377         }
 378
 379         if (block == ssa_second_def_block && !first) {
 380                 DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def));
 381                 return ssa_second_def;
 382         }
 383
 384         /* already processed this block? */
 385         if (irn_visited(block)) {
 386                 ir_node *value = (ir_node *) get_irn_link(block);
 387                 DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value));
 388                 return value;
 389         }
 390
 391         irg = get_irn_irg(block);
 392         assert(block != get_irg_start_block(irg));
 393
 394         /* a Block with only 1 predecessor needs no Phi */
 395         n_cfgpreds = get_Block_n_cfgpreds(block);
 396         if (n_cfgpreds == 1) {
 397                 ir_node *pred_block = get_Block_cfgpred_block(block, 0);
 398                 ir_node *value;
 399
 400                 DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block));
 401
 402                 value = search_def_and_create_phis(pred_block, mode, 0);
 403                 set_irn_link(block, value);
 404                 mark_irn_visited(block);
 405
 406                 return value;
 407         }
 408
 409         /* create a new Phi */
 410         NEW_ARR_A(ir_node*, in, n_cfgpreds);
 411         dummy = new_r_Dummy(irg, mode);
 412         for (i = 0; i < n_cfgpreds; ++i)
 413                 in[i] = dummy;
 414
 415         phi = new_r_Phi(block, n_cfgpreds, in, mode);
 416         /* Important: always keep block phi list up to date. */
 417         add_Block_phi(block, phi);
 418         DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block));
 419         set_irn_link(block, phi);
 420         mark_irn_visited(block);
 421
 422         /* set Phi predecessors */
 423         for (i = 0; i < n_cfgpreds; ++i) {
 424                 ir_node *pred_val;
 425                 ir_node *pred_block = get_Block_cfgpred_block(block, i);
 426                 assert(pred_block != NULL);
 427                 pred_val = search_def_and_create_phis(pred_block, mode, 0);
 428
 429                 assert(pred_val != NULL);
 430
 431                 DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val));
 432                 set_irn_n(phi, i, pred_val);
 433         }
 434
 435         return phi;
 436 }
 437
 438
 439 /**
 440  * Given a set of values this function constructs SSA-form for the users of the
 441  * first value (the users are determined through the out-edges of the value).
 442  * Works without using the dominance tree.
 443  */
 444 static void construct_ssa(ir_node *orig_block, ir_node *orig_val,
 445                 ir_node *second_block, ir_node *second_val)
 446 {
 447         ir_graph *irg;
 448         ir_mode *mode;
 449         const ir_edge_t *edge;
 450         const ir_edge_t *next;
 451
 452         assert(orig_block && orig_val && second_block && second_val &&
 453                         "no parameter of construct_ssa may be NULL");
 454
 455         if (orig_val == second_val)
 456                 return;
 457
 458         irg = get_irn_irg(orig_val);
 459
 460         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
 461         inc_irg_visited(irg);
 462
 463         mode = get_irn_mode(orig_val);
 464         set_irn_link(orig_block, orig_val);
 465         mark_irn_visited(orig_block);
 466
 467         ssa_second_def_block = second_block;
 468         ssa_second_def       = second_val;
 469
 470         /* Only fix the users of the first, i.e. the original node */
 471         foreach_out_edge_safe(orig_val, edge, next) {
 472                 ir_node *user = get_edge_src_irn(edge);
 473                 int j = get_edge_src_pos(edge);
 474                 ir_node *user_block = get_nodes_block(user);
 475                 ir_node *newval;
 476
 477                 /* ignore keeps */
 478                 if (is_End(user))
 479                         continue;
 480
 481                 DB((dbg, LEVEL_5, "original user %N\n", user));
 482
 483                 if (is_Phi(user)) {
 484                         ir_node *pred_block = get_Block_cfgpred_block(user_block, j);
 485                         newval = search_def_and_create_phis(pred_block, mode, 1);
 486                 } else {
 487                         newval = search_def_and_create_phis(user_block, mode, 1);
 488                 }
 489                 if (newval != user && !is_Bad(newval))
 490                         set_irn_n(user, j, newval);
 491         }
 492
 493         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
 494 }
 495
 496
 497 /***** Unrolling Helper Functions *****/
 498
 499 /* Assign the copy with index nr to node n */
 500 static void set_unroll_copy(ir_node *n, int nr, ir_node *cp)
 501 {
 502         unrolling_node_info *info;
 503         assert(nr != 0 && "0 reserved");
 504
 505         info = (unrolling_node_info *)phase_get_irn_data(phase, n);
 506         if (! info) {
 507                 ir_node **arr;
 508
 509                 info = XMALLOCZ(unrolling_node_info);
 510                 arr = NEW_ARR_F(ir_node *, unroll_nr);
 511                 info->copies = arr;
 512                 memset(info->copies, 0, (unroll_nr) * sizeof(ir_node *));
 513
 514                 phase_set_irn_data(phase, n, info);
 515         }
 516         /* Original node */
 517         info->copies[0] = n;
 518
 519         info->copies[nr] = cp;
 520 }
 521
 522 /* Returns a nodes copy if it exists, else NULL. */
 523 static ir_node *get_unroll_copy(ir_node *n, int nr)
 524 {
 525         ir_node             *cp;
 526         unrolling_node_info *info = (unrolling_node_info *)phase_get_irn_data(phase, n);
 527         if (! info)
 528                 return NULL;
 529
 530         cp = info->copies[nr];
 531         return cp;
 532 }
 533
 534
 535 /***** Inversion Helper Functions *****/
 536
 537 /* Sets copy cp of node n. */
 538 static void set_inversion_copy(ir_node *n, ir_node *cp)
 539 {
 540         phase_set_irn_data(phase, n, cp);
 541 }
 542
 543 /* Getter of copy of n for inversion */
 544 static ir_node *get_inversion_copy(ir_node *n)
 545 {
 546         ir_node *cp = (ir_node *)phase_get_irn_data(phase, n);
 547         return cp;
 548 }
 549
 550 /* Resets block mark for given node. For use with walker */
 551 static void reset_block_mark(ir_node *node, void * env)
 552 {
 553         (void) env;
 554
 555         if (is_Block(node))
 556                 set_Block_mark(node, 0);
 557 }
 558
 559 /* Returns mark of node, or its block if node is not a block.
 560  * Used in this context to determine if node is in the condition chain. */
 561 static unsigned is_nodes_block_marked(ir_node* node)
 562 {
 563         if (is_Block(node))
 564                 return get_Block_mark(node);
 565         else
 566                 return get_Block_mark(get_block(node));
 567 }
 568
 569 /* Extends a nodes ins by node new.
 570  * NOTE: This is slow if a node n needs to be extended more than once. */
 571 static int extend_irn(ir_node *n, ir_node *new)
 572 {
 573         ir_node **ins;
 574         int i;
 575         int arity = get_irn_arity(n);
 576         int new_arity = arity + 1;
 577
 578         NEW_ARR_A(ir_node *, ins, new_arity);
 579
 580         for(i = 0; i < arity; ++i) {
 581                 ins[i] = get_irn_n(n, i);
 582         }
 583         ins[i] = new;
 584
 585         set_irn_in(n, new_arity, ins);
 586         return arity;
 587 }
 588
 589 /* Extends a block by a copy of its pred at pos,
 590  * fixing also the phis in the same way. */
 591 static void extend_ins_by_copy(ir_node *block, int pos)
 592 {
 593         ir_node *new_in;
 594         ir_node *phi;
 595         ir_node *pred;
 596         assert(is_Block(block));
 597
 598         /* Extend block by copy of definition at pos */
 599         pred = get_irn_n(block, pos);
 600         new_in = get_inversion_copy(pred);
 601         DB((dbg, LEVEL_5, "Extend block %N by %N cp of %N\n", block, new_in, pred));
 602         extend_irn(block, new_in);
 603
 604         /* Extend block phis by copy of definition at pos */
 605         for_each_phi(block, phi) {
 606                 ir_node *pred, *cp;
 607
 608                 pred = get_irn_n(phi, pos);
 609                 cp = get_inversion_copy(pred);
 610                 /* If the phis in is not in the condition chain (eg. a constant),
 611                  * there is no copy. */
 612                 if (cp == NULL)
 613                         new_in = pred;
 614                 else
 615                         new_in = cp;
 616
 617                 DB((dbg, LEVEL_5, "Extend phi %N by %N cp of %N\n", phi, new_in, pred));
 618                 extend_irn(phi, new_in);
 619         }
 620 }
 621
 622 /* Returns the number of blocks backedges. With or without alien bes. */
 623 static int get_backedge_n(ir_node *block, unsigned with_alien)
 624 {
 625         int i;
 626         int be_n = 0;
 627         int arity = get_irn_arity(block);
 628
 629         assert(is_Block(block) && "We only required backedges of blocks.");
 630
 631         for (i = 0; i < arity; ++i) {
 632                 ir_node *pred = get_irn_n(block, i);
 633                 if (is_backedge(block, i) && (with_alien || is_in_loop(pred)))
 634                         ++be_n;
 635         }
 636         return be_n;
 637 }
 638
 639 /* Returns a raw copy of the given node.
 640  * Attributes are kept/set according to the needs of loop inversion. */
 641 static ir_node *copy_node(ir_node *node)
 642 {
 643         int i, arity;
 644         ir_node *cp;
 645
 646         cp = exact_copy(node);
 647         arity = get_irn_arity(node);
 648
 649         /* Keep backedge info */
 650         for (i = 0; i < arity; ++i) {
 651                 if (is_backedge(node, i))
 652                         set_backedge(cp, i);
 653         }
 654
 655         if (is_Block(cp)) {
 656                 set_Block_mark(cp, 0);
 657         }
 658
 659         return cp;
 660 }
 661
 662
 663 /**
 664  * This walker copies all walked nodes.
 665  * If the walk_condition is true for a node, it is copied.
 666  * All nodes node_info->copy have to be NULL prior to every walk.
 667  * Order of ins is important for later usage.
 668  */
 669 static void copy_walk(ir_node *node, walker_condition *walk_condition,
 670                       ir_loop *set_loop)
 671 {
 672         int i;
 673         int arity;
 674         ir_node *cp;
 675         ir_node **cpin;
 676         ir_graph *irg = current_ir_graph;
 677
 678         /**
 679          * break condition and cycle resolver, creating temporary node copies
 680          */
 681         if (get_irn_visited(node) >= get_irg_visited(irg)) {
 682                 /* Here we rely on nodestate's copy being initialized with NULL */
 683                 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
 684                 if (get_inversion_copy(node) == NULL) {
 685                         cp = copy_node(node);
 686                         set_inversion_copy(node, cp);
 687
 688                         DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp));
 689                 }
 690                 return;
 691         }
 692
 693         /* Walk */
 694         mark_irn_visited(node);
 695
 696         if (!is_Block(node)) {
 697                 ir_node *pred = get_nodes_block(node);
 698                 if (walk_condition(pred))
 699                         DB((dbg, LEVEL_5, "walk block %N\n", pred));
 700                 copy_walk(pred, walk_condition, set_loop);
 701         }
 702
 703         arity = get_irn_arity(node);
 704
 705         NEW_ARR_A(ir_node *, cpin, arity);
 706
 707         for (i = 0; i < arity; ++i) {
 708                 ir_node *pred = get_irn_n(node, i);
 709
 710                 if (walk_condition(pred)) {
 711                         DB((dbg, LEVEL_5, "walk node %N\n", pred));
 712                         copy_walk(pred, walk_condition, set_loop);
 713                         cpin[i] = get_inversion_copy(pred);
 714                         DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n",
 715                                                 node, get_inversion_copy(pred), pred));
 716                 } else {
 717                         cpin[i] = pred;
 718                 }
 719         }
 720
 721         /* copy node / finalize temp node */
 722         if (get_inversion_copy(node) == NULL) {
 723                 /* No temporary copy existent */
 724                 cp = copy_node(node);
 725                 set_inversion_copy(node, cp);
 726                 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
 727         } else {
 728                 /* temporary copy is existent but without correct ins */
 729                 cp = get_inversion_copy(node);
 730                 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
 731         }
 732
 733         if (!is_Block(node)) {
 734                 ir_node *cpblock = get_inversion_copy(get_nodes_block(node));
 735
 736                 set_nodes_block(cp, cpblock );
 737                 if (is_Phi(cp))
 738                         add_Block_phi(cpblock, cp);
 739         }
 740
 741         /* Keeps phi list of temporary node. */
 742         set_irn_in(cp, ARR_LEN(cpin), cpin);
 743 }
 744
 745 /**
 746  * This walker copies all walked nodes.
 747  * If the walk_condition is true for a node, it is copied.
 748  * All nodes node_info->copy have to be NULL prior to every walk.
 749  * Order of ins is important for later usage.
 750  * Takes copy_index, to phase-link copy at specific index.
 751  */
 752 static void copy_walk_n(ir_node *node,
 753                 walker_condition *walk_condition, int copy_index)
 754 {
 755         int i;
 756         int arity;
 757         ir_node *cp;
 758         ir_node **cpin;
 759
 760         /**
 761          * break condition and cycle resolver, creating temporary node copies
 762          */
 763         if (irn_visited(node)) {
 764                 /* Here we rely on nodestate's copy being initialized with NULL */
 765                 DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node));
 766                 if (get_unroll_copy(node, copy_index) == NULL) {
 767                         ir_node *u;
 768                         u = copy_node(node);
 769                         set_unroll_copy(node, copy_index, u);
 770                         DB((dbg, LEVEL_5, "The TEMP unknown of %N is created %N\n", node, u));
 771                 }
 772                 return;
 773         }
 774
 775         /* Walk */
 776         mark_irn_visited(node);
 777
 778         if (!is_Block(node)) {
 779                 ir_node *block = get_nodes_block(node);
 780                 if (walk_condition(block))
 781                         DB((dbg, LEVEL_5, "walk block %N\n", block));
 782                 copy_walk_n(block, walk_condition, copy_index);
 783         }
 784
 785         arity = get_irn_arity(node);
 786         NEW_ARR_A(ir_node *, cpin, arity);
 787
 788         for (i = 0; i < arity; ++i) {
 789                 ir_node *pred = get_irn_n(node, i);
 790
 791                 if (walk_condition(pred)) {
 792                         DB((dbg, LEVEL_5, "walk node %N\n", pred));
 793                         copy_walk_n(pred, walk_condition, copy_index);
 794                         cpin[i] = get_unroll_copy(pred, copy_index);
 795                 } else {
 796                         cpin[i] = pred;
 797                 }
 798         }
 799
 800         /* copy node / finalize temp node */
 801         cp = get_unroll_copy(node, copy_index);
 802         if (cp == NULL || is_Unknown(cp)) {
 803                 cp = copy_node(node);
 804                 set_unroll_copy(node, copy_index, cp);
 805                 DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp));
 806         } else {
 807                 /* temporary copy is existent but without correct ins */
 808                 cp = get_unroll_copy(node, copy_index);
 809                 DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp));
 810         }
 811
 812         if (!is_Block(node)) {
 813                 ir_node *cpblock = get_unroll_copy(get_nodes_block(node), copy_index);
 814
 815                 set_nodes_block(cp, cpblock );
 816                 if (is_Phi(cp))
 817                         add_Block_phi(cpblock, cp);
 818         }
 819
 820         /* Keeps phi list of temporary node. */
 821         set_irn_in(cp, ARR_LEN(cpin), cpin);
 822 }
 823
 824 /* Removes alle Blocks with non marked predecessors from the condition chain. */
 825 static void unmark_not_allowed_cc_blocks(void)
 826 {
 827         int blocks = ARR_LEN(cc_blocks);
 828         int i;
 829
 830         for(i = 0; i < blocks; ++i) {
 831                 ir_node *block = cc_blocks[i];
 832                 int a;
 833                 int arity = get_irn_arity(block);
 834
 835                 /* Head is an exception. */
 836                 if (block == loop_head)
 837                         continue;
 838
 839                 for(a = 0; a < arity; ++a) {
 840                         if (! is_nodes_block_marked(get_irn_n(block, a))) {
 841                                 set_Block_mark(block, 0);
 842                                 --inversion_blocks_in_cc;
 843                                 DB((dbg, LEVEL_5, "Removed %N from cc (blocks in cc %d)\n",
 844                                                 block, inversion_blocks_in_cc));
 845
 846                                 break;
 847                         }
 848                 }
 849         }
 850 }
 851
 852 /* Unmarks all cc blocks using cc_blocks except head. */
 853 static void unmark_cc_blocks(void)
 854 {
 855         int blocks = ARR_LEN(cc_blocks);
 856         int i;
 857
 858         for(i = 0; i < blocks; ++i) {
 859                 ir_node *block = cc_blocks[i];
 860
 861                 /* Head is an exception. */
 862                 if (block != loop_head)
 863                         set_Block_mark(block, 0);
 864         }
 865         inversion_blocks_in_cc = 1;
 866
 867         /* invalidate */
 868         loop_info.cc_size = 0;
 869 }
 870
 871 /**
 872  * Populates head_entries with (node, pred_pos) tuple
 873  * whereas the node's pred at pred_pos is in the cc but not the node itself.
 874  * Also finds df loops inside the cc.
 875  * Head and condition chain blocks have been marked previously.
 876  */
 877 static void get_head_outs(ir_node *node, void *env)
 878 {
 879         int i;
 880         int arity = get_irn_arity(node);
 881         (void) env;
 882
 883         for (i = 0; i < arity; ++i) {
 884                 if (!is_nodes_block_marked(node) && is_nodes_block_marked(get_irn_n(node, i))) {
 885                         entry_edge entry;
 886                         entry.node = node;
 887                         entry.pos = i;
 888                         /* Saving also predecessor seems redundant, but becomes
 889                          * necessary when changing position of it, before
 890                          * dereferencing it.*/
 891                         entry.pred = get_irn_n(node, i);
 892                         ARR_APP1(entry_edge, cur_head_outs, entry);
 893                 }
 894         }
 895
 896         arity = get_irn_arity(loop_head);
 897
 898         /* Find df loops inside the cc */
 899         if (is_Phi(node) && get_nodes_block(node) == loop_head) {
 900                 for (i = 0; i < arity; ++i) {
 901                         if (is_own_backedge(loop_head, i)) {
 902                                 if (is_nodes_block_marked(get_irn_n(node, i))) {
 903                                         entry_edge entry;
 904                                         entry.node = node;
 905                                         entry.pos = i;
 906                                         entry.pred = get_irn_n(node, i);
 907                                         ARR_APP1(entry_edge, head_df_loop, entry);
 908                                         DB((dbg, LEVEL_5, "Found incc assignment node %N @%d is pred %N, graph %N %N\n",
 909                                                         node, i, entry.pred, current_ir_graph, get_irg_start_block(current_ir_graph)));
 910                                 }
 911                         }
 912                 }
 913         }
 914 }
 915
 916 /**
 917  * Find condition chains, and add them to be inverted
 918  * A block belongs to the chain if a condition branches out of the loop.
 919  * (Some blocks need to be removed once again.)
 920  * Returns 1 if the given block belongs to the condition chain.
 921  */
 922 static unsigned find_condition_chain(ir_node *block)
 923 {
 924         const    ir_edge_t *edge;
 925         unsigned mark = 0;
 926         unsigned has_be = 0;
 927         unsigned jmp_only;
 928         unsigned nodes_n = 0;
 929
 930         mark_irn_visited(block);
 931
 932         DB((dbg, LEVEL_5, "condition_chains for block %N\n", block));
 933
 934         /* Get node count */
 935         foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) {
 936                 ++nodes_n;
 937         }
 938
 939         /* Check if node count would exceed maximum cc size.
 940          * TODO
 941          * This is not optimal, as we search depth-first and break here,
 942          * continuing with another subtree. */
 943         if (loop_info.cc_size + nodes_n > opt_params.max_cc_size) {
 944                 set_Block_mark(block, 0);
 945                 return 0;
 946         }
 947
 948         /* Check if block only has a jmp instruction. */
 949         jmp_only = 1;
 950         foreach_out_edge(block, edge) {
 951                 ir_node *src = get_edge_src_irn(edge);
 952
 953                 if (! is_Block(src) && ! is_Jmp(src)) {
 954                         jmp_only = 0;
 955                 }
 956         }
 957
 958         /* Check cf outs if one is leaving the loop,
 959          * or if this node has a backedge. */
 960         foreach_block_succ(block, edge) {
 961                 ir_node *src = get_edge_src_irn(edge);
 962                 int pos = get_edge_src_pos(edge);
 963
 964                 if (! is_in_loop(src))
 965                         mark = 1;
 966
 967                 /* Inverting blocks with backedge outs leads to a cf edge
 968                  * from the inverted head, into the inverted head (skipping the body).
 969                  * As the body becomes the new loop head,
 970                  * this would introduce another loop in the existing loop.
 971                  * This loop inversion cannot cope with this case. */
 972                 if (is_backedge(src, pos)) {
 973                         has_be = 1;
 974                         break;
 975                 }
 976         }
 977
 978         /* We need all predecessors to already belong to the condition chain.
 979          * Example of wrong case:  * == in cc
 980          *
 981          *     Head*             ,--.
 982          *    /|   \            B   |
 983          *   / A*  B           /    |
 984          *  / /\   /          ?     |
 985          *   /   C*      =>      D  |
 986          *      /  D           Head |
 987          *     /               A  \_|
 988          *                      C
 989          */
 990         /* Collect blocks containing only a Jmp.
 991          * Do not collect blocks with backedge outs. */
 992         if ((jmp_only == 1 || mark == 1) && has_be == 0) {
 993                 set_Block_mark(block, 1);
 994                 ++inversion_blocks_in_cc;
 995                 loop_info.cc_size += nodes_n;
 996                 DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block));
 997                 ARR_APP1(ir_node *, cc_blocks, block);
 998         } else {
 999                 set_Block_mark(block, 0);
1000         }
1001
1002         foreach_block_succ(block, edge) {
1003                 ir_node *src = get_edge_src_irn( edge );
1004
1005                 if (is_in_loop(src) && ! irn_visited(src))
1006                         find_condition_chain(src);
1007         }
1008
1009         return mark;
1010 }
1011
1012 /**
1013  * Rewires the copied condition chain. Removes backedges.
1014  * as this condition chain is prior to the loop.
1015  * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head.
1016  * (loop_head is already fixed, we cannot rely on it.)
1017  */
1018 static void fix_copy_inversion(void)
1019 {
1020         ir_node *new_head;
1021         ir_node **ins;
1022         ir_node **phis;
1023         ir_node *phi, *next;
1024         ir_node *head_cp    = get_inversion_copy(loop_head);
1025         ir_graph *irg       = get_irn_irg(head_cp);
1026         int arity           = get_irn_arity(head_cp);
1027         int backedges       = get_backedge_n(head_cp, 0);
1028         int new_arity       = arity - backedges;
1029         int pos;
1030         int i;
1031
1032         NEW_ARR_A(ir_node *, ins, new_arity);
1033
1034         pos = 0;
1035         /* Remove block backedges */
1036         for(i = 0; i < arity; ++i) {
1037                 if (!is_backedge(head_cp, i))
1038                         ins[pos++] = get_irn_n(head_cp, i);
1039         }
1040
1041         new_head = new_r_Block(irg, new_arity, ins);
1042
1043         phis = NEW_ARR_F(ir_node *, 0);
1044
1045         for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1046                 ir_node *new_phi;
1047                 NEW_ARR_A(ir_node *, ins, new_arity);
1048                 pos = 0;
1049                 for(i = 0; i < arity; ++i) {
1050                         if (!is_backedge(head_cp, i))
1051                                 ins[pos++] = get_irn_n(phi, i);
1052                 }
1053                 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1054                                 new_head, new_arity, ins,
1055                                 get_irn_mode(phi));
1056                 ARR_APP1(ir_node *, phis, new_phi);
1057         }
1058
1059         pos = 0;
1060         for_each_phi_safe(get_Block_phis(head_cp), phi, next) {
1061                 exchange(phi, phis[pos++]);
1062         }
1063
1064         exchange(head_cp, new_head);
1065
1066         DEL_ARR_F(phis);
1067 }
1068
1069 /* Puts the original condition chain at the end of the loop,
1070  * subsequently to the body.
1071  * Relies on block phi list and correct backedges.
1072  */
1073 static void fix_head_inversion(void)
1074 {
1075         ir_node *new_head;
1076         ir_node **ins;
1077         ir_node *phi, *next;
1078         ir_node **phis;
1079         ir_graph *irg       = get_irn_irg(loop_head);
1080         int arity           = get_irn_arity(loop_head);
1081         int backedges       = get_backedge_n(loop_head, 0);
1082         int new_arity       = backedges;
1083         int pos;
1084         int i;
1085
1086         NEW_ARR_A(ir_node *, ins, new_arity);
1087
1088         pos = 0;
1089         /* Keep only backedges */
1090         for(i = 0; i < arity; ++i) {
1091                 if (is_own_backedge(loop_head, i))
1092                         ins[pos++] = get_irn_n(loop_head, i);
1093         }
1094
1095         new_head = new_r_Block(irg, new_arity, ins);
1096
1097         phis = NEW_ARR_F(ir_node *, 0);
1098
1099         for_each_phi(loop_head, phi) {
1100                 ir_node *new_phi;
1101                 DB((dbg, LEVEL_5, "Fixing phi %N of loop head\n", phi));
1102
1103                 NEW_ARR_A(ir_node *, ins, new_arity);
1104
1105                 pos = 0;
1106                 for (i = 0; i < arity; ++i) {
1107                         ir_node *pred = get_irn_n(phi, i);
1108
1109                         if (is_own_backedge(loop_head, i)) {
1110                                 /* If assignment is in the condition chain,
1111                                  * we need to create a phi in the new loop head.
1112                                  * This can only happen for df, not cf. See find_condition_chains. */
1113                                 if (is_nodes_block_marked(pred)) {
1114                                         /* Cannot do this here. */
1115                                         ins[pos++] = pred; /*fix_inner_cc_definitions(phi, pred);*/
1116                                 } else {
1117                                         ins[pos++] = pred;
1118                                 }
1119                         }
1120                 }
1121
1122                 new_phi = new_rd_Phi(get_irn_dbg_info(phi),
1123                         new_head, new_arity, ins,
1124                         get_irn_mode(phi));
1125
1126                 ARR_APP1(ir_node *, phis, new_phi);
1127
1128                 DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N (arity %d)\n", phi, new_phi, pos ));
1129         }
1130
1131         pos = 0;
1132         for_each_phi_safe(get_Block_phis(loop_head), phi, next) {
1133                 DB((dbg, LEVEL_5, "fix inverted exch phi %N by %N\n", phi, phis[pos]));
1134                 if (phis[pos] != phi)
1135                         exchange(phi, phis[pos++]);
1136         }
1137
1138         DEL_ARR_F(phis);
1139
1140         DB((dbg, LEVEL_5, "fix inverted head exch head block %N by %N\n", loop_head, new_head));
1141         exchange(loop_head, new_head);
1142 }
1143
1144 /* Does the loop inversion.  */
1145 static void inversion_walk(entry_edge *head_entries)
1146 {
1147         int i;
1148
1149         /*
1150          * The order of rewiring bottom-up is crucial.
1151          * Any change of the order leads to lost information that would be needed later.
1152          */
1153
1154         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1155
1156         /* 1. clone condition chain */
1157         inc_irg_visited(current_ir_graph);
1158
1159         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1160                 entry_edge entry = head_entries[i];
1161                 ir_node *pred = get_irn_n(entry.node, entry.pos);
1162
1163                 DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred));
1164
1165                 copy_walk(pred, is_nodes_block_marked, cur_loop);
1166         }
1167
1168         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1169
1170         /* 2. Extends the head control flow successors ins
1171          *    with the definitions of the copied head node. */
1172         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1173                 entry_edge head_out = head_entries[i];
1174
1175                 if (is_Block(head_out.node))
1176                         extend_ins_by_copy(head_out.node, head_out.pos);
1177         }
1178
1179         /* 3. construct_ssa for users of definitions in the condition chain,
1180          *    as there is now a second definition. */
1181         for (i = 0; i < ARR_LEN(head_entries); ++i) {
1182                 entry_edge head_out = head_entries[i];
1183
1184                 /* Ignore keepalives */
1185                 if (is_End(head_out.node))
1186                         continue;
1187
1188                 /* Construct ssa for assignments in the condition chain. */
1189                 if (!is_Block(head_out.node)) {
1190                         ir_node *pred, *cppred, *block, *cpblock;
1191
1192                         pred = head_out.pred;
1193                         cppred = get_inversion_copy(pred);
1194                         block = get_nodes_block(pred);
1195                         cpblock = get_nodes_block(cppred);
1196                         construct_ssa(block, pred, cpblock, cppred);
1197                 }
1198         }
1199
1200         /*
1201          * If there is an assignment in the condition chain
1202          * with a user also in the condition chain,
1203          * the dominance frontier is in the new loop head.
1204          * The dataflow loop is completely in the condition chain.
1205          * Goal:
1206          *  To be wired: >|
1207          *
1208          *  | ,--.   |
1209          * Phi_cp |  | copied condition chain
1210          * >| |   |  |
1211          * >| ?__/   |
1212          * >| ,-.
1213          *  Phi* |   | new loop head with newly created phi.
1214          *   |   |
1215          *  Phi  |   | original, inverted condition chain
1216          *   |   |   |
1217          *   ?__/    |
1218          *
1219          */
1220         for (i = 0; i < ARR_LEN(head_df_loop); ++i) {
1221                 entry_edge head_out = head_df_loop[i];
1222
1223                 /* Construct ssa for assignments in the condition chain. */
1224                 ir_node *pred, *cppred, *block, *cpblock;
1225
1226                 pred = head_out.pred;
1227                 cppred = get_inversion_copy(pred);
1228                 assert(cppred && pred);
1229                 block = get_nodes_block(pred);
1230                 cpblock = get_nodes_block(cppred);
1231                 construct_ssa(block, pred, cpblock, cppred);
1232         }
1233
1234         /* 4. Remove the ins which are no backedges from the original condition chain
1235          *    as the cc is now subsequent to the body. */
1236         fix_head_inversion();
1237
1238         /* 5. Remove the backedges of the copied condition chain,
1239          *    because it is going to be the new 'head' in advance to the loop. */
1240         fix_copy_inversion();
1241 }
1242
1243 /* Performs loop inversion of cur_loop if possible and reasonable. */
1244 static void loop_inversion(void)
1245 {
1246         unsigned do_inversion = 1;
1247         unsigned has_cc = 0;
1248
1249         /*inversion_head_node_limit = INT_MAX;*/
1250         ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1251
1252         /* Reset block marks.
1253          * We use block marks to flag blocks of the original condition chain. */
1254         irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL);
1255
1256         /*loop_info.blocks = get_loop_n_blocks(cur_loop);*/
1257         cond_chain_entries = NEW_ARR_F(entry_edge, 0);
1258         head_df_loop = NEW_ARR_F(entry_edge, 0);
1259
1260         /*head_inversion_node_count = 0;*/
1261         inversion_blocks_in_cc = 0;
1262
1263         /* Use phase to keep copy of nodes from the condition chain. */
1264         phase = new_phase(current_ir_graph, phase_irn_init_default);
1265
1266         /* Search for condition chains and temporarily save the blocks in an array. */
1267         cc_blocks = NEW_ARR_F(ir_node *, 0);
1268         inc_irg_visited(current_ir_graph);
1269         has_cc = find_condition_chain(loop_head);
1270
1271         unmark_not_allowed_cc_blocks();
1272         DEL_ARR_F(cc_blocks);
1273
1274 #if LOOP_IGNORE_NODE_LIMITS
1275         (void) unmark_cc_blocks;
1276 #else
1277         /* Condition chain too large.
1278          * Loop should better be small enough to fit into the cache. */
1279         /* FIXME Of course, we should take a small enough cc in the first place,
1280          * which is not that simple. (bin packing)  */
1281         if (loop_info.cc_size > opt_params.max_cc_size) {
1282                 count_stats(stats.cc_limit_reached);
1283
1284                 /* Only head taken? */
1285                 if (inversion_blocks_in_cc == 1)
1286                         do_inversion = 0;
1287                 else
1288                         /* Unmark cc blocks except the head.
1289                          * Invert head only for possible unrolling. */
1290                         unmark_cc_blocks();
1291         }
1292 #endif
1293
1294         /* We also catch endless loops here,
1295          * because they do not have a condition chain. */
1296         if (inversion_blocks_in_cc < 1) {
1297                 do_inversion = 0;
1298                 DB((dbg, LEVEL_3,
1299                         "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n",
1300                         inversion_blocks_in_cc));
1301         }
1302
1303         if (do_inversion) {
1304                 cur_head_outs = NEW_ARR_F(entry_edge, 0);
1305
1306                 /* Get all edges pointing into the condition chain. */
1307                 irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL);
1308
1309                 /* Do the inversion */
1310                 inversion_walk(cur_head_outs);
1311
1312                 DEL_ARR_F(cur_head_outs);
1313
1314                 /* Duplicated blocks changed doms */
1315                 set_irg_doms_inconsistent(current_ir_graph);
1316                 /* Loop content changed */
1317                 set_irg_loopinfo_inconsistent(current_ir_graph);
1318                 /* TODO are they? Depends on set_irn_in and set_irn_n exchange and new_node. */
1319                 set_irg_outs_inconsistent(current_ir_graph);
1320
1321                 count_stats(stats.inverted);
1322         }
1323
1324         /* free */
1325         phase_free(phase);
1326         DEL_ARR_F(cond_chain_entries);
1327         DEL_ARR_F(head_df_loop);
1328
1329         ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK);
1330 }
1331
1332 /* Fix the original loop_heads ins for invariant unrolling case. */
1333 static void unrolling_fix_loop_head_inv(void)
1334 {
1335         ir_node *ins[2];
1336         ir_node *phi;
1337         ir_node *proj = new_r_Proj(loop_info.duff_cond, mode_X, 0);
1338         ir_node *head_pred = get_irn_n(loop_head, loop_info.be_src_pos);
1339         ir_node *loop_condition = get_unroll_copy(head_pred, unroll_nr - 1);
1340
1341         /* Original loop_heads ins are:
1342          * duff block and the own backedge */
1343
1344         ins[0] = loop_condition;
1345         ins[1] = proj;
1346
1347         set_irn_in(loop_head, 2, ins);
1348
1349         for_each_phi(loop_head, phi) {
1350                 ir_node *pred = get_irn_n(phi, loop_info.be_src_pos);
1351                 ir_node *last_pred = get_unroll_copy(pred, unroll_nr - 1);
1352
1353                 ins[0] = last_pred;
1354                 ins[1] = get_irn_link(phi);
1355
1356                 set_irn_in(phi, 2, ins);
1357         }
1358 }
1359
1360 /* Removes previously created phis with only 1 in. */
1361 static void correct_phis(ir_node *node, void *env)
1362 {
1363         (void)env;
1364         if (is_Phi(node) && get_irn_arity(node) == 1) {
1365                 ir_node *exch;
1366                 ir_node *in[1];
1367
1368                 in[0] = get_irn_n(node, 0);
1369
1370                 exch = new_rd_Phi(get_irn_dbg_info(node),
1371                     get_nodes_block(node), 1, in,
1372                 get_irn_mode(node));
1373
1374                 exchange(node, exch);
1375         }
1376 }
1377
1378 /* Unrolling: Rewire floating copies. */
1379 static void place_copies(int copies)
1380 {
1381         ir_node *loophead = loop_head;
1382         int c, i;
1383         int be_src_pos = loop_info.be_src_pos;
1384
1385         /* Serialize loops by fixing their head ins.
1386          * Processed are the copies.
1387          * The original loop is done after that, to keep backedge infos. */
1388         for (c = 0; c < copies; ++c) {
1389                 ir_node *upper = get_unroll_copy(loophead, c);
1390                 ir_node *lower = get_unroll_copy(loophead, c + 1);
1391                 ir_node *phi;
1392                 ir_node *topmost_be_block = get_nodes_block(get_irn_n(loophead, be_src_pos));
1393
1394                 /* Important: get the preds first and then their copy. */
1395                 ir_node *upper_be_block = get_unroll_copy(topmost_be_block, c);
1396                 ir_node *new_jmp = new_r_Jmp(upper_be_block);
1397                 DB((dbg, LEVEL_5, " place_copies upper %N lower %N\n", upper, lower));
1398
1399                 DB((dbg, LEVEL_5, "topmost be block %N \n", topmost_be_block));
1400
1401                 if (loop_info.unroll_kind == constant) {
1402                         ir_node *ins[1];
1403                         ins[0] = new_jmp;
1404                         set_irn_in(lower, 1, ins);
1405
1406                         for_each_phi(loophead, phi) {
1407                                 ir_node *topmost_def = get_irn_n(phi, be_src_pos);
1408                                 ir_node *upper_def = get_unroll_copy(topmost_def, c);
1409                                 ir_node *lower_phi = get_unroll_copy(phi, c + 1);
1410
1411                                 /* It is possible, that the value used
1412                                  * in the OWN backedge path is NOT defined in this loop. */
1413                                 if (is_in_loop(topmost_def))
1414                                         ins[0] = upper_def;
1415                                 else
1416                                         ins[0] = topmost_def;
1417
1418                                 set_irn_in(lower_phi, 1, ins);
1419                                 /* Need to replace phis with 1 in later. */
1420                         }
1421                 } else {
1422                         /* Invariant case */
1423                         /* Every node has 2 ins. One from the duff blocks
1424                          * and one from the previous unrolled loop. */
1425                         ir_node *ins[2];
1426                         /* Calculate corresponding projection of mod result for this copy c */
1427                         ir_node *proj = new_r_Proj(loop_info.duff_cond, mode_X, unroll_nr - c - 1);
1428
1429                         ins[0] = new_jmp;
1430                         ins[1] = proj;
1431                         set_irn_in(lower, 1, ins);
1432
1433                         for_each_phi(loophead, phi) {
1434                                 ir_node *topmost_phi_pred = get_irn_n(phi, be_src_pos);
1435                                 ir_node *upper_phi_pred;
1436                                 ir_node *lower_phi;
1437                                 ir_node *duff_phi;
1438
1439                                 lower_phi = get_unroll_copy(phi, c + 1);
1440                                 duff_phi = get_irn_link(lower_phi);
1441
1442                                 if (is_in_loop(topmost_phi_pred)) {
1443                                         upper_phi_pred = get_unroll_copy(topmost_phi_pred, c);
1444                                 } else {
1445                                         upper_phi_pred = topmost_phi_pred;
1446                                 }
1447
1448                                 ins[0] = upper_phi_pred;
1449                                 ins[1] = duff_phi;
1450
1451                                 set_irn_in(lower_phi, 2, ins);
1452                         }
1453                 }
1454         }
1455
1456         /* Reconnect loop landing pad with last copy. */
1457         for (i = 0; i < ARR_LEN(loop_entries); ++i) {
1458                 entry_edge edge = loop_entries[i];
1459                 /* Last copy is at the bottom */
1460                 ir_node *new_pred = get_unroll_copy(edge.pred, copies);
1461                 set_irn_n(edge.node, edge.pos, new_pred);
1462         }
1463
1464         /* Fix original loops head.
1465          * Done in the end, as ins and be info were needed before. */
1466         if (loop_info.unroll_kind == constant) {
1467                 ir_node *phi;
1468                 ir_node *head_pred = get_irn_n(loop_head, be_src_pos);
1469                 ir_node *loop_condition = get_unroll_copy(head_pred, unroll_nr - 1);
1470
1471                 set_irn_n(loop_head, loop_info.be_src_pos, loop_condition);
1472
1473                 for_each_phi(loop_head, phi) {
1474                         ir_node *pred = get_irn_n(phi, be_src_pos);
1475                         ir_node *last_pred;
1476
1477                         /* It is possible, that the value used
1478                          * in the OWN backedge path is NOT defined in this loop. */
1479                         if (is_in_loop(pred))
1480                                 last_pred = get_unroll_copy(pred, copies);
1481                         else
1482                                 last_pred = pred;
1483                         set_irn_n(phi, be_src_pos, last_pred);
1484                 }
1485         } else {
1486                 unrolling_fix_loop_head_inv();
1487         }
1488 }
1489
1490 /* Copies the cur_loop several times. */
1491 static void copy_loop(entry_edge *cur_loop_outs, int copies)
1492 {
1493         int i, c;
1494
1495         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1496
1497         for (c = 0; c < copies; ++c) {
1498
1499                 inc_irg_visited(current_ir_graph);
1500
1501                 DB((dbg, LEVEL_5, "         ### Copy_loop  copy nr: %d ###\n", c));
1502                 for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) {
1503                         entry_edge entry = cur_loop_outs[i];
1504                         ir_node *pred = get_irn_n(entry.node, entry.pos);
1505
1506                         copy_walk_n(pred, is_in_loop, c + 1);
1507                 }
1508         }
1509
1510         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED);
1511 }
1512
1513
1514 /* Creates a new phi from the given phi node omitting own bes,
1515  * using be_block as supplier of backedge informations. */
1516 static ir_node *clone_phis_sans_bes(ir_node *node, ir_node *be_block)
1517 {
1518         ir_node **ins;
1519         int arity = get_irn_arity(node);
1520         int i, c = 0;
1521
1522         assert(get_irn_arity(node) == get_irn_arity(be_block));
1523         assert(is_Phi(node));
1524
1525         ins = NEW_ARR_F(ir_node *, arity);
1526         for (i = 0; i < arity; ++i) {
1527                 if (! is_own_backedge(be_block, i)) {
1528                         ins[c] = get_irn_n(node, i);
1529                         ++c;
1530 #if 0
1531                 } else {
1532                         ir_node *pred = get_inr_n(node, i);
1533                         if (! is_in_loop(pred)) {
1534                                 ins[c] = pred;
1535                                 ++c;
1536                         }
1537 #endif
1538                 }
1539         }
1540
1541         return new_r_Phi(get_nodes_block(node), c, ins, get_irn_mode(node));
1542 }
1543
1544 /* Creates a new block from the given block node omitting own bes,
1545  * using be_block as supplier of backedge informations. */
1546 static ir_node *clone_block_sans_bes(ir_node *node, ir_node *be_block)
1547 {
1548         ir_node **ins;
1549         ir_graph *irg = get_irn_irg(node);
1550         int arity = get_irn_arity(node);
1551         int i, c = 0;
1552
1553         assert(get_irn_arity(node) == get_irn_arity(be_block));
1554         assert(is_Block(node));
1555
1556         ins = NEW_ARR_F(ir_node *, arity);
1557         for (i = 0; i < arity; ++i) {
1558                 if (! is_own_backedge(be_block, i)) {
1559                         ins[c] = get_irn_n(node, i);
1560                         ++c;
1561                 }
1562         }
1563
1564         return new_r_Block(irg, c, ins);
1565 }
1566
1567 /* Creates blocks for duffs device, using previously obtained
1568  * informations about the iv.
1569  * TODO split */
1570 static void create_duffs_block(void)
1571 {
1572         ir_mode *mode;
1573
1574         ir_graph *irg = get_irn_irg(loop_head);
1575         ir_node *block1, *count_block, *duff_block;
1576         ir_node *ems, *ems_divmod, *ems_mod_proj, *cmp_null,
1577                 *cmp_proj, *ems_mode_cond, *x_true, *x_false, *const_null;
1578         ir_node *true_val, *false_val;
1579         ir_node *ins[2];
1580
1581         ir_node *duff_mod, *proj, *cond;
1582
1583         ir_node *count, *correction, *unroll_c;
1584         ir_node *cmp_bad_count, *good_count, *bad_count, *count_phi, *bad_count_neg;
1585
1586         mode = get_irn_mode(loop_info.end_val);
1587         const_null = new_r_Const(irg, get_mode_null(mode));
1588
1589         /* TODO naming
1590          * 1. Calculate first approach to count.
1591          *    Condition: (end - start) % step == 0 */
1592         block1 = clone_block_sans_bes(loop_head, loop_head);
1593
1594         /* Create loop entry phis in first duff block
1595          * as it becomes the loops preheader */
1596         if (loop_info.unroll_kind == invariant) {
1597                 ir_node *phi;
1598                 for_each_phi(loop_head, phi) {
1599                         ir_node *new_phi = clone_phis_sans_bes(phi, loop_head);
1600                         set_nodes_block(new_phi, block1);
1601                 }
1602         }
1603
1604         ems = new_r_Sub(block1, loop_info.end_val, loop_info.start_val,
1605                 get_irn_mode(loop_info.end_val));
1606
1607         ems_divmod = new_r_DivMod(block1,
1608                 new_r_NoMem(irg),
1609                 ems,
1610                 loop_info.step,
1611                 mode,
1612                 op_pin_state_pinned);
1613
1614         ems_mod_proj = new_r_Proj(ems_divmod, mode, pn_DivMod_res_mod);
1615         cmp_null = new_r_Cmp(block1, ems_mod_proj, const_null);
1616         cmp_proj = new_r_Proj(cmp_null, mode, pn_Cmp_Eq);
1617         ems_mode_cond = new_r_Cond(block1, cmp_proj);
1618
1619         /* ems % step == 0 */
1620         x_true = new_r_Proj(ems_mode_cond, mode_X, pn_Cond_true);
1621         /* ems % step != 0 */
1622         x_false = new_r_Proj(ems_mode_cond, mode_X, pn_Cond_false);
1623
1624
1625         /* 2. Second block.
1626          * Assures, duffs device receives a valid count.
1627          * Condition:
1628          *     decreasing: count < 0
1629          *     increasing: count > 0
1630          */
1631         ins[0] = x_true;
1632         ins[1] = x_false;
1633
1634         count_block = new_r_Block(irg, 2, ins);
1635
1636         /* Increase loop-taken-count depending on the loop condition
1637          * uses the latest iv to compare to. */
1638         if (loop_info.latest_value == 1) {
1639                 /* ems % step == 0 :  +0 */
1640                 true_val = new_r_Const(irg, get_mode_null(mode));
1641                 /* ems % step != 0 :  +1 */
1642                 false_val = new_r_Const(irg, get_mode_one(mode));
1643         } else {
1644                 tarval *tv_two = new_tarval_from_long(2, mode);
1645                 /* ems % step == 0 :  +1 */
1646                 true_val = new_r_Const(irg, get_mode_one(mode));
1647                 /* ems % step != 0 :  +2 */
1648                 false_val = new_r_Const(irg, tv_two);
1649         }
1650
1651         ins[0] = true_val;
1652         ins[1] = false_val;
1653
1654         correction = new_r_Phi(count_block, 2, ins, mode);
1655
1656         count = new_r_Proj(ems_divmod, mode, pn_DivMod_res_div);
1657
1658         /* (end - start) / step  +  correction */
1659         count = new_r_Add(count_block, count, correction, mode);
1660
1661         cmp_bad_count = new_r_Cmp(count_block, count, const_null);
1662
1663         /* We preconditioned the loop to be tail-controlled.
1664          * So, if count is something 'wrong' like 0,
1665          * negative/positive (depending on step direction),
1666          * we may take the loop once (tail-contr.) and leave it
1667          * to the existing condition, to break; */
1668
1669         /* Depending on step direction, we have to check for > or < 0 */
1670         if (loop_info.decreasing == 1) {
1671                 bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Lt);
1672         } else {
1673                 bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Gt);
1674         }
1675
1676         bad_count_neg = new_r_Cond(count_block, bad_count_neg);
1677         good_count = new_r_Proj(bad_count_neg, mode_X, pn_Cond_true);
1678         bad_count = new_r_Proj(ems_mode_cond, mode_X, pn_Cond_false);
1679
1680         /* 3. Duff Block
1681          *    Contains module to decide which loop to start from. */
1682
1683         ins[0] = good_count;
1684         ins[1] = bad_count;
1685         duff_block = new_r_Block(irg, 2, ins);
1686
1687         /* Matze: I commented this line out because I was in the process of
1688          * removing the Abs node. I don't understand that line at all anyway
1689          * since no other code here checks for the presence of an Abs or creates
1690          * one. So how can we know here that "count" is an Abs node... */
1691 #if 0
1692         /* count wants to be positive */
1693         ins[0] = get_Abs_op(count);
1694 #endif
1695         /* Manually feed the aforementioned count = 1 (bad case)*/
1696         ins[1] = new_r_Const(irg, get_mode_one(mode));
1697         count_phi = new_r_Phi(duff_block, 2, ins, mode);
1698
1699         unroll_c = new_r_Const(irg, new_tarval_from_long((long)unroll_nr, mode));
1700
1701         /* count % unroll_nr */
1702         duff_mod = new_r_Mod(duff_block,
1703                 new_r_NoMem(irg),
1704                 count_phi,
1705                 unroll_c,
1706                 mode,
1707                 op_pin_state_pinned);
1708
1709         proj = new_r_Proj(duff_mod, mode_X, pn_Mod_res);
1710         cond = new_r_Cond(duff_block, proj);
1711
1712         loop_info.duff_cond = cond;
1713 }
1714
1715 /* Returns 1 if given node is not in loop,
1716  * or if it is a phi of the loop head with only loop invariant defs.
1717  */
1718 static unsigned is_loop_invariant_def(ir_node *node)
1719 {
1720         int i;
1721
1722         if (! is_in_loop(node))
1723                 return 1;
1724
1725         /* If this is a phi of the loophead shared by more than 1 loop,
1726          * we need to check if all defs are not in the loop.  */
1727         if (is_Phi(node)) {
1728                 ir_node *block;
1729                 block = get_nodes_block(node);
1730
1731                 /* To prevent unexpected situations. */
1732                 if (block != loop_head)
1733                         return 0;
1734
1735                 for (i = 0; i < get_irn_arity(node); ++i) {
1736                         /* Check if all bes are just loopbacks. */
1737                         if (is_own_backedge(block, i) && get_irn_n(node, i) != node)
1738                                 return 0;
1739                 }
1740         }
1741         return 1;
1742 }
1743
1744 /* Returns 1 if one pred of node is invariant and the other is not.
1745  * invar_pred and other are set analogously. */
1746 static unsigned get_invariant_pred(ir_node *node, ir_node **invar_pred, ir_node **other)
1747 {
1748         ir_node *pred0 = get_irn_n(node, 0);
1749         ir_node *pred1 = get_irn_n(node, 1);
1750
1751         *invar_pred = NULL;
1752         *other = NULL;
1753
1754         if (is_loop_invariant_def(pred0)) {
1755                 *invar_pred = pred0;
1756                 *other = pred1;
1757         }
1758
1759         if (is_loop_invariant_def(pred1)) {
1760                 if (invar_pred != NULL)
1761                         /* RETURN. We do not want both preds to be invariant. */
1762                         return 0;
1763
1764                 *other = pred0;
1765                 *invar_pred = pred1;
1766                 return 1;
1767         } else {
1768                 return 0;
1769         }
1770 }
1771
1772 /* Starts from a phi that may belong to an iv.
1773  * If an add forms a loop with iteration_phi,
1774  * and add uses a constant, 1 is returned
1775  * and 'start' as well as 'add' are sane. */
1776 static unsigned get_start_and_add(ir_node *iteration_phi, unrolling_kind_flag role)
1777 {
1778         int i;
1779         ir_node *found_add = loop_info.add;
1780         int arity = get_irn_arity(iteration_phi);
1781
1782         DB((dbg, LEVEL_4, "Find start and add from %N\n", iteration_phi));
1783
1784         for (i = 0; i < arity; ++i) {
1785
1786                 /* Find start_val which needs to be pred of the iteration_phi.
1787                  * If start_val already known, sanity check. */
1788                 if (!is_backedge(get_nodes_block(loop_info.iteration_phi), i)) {
1789                         ir_node *found_start_val = get_irn_n(loop_info.iteration_phi, i);
1790
1791                         DB((dbg, LEVEL_4, "found_start_val %N\n", found_start_val));
1792
1793                         /* We already found a start_val it has to be always the same. */
1794                         if (loop_info.start_val && found_start_val != loop_info.start_val)
1795                                 return 0;
1796
1797                         if ((role == constant) && !(is_SymConst(found_start_val) || is_Const(found_start_val)))
1798                                         return 0;
1799                         else if((role == constant) && !(is_loop_invariant_def(found_start_val)))
1800                                         return 0;
1801
1802                         loop_info.start_val = found_start_val;
1803                 }
1804
1805                 /* The phi has to be in the loop head.
1806                  * Follow all own backedges. Every value supplied from these preds of the phi
1807                  * needs to origin from the same add. */
1808                 if (is_own_backedge(get_nodes_block(loop_info.iteration_phi), i)) {
1809                         ir_node *new_found = get_irn_n(loop_info.iteration_phi,i);
1810
1811                         DB((dbg, LEVEL_4, "is add? %N\n", new_found));
1812
1813                         if (! (is_Add(new_found) || is_Sub(new_found)) || (found_add && found_add != new_found))
1814                                 return 0;
1815                         else
1816                                 found_add = new_found;
1817                 }
1818         }
1819
1820         loop_info.add = found_add;
1821
1822         return 1;
1823 }
1824
1825
1826 /* Returns 1 if one pred of node is a const value and the other is not.
1827  * const_pred and other are set analogously. */
1828 static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **other)
1829 {
1830         ir_node *pred0 = get_irn_n(node, 0);
1831         ir_node *pred1 = get_irn_n(node, 1);
1832
1833         DB((dbg, LEVEL_4, "Checking for constant pred of %N\n", node));
1834
1835         *const_pred = NULL;
1836         *other = NULL;
1837
1838         /*DB((dbg, LEVEL_4, "is %N const\n", pred0));*/
1839         if (is_Const(pred0) || is_SymConst(pred0)) {
1840                 DB((dbg, LEVEL_1, "%N is constant\n", pred0));
1841                 *const_pred = pred0;
1842                 *other = pred1;
1843         }
1844
1845         /*DB((dbg, LEVEL_4, "is %N const\n", pred1));*/
1846         if (is_Const(pred1) || is_SymConst(pred1)) {
1847                 if (*const_pred != NULL) {
1848                         DB((dbg, LEVEL_1, "%N is ALSO constant\n", pred1));
1849                         /* RETURN. We do not want both preds to be constant. */
1850                         return 0;
1851                 }
1852
1853                 DB((dbg, LEVEL_4, "%N is constant\n", pred1));
1854                 *other = pred0;
1855                 *const_pred = pred1;
1856         }
1857
1858         if (*const_pred == NULL)
1859                 return 0;
1860         else
1861                 return 1;
1862 }
1863
1864 /* Returns the mathematically inverted pn_Cmp. */
1865 static pn_Cmp get_math_inverted_case(pn_Cmp proj)
1866 {
1867         switch(proj) {
1868                 case pn_Cmp_Eq:
1869                         return pn_Cmp_Lg;
1870                 case pn_Cmp_Lg:
1871                         return pn_Cmp_Eq;
1872                 case pn_Cmp_Lt:
1873                         return pn_Cmp_Ge;
1874                 case pn_Cmp_Le:
1875                         return pn_Cmp_Gt;
1876                 case pn_Cmp_Gt:
1877                         return pn_Cmp_Le;
1878                 case pn_Cmp_Ge:
1879                         return pn_Cmp_Lt;
1880                 default:
1881                         panic("Unhandled pn_Cmp.");
1882         }
1883 }
1884
1885 /* norm_proj means we do not exit the loop. */
1886 static unsigned simulate_next(tarval **count_tar,
1887                 tarval *stepped, tarval *step_tar, tarval *end_tar, pn_Cmp norm_proj)
1888 {
1889         tarval *next;
1890
1891         DB((dbg, LEVEL_1, "Loop taken if (stepped)%ld %s (end)%ld ",
1892                                 get_tarval_long(stepped),
1893                                 get_pnc_string((norm_proj)),
1894                                 get_tarval_long(end_tar)));
1895         DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value));
1896
1897         /* If current iv does not stay in the loop,
1898          * this run satisfied the exit condition. */
1899         if (! (tarval_cmp(stepped, end_tar) & norm_proj))
1900                 return 1;
1901
1902         DB((dbg, LEVEL_1, "Result: (stepped)%ld IS %s (end)%ld\n",
1903                                 get_tarval_long(stepped),
1904                                 get_pnc_string(tarval_cmp(stepped, end_tar)),
1905                                 get_tarval_long(end_tar)));
1906
1907         /* next step */
1908         if (is_Add(loop_info.add))
1909                 next = tarval_add(stepped, step_tar);
1910         else
1911                 /* sub */
1912                 next = tarval_sub(stepped, step_tar, get_irn_mode(loop_info.end_val));
1913
1914         DB((dbg, LEVEL_1, "Loop taken if %ld %s %ld ",
1915                                 get_tarval_long(next),
1916                                 get_pnc_string(norm_proj),
1917                                 get_tarval_long(end_tar)));
1918         DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value));
1919
1920         /* Increase steps. */
1921         *count_tar = tarval_add(*count_tar, get_tarval_one(get_tarval_mode(*count_tar)));
1922
1923         /* Next has to fail the loop condition, or we will never exit. */
1924         if (! (tarval_cmp(next, end_tar) & norm_proj))
1925                 return 1;
1926         else
1927                 return 0;
1928 }
1929
1930 /* Check if loop meets requirements for a 'simple loop':
1931  * - Exactly one cf out
1932  * - Allowed calls
1933  * - Max nodes after unrolling
1934  * - tail-controlled
1935  * - exactly one be
1936  * - cmp
1937  * Returns Projection of cmp node or NULL; */
1938 static ir_node *is_simple_loop(void)
1939 {
1940         int arity, i;
1941         unsigned loop_depth, max_loop_nodes_adapted;
1942         ir_node *loop_block, *exit_block, *projx, *cond, *projres, *loop_condition;
1943
1944         /* Maximum of one condition, and no endless loops. */
1945         if (loop_info.cf_outs != 1)
1946                 return NULL;
1947
1948         DB((dbg, LEVEL_4, "1 loop exit\n"));
1949
1950 #if 0
1951         /* Ignore loop size. Probably not wise in other than testcases. */
1952         (void) max_loop_nodes_adapted;
1953         (void) loop_depth;
1954
1955         loop_info.max_unroll = 6;
1956 #else
1957         /* Calculate maximum unroll_nr keeping node count below limit. */
1958         loop_depth = get_loop_depth(cur_loop) - 1;
1959         max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth);
1960
1961         loop_info.max_unroll = opt_params.max_loop_size / loop_info.nodes;
1962         if (loop_info.max_unroll < 2) {
1963                 count_stats(stats.too_large);
1964                 return NULL;
1965         }
1966 #endif
1967         DB((dbg, LEVEL_4, "maximum unroll factor %u, to not exceed node limit \n",
1968                 loop_info.max_unroll));
1969
1970         arity = get_irn_arity(loop_head);
1971         /* RETURN if we have more than 1 be. */
1972         /* Get my backedges without alien bes. */
1973         loop_block = NULL;
1974         for (i = 0; i < arity; ++i) {
1975                 ir_node *pred = get_irn_n(loop_head, i);
1976                 if (is_own_backedge(loop_head, i)) {
1977                         if (loop_block)
1978                                 /* Our simple loops may have only one backedge. */
1979                                 return NULL;
1980                         else {
1981                                 loop_block = get_nodes_block(pred);
1982                                 loop_info.be_src_pos = i;
1983                         }
1984                 }
1985         }
1986
1987         DB((dbg, LEVEL_4, "loop has 1 own backedge.\n"));
1988
1989         exit_block = get_nodes_block(loop_info.cf_out.pred);
1990         /* The loop has to be tail-controlled.
1991          * This can be changed/improved,
1992          * but we would need a duff iv. */
1993         if (exit_block != loop_block)
1994                 return NULL;
1995
1996         DB((dbg, LEVEL_4, "tail-controlled loop.\n"));
1997
1998         /* find value on which loop exit depends */
1999         projx = loop_info.cf_out.pred;
2000         cond = get_irn_n(projx, 0);
2001         projres = get_irn_n(cond, 0);
2002         loop_condition = get_irn_n(projres, 0);
2003
2004         if (!is_Cmp(loop_condition))
2005                 return NULL;
2006
2007         DB((dbg, LEVEL_5, "projection is %s\n", get_pnc_string(get_Proj_proj(projx))));
2008
2009         switch(get_Proj_proj(projx)) {
2010                 case pn_Cond_false:
2011                         loop_info.exit_cond = 0;
2012                         break;
2013                 case pn_Cond_true:
2014                         loop_info.exit_cond = 1;
2015                         break;
2016                 default:
2017                         panic("Cond Proj_proj other than true/false");
2018         }
2019
2020         DB((dbg, LEVEL_4, "Valid Cmp.\n"));
2021
2022         return projres;
2023 }
2024
2025 /* Returns 1 if all nodes are mode_Iu or mode_Is. */
2026 static unsigned are_mode_I(ir_node *n1, ir_node* n2, ir_node *n3)
2027 {
2028         ir_mode *m1 = get_irn_mode(n1);
2029         ir_mode *m2 = get_irn_mode(n2);
2030         ir_mode *m3 = get_irn_mode(n3);
2031
2032         if ((m1 == mode_Iu && m2 == mode_Iu && m3 == mode_Iu) ||
2033             (m1 == mode_Is && m2 == mode_Is && m3 == mode_Is))
2034                 return 1;
2035         else
2036                 return 0;
2037 }
2038
2039 /* Checks if cur_loop is a simple tail-controlled counting loop
2040  * with start and end value loop invariant, step constant. */
2041 static unsigned get_unroll_decision_invariant(void)
2042 {
2043
2044         ir_node  *projres, *loop_condition, *iteration_path;
2045         unsigned  success, is_latest_val;
2046         tarval   *start_tar, *step_tar;
2047         ir_mode  *mode;
2048
2049         /* RETURN if loop is not 'simple' */
2050         projres = is_simple_loop();
2051         if (projres == NULL)
2052                 return 0;
2053
2054         loop_condition = get_irn_n(projres, 0);
2055
2056         success = get_invariant_pred(loop_condition, &loop_info.end_val, &iteration_path);
2057         if (! success)
2058                 return 0;
2059
2060         DB((dbg, LEVEL_4, "Invariant End_val %N, other %N\n", loop_info.end_val, iteration_path));
2061
2062         /* We may find the add or the phi first.
2063          * Until now we only have end_val. */
2064         if (is_Add(iteration_path) || is_Sub(iteration_path)) {
2065
2066                 /* We test against the latest value of the iv. */
2067                 is_latest_val = 1;
2068
2069                 loop_info.add = iteration_path;
2070                 DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add));
2071
2072                 /* Preds of the add should be step and the iteration_phi */
2073                 success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi);
2074                 if (! success)
2075                         return 0;
2076
2077                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2078
2079                 if (! is_Phi(loop_info.iteration_phi))
2080                         return 0;
2081
2082                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2083
2084                 /* Find start_val.
2085                  * Does necessary sanity check of add, if it is already set.  */
2086                 success = get_start_and_add(loop_info.iteration_phi, invariant);
2087                 if (! success)
2088                         return 0;
2089
2090                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2091
2092         } else if (is_Phi(iteration_path)) {
2093                 ir_node *new_iteration_phi;
2094
2095                 /* We compare with the value the iv had entering this run. */
2096                 is_latest_val = 0;
2097
2098                 loop_info.iteration_phi = iteration_path;
2099                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2100
2101                 /* Find start_val and add-node.
2102                  * Does necessary sanity check of add, if it is already set.  */
2103                 success = get_start_and_add(loop_info.iteration_phi, invariant);
2104                 if (! success)
2105                         return 0;
2106
2107                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2108                 DB((dbg, LEVEL_4, "Got add or sub %N\n", loop_info.add));
2109
2110                 success = get_const_pred(loop_info.add, &loop_info.step, &new_iteration_phi);
2111                 if (! success)
2112                         return 0;
2113
2114                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2115
2116                 if (loop_info.iteration_phi != new_iteration_phi)
2117                         return 0;
2118
2119         } else {
2120                 return 0;
2121         }
2122
2123         mode = get_irn_mode(loop_info.end_val);
2124
2125         DB((dbg, LEVEL_4, "start %N, end %N, step %N\n",
2126                                 loop_info.start_val, loop_info.end_val, loop_info.step));
2127
2128         if (mode != mode_Is && mode != mode_Iu)
2129                 return 0;
2130
2131         /* TODO necessary? */
2132         if (!are_mode_I(loop_info.start_val, loop_info.step, loop_info.end_val))
2133                 return 0;
2134
2135         DB((dbg, LEVEL_4, "mode integer\n"));
2136
2137         step_tar = get_Const_tarval(loop_info.step);
2138         start_tar = get_Const_tarval(loop_info.start_val);
2139
2140         if (tarval_is_null(step_tar)) {
2141                 /* TODO Might be worth a warning. */
2142                 return 0;
2143         }
2144
2145         DB((dbg, LEVEL_4, "step is not 0\n"));
2146
2147         create_duffs_block();
2148
2149         return loop_info.max_unroll;
2150 }
2151
2152 /* Returns unroll factor,
2153  * given maximum unroll factor and number of loop passes. */
2154 static unsigned get_preferred_factor_constant(tarval *count_tar)
2155 {
2156         tarval *tar_6, *tar_5, *tar_4, *tar_3, *tar_2;
2157         unsigned prefer;
2158         ir_mode *mode = get_irn_mode(loop_info.end_val);
2159
2160         tar_6 = new_tarval_from_long(6, mode);
2161         tar_5 = new_tarval_from_long(5, mode);
2162         tar_4 = new_tarval_from_long(4, mode);
2163         tar_3 = new_tarval_from_long(3, mode);
2164         tar_2 = new_tarval_from_long(2, mode);
2165
2166         /* loop passes % {6, 5, 4, 3, 2} == 0  */
2167         if (tarval_is_null(tarval_mod(count_tar, tar_6)))
2168                 prefer = 6;
2169         else if (tarval_is_null(tarval_mod(count_tar, tar_5)))
2170                 prefer = 5;
2171         else if (tarval_is_null(tarval_mod(count_tar, tar_4)))
2172                 prefer = 4;
2173         else if (tarval_is_null(tarval_mod(count_tar, tar_3)))
2174                 prefer = 3;
2175         else if (tarval_is_null(tarval_mod(count_tar, tar_2)))
2176                 prefer = 2;
2177         else {
2178                 /* gcd(max_unroll, count_tar) */
2179                 int a = loop_info.max_unroll;
2180                 int b = (int)get_tarval_long(count_tar);
2181                 int c;
2182
2183                 DB((dbg, LEVEL_4, "gcd of max_unroll %d and count_tar %d: ", a, b));
2184
2185                 do {
2186                 c = a % b;
2187                 a = b; b = c;
2188                 } while( c != 0);
2189
2190                 DB((dbg, LEVEL_4, "%d\n", a));
2191                 return a;
2192         }
2193
2194         DB((dbg, LEVEL_4, "preferred unroll factor %d\n", prefer));
2195
2196         /*
2197          * If our preference is greater than the allowed unroll factor
2198          * we either might reduce the preferred factor and prevent a duffs device block,
2199          * or create a duffs device block, from which in this case (constants only)
2200          * we know the startloop at compiletime.
2201          * The latter yields the following graphs.
2202          * but for code generation we would want to use graph A.
2203          * The graphs are equivalent. So, we can only reduce the preferred factor.
2204          * A)                   B)
2205          *     PreHead             PreHead
2206          *        |      ,--.         |   ,--.
2207          *         \ Loop1   \        Loop2   \
2208          *          \  |     |       /  |     |
2209          *           Loop2   /      / Loop1   /
2210          *           |   `--'      |      `--'
2211          */
2212
2213         if (prefer <= loop_info.max_unroll)
2214                 return prefer;
2215         else {
2216                 switch(prefer) {
2217                         case 6:
2218                                 if (loop_info.max_unroll >= 3)
2219                                         return 3;
2220                                 else if (loop_info.max_unroll >= 2)
2221                                         return 2;
2222                                 else
2223                                         return 0;
2224
2225                         case 4:
2226                                 if (loop_info.max_unroll >= 2)
2227                                         return 2;
2228                                 else
2229                                         return 0;
2230
2231                         default:
2232                                 return 0;
2233                 }
2234         }
2235 }
2236
2237 /* Check if cur_loop is a simple counting loop.
2238  * Start, step and end are constants. */
2239 /* TODO split. */
2240 static unsigned get_unroll_decision_constant(void)
2241 {
2242         ir_node  *projres, *loop_condition, *iteration_path;
2243         unsigned  success, is_latest_val;
2244         tarval   *start_tar, *end_tar, *step_tar, *diff_tar, *count_tar, *stepped;
2245         pn_Cmp    proj_proj, norm_proj;
2246         ir_mode  *mode;
2247
2248         /* RETURN if loop is not 'simple' */
2249         projres = is_simple_loop();
2250         if (projres == NULL)
2251                 return 0;
2252
2253         /* One in of the loop condition needs to be loop invariant. => end_val
2254          * The other in is assigned by an add. => add
2255          * The add uses a loop invariant value => step
2256          * and a phi with a loop invariant start_val and the add node as ins.
2257
2258            ^   ^
2259            |   | .-,
2260            |   Phi |
2261                 \  |   |
2262           ^  Add   |
2263            \  | \__|
2264             cond
2265              /\
2266         */
2267
2268         loop_condition = get_irn_n(projres, 0);
2269
2270         success = get_const_pred(loop_condition, &loop_info.end_val, &iteration_path);
2271         if (! success)
2272                 return 0;
2273
2274         DB((dbg, LEVEL_4, "End_val %N, other %N\n", loop_info.end_val, iteration_path));
2275
2276         /* We may find the add or the phi first.
2277          * Until now we only have end_val. */
2278         if (is_Add(iteration_path) || is_Sub(iteration_path)) {
2279
2280                 /* We test against the latest value of the iv. */
2281                 is_latest_val = 1;
2282
2283                 loop_info.add = iteration_path;
2284                 DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add));
2285
2286                 /* Preds of the add should be step and the iteration_phi */
2287                 success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi);
2288                 if (! success)
2289                         return 0;
2290
2291                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2292
2293                 if (! is_Phi(loop_info.iteration_phi))
2294                         return 0;
2295
2296                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2297
2298                 /* Find start_val.
2299                  * Does necessary sanity check of add, if it is already set.  */
2300                 success = get_start_and_add(loop_info.iteration_phi, constant);
2301                 if (! success)
2302                         return 0;
2303
2304                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2305
2306         } else if (is_Phi(iteration_path)) {
2307                 ir_node *new_iteration_phi;
2308
2309                 /* We compare with the value the iv had entering this run. */
2310                 is_latest_val = 0;
2311
2312                 loop_info.iteration_phi = iteration_path;
2313                 DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi));
2314
2315                 /* Find start_val and add-node.
2316                  * Does necessary sanity check of add, if it is already set.  */
2317                 success = get_start_and_add(loop_info.iteration_phi, constant);
2318                 if (! success)
2319                         return 0;
2320
2321                 DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val));
2322                 DB((dbg, LEVEL_4, "Got add or sub %N\n", loop_info.add));
2323
2324                 success = get_const_pred(loop_info.add, &loop_info.step, &new_iteration_phi);
2325                 if (! success)
2326                         return 0;
2327
2328                 DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step));
2329
2330                 if (loop_info.iteration_phi != new_iteration_phi)
2331                         return 0;
2332
2333         } else {
2334                 /* RETURN */
2335                 return 0;
2336         }
2337
2338         mode = get_irn_mode(loop_info.end_val);
2339
2340         DB((dbg, LEVEL_4, "start %N, end %N, step %N\n",
2341                                 loop_info.start_val, loop_info.end_val, loop_info.step));
2342
2343         if (mode != mode_Is && mode != mode_Iu)
2344                 return 0;
2345
2346         /* TODO necessary? */
2347         if (!are_mode_I(loop_info.start_val, loop_info.step, loop_info.end_val))
2348                 return 0;
2349
2350         DB((dbg, LEVEL_4, "mode integer\n"));
2351
2352         end_tar = get_Const_tarval(loop_info.end_val);
2353         start_tar = get_Const_tarval(loop_info.start_val);
2354         step_tar = get_Const_tarval(loop_info.step);
2355
2356         if (tarval_is_null(step_tar))
2357                 /* TODO Might be worth a warning. */
2358                 return 0;
2359
2360         DB((dbg, LEVEL_4, "step is not 0\n"));
2361
2362         if ((!tarval_is_negative(step_tar)) ^ (!is_Sub(loop_info.add)))
2363                 loop_info.decreasing = 1;
2364
2365         diff_tar = tarval_sub(end_tar, start_tar, mode);
2366
2367         /* We need at least count_tar steps to be close to end_val, maybe more.
2368          * No way, that we have gone too many steps.
2369          * This represents the 'latest value'.
2370          * (If condition checks against latest value, is checked later) */
2371         count_tar = tarval_div(diff_tar, step_tar);
2372
2373         /* Iv will not pass end_val (except overflows).
2374          * Nothing done, as it would yield to no advantage. */
2375         if (tarval_is_negative(count_tar)) {
2376                 DB((dbg, LEVEL_1, "Loop is endless or never taken."));
2377                 /* TODO Might be worth a warning. */
2378                 return 0;
2379         }
2380
2381         count_stats(stats.u_simple_counting_loop);
2382
2383         loop_info.latest_value = is_latest_val;
2384
2385         /* TODO split here
2386         if (! is_simple_counting_loop(&count_tar))
2387                 return 0;
2388         */
2389
2390         /* stepped can be negative, if step < 0 */
2391         stepped = tarval_mul(count_tar, step_tar);
2392
2393         /* step as close to end_val as possible, */
2394         /* |stepped| <= |end_tar|, and dist(stepped, end_tar) is smaller than a step. */
2395         if (is_Sub(loop_info.add))
2396                 stepped = tarval_sub(start_tar, stepped, mode_Is);
2397         else
2398                 stepped = tarval_add(start_tar, stepped);
2399
2400         DB((dbg, LEVEL_4, "stepped to %ld\n", get_tarval_long(stepped)));
2401
2402         proj_proj = get_Proj_proj(projres);
2403         /* Assure that norm_proj is the stay-in-loop case. */
2404         if (loop_info.exit_cond == 1)
2405                 norm_proj = get_math_inverted_case(proj_proj);
2406         else
2407                 norm_proj = proj_proj;
2408
2409         DB((dbg, LEVEL_4, "normalized projection %s\n", get_pnc_string(norm_proj)));
2410
2411         /* Executed at most once (stay in counting loop if a Eq b) */
2412         if (norm_proj == pn_Cmp_Eq)
2413                 /* TODO Might be worth a warning. */
2414                 return 0;
2415
2416         /* calculates next values and increases count_tar according to it */
2417         success = simulate_next(&count_tar, stepped, step_tar, end_tar, norm_proj);
2418         if (! success)
2419                 return 0;
2420
2421         /* We run loop once more, if we compare to the
2422          * not yet in-/decreased iv. */
2423         if (is_latest_val == 0) {
2424                 DB((dbg, LEVEL_4, "condition uses not latest iv value\n"));
2425                 count_tar = tarval_add(count_tar, get_tarval_one(mode));
2426         }
2427
2428         DB((dbg, LEVEL_4, "loop taken %ld times\n", get_tarval_long(count_tar)));
2429
2430         /* Assure the loop is taken at least 1 time. */
2431         if (tarval_is_null(count_tar)) {
2432                 /* TODO Might be worth a warning. */
2433                 return 0;
2434         }
2435
2436         loop_info.count_tar = count_tar;
2437         return get_preferred_factor_constant(count_tar);
2438 }
2439
2440 /**
2441  * Loop unrolling
2442  */
2443 static void unroll_loop(void)
2444 {
2445         unroll_nr = 0;
2446
2447         /* get_unroll_decision_constant and invariant are completely
2448          * independent for flexibility.
2449          * Some checks may be performed twice. */
2450
2451         /* constant case? */
2452         if (opt_params.allow_const_unrolling)
2453                 unroll_nr = get_unroll_decision_constant();
2454         if (unroll_nr > 1) {
2455                 loop_info.unroll_kind = constant;
2456
2457         } else {
2458                 /* invariant case? */
2459                 if (opt_params.allow_invar_unrolling)
2460                         unroll_nr = get_unroll_decision_invariant();
2461                 if (unroll_nr > 1)
2462                         loop_info.unroll_kind = invariant;
2463         }
2464
2465         DB((dbg, LEVEL_1, " *** Unrolling %d times ***\n", unroll_nr));
2466
2467         if (unroll_nr > 1) {
2468                 loop_entries = NEW_ARR_F(entry_edge, 0);
2469
2470                 /* Get loop outs */
2471                 irg_walk_graph(current_ir_graph, get_loop_entries, NULL, NULL);
2472
2473                 if ((int)get_tarval_long(loop_info.count_tar) == unroll_nr)
2474                         loop_info.needs_backedge = 0;
2475                 else
2476                         loop_info.needs_backedge = 1;
2477
2478                 /* Use phase to keep copy of nodes from the condition chain. */
2479                 phase = new_phase(current_ir_graph, phase_irn_init_default);
2480
2481                 /* Copies the loop */
2482                 copy_loop(loop_entries, unroll_nr - 1);
2483
2484                 /* Line up the floating copies. */
2485                 place_copies(unroll_nr - 1);
2486
2487                 /* Remove phis with 1 in*/
2488                 irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL);
2489
2490                 /* dump_ir_block_graph(current_ir_graph, "-DONE"); */
2491
2492                 if (loop_info.unroll_kind == constant)
2493                         count_stats(stats.constant_unroll);
2494                 else
2495                         count_stats(stats.invariant_unroll);
2496
2497                 set_irg_doms_inconsistent(current_ir_graph);
2498                 set_irg_loopinfo_inconsistent(current_ir_graph);
2499                 /* TODO is it? */
2500                 set_irg_outs_inconsistent(current_ir_graph);
2501
2502                 DEL_ARR_F(loop_entries);
2503         }
2504
2505 }
2506
2507 /* Analyzes the loop, and checks if size is within allowed range.
2508  * Decides if loop will be processed. */
2509 static void init_analyze(ir_loop *loop)
2510 {
2511         /* Expect no benefit of big loops. */
2512         /* TODO tuning/make parameter */
2513         int      loop_depth;
2514         unsigned max_loop_nodes = opt_params.max_loop_size;
2515         unsigned max_loop_nodes_adapted;
2516         int      max_calls = opt_params.allowed_calls;
2517         int      depth_adaption = opt_params.depth_adaption;
2518
2519         cur_loop = loop;
2520
2521         loop_head = NULL;
2522         loop_head_valid = 1;
2523
2524         /* Reset loop info */
2525         memset(&loop_info, 0, sizeof(loop_info_t));
2526
2527         DB((dbg, LEVEL_1, "    >>>> current loop includes node %N <<<\n",
2528                 get_loop_node(loop, 0)));
2529
2530         /* Collect loop informations: head, node counts. */
2531         irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL);
2532
2533         /* Depth of 0 is the procedure and 1 a topmost loop. */
2534         loop_depth = get_loop_depth(loop) - 1;
2535
2536         /* Calculating in per mil. */
2537         max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth);
2538
2539         DB((dbg, LEVEL_1, "max_nodes: %d\nmax_nodes_adapted %d at depth of %d (adaption %d)\n",
2540                         max_loop_nodes, max_loop_nodes_adapted, loop_depth, depth_adaption));
2541
2542         if (! (loop_info.nodes > 0))
2543                 return;
2544
2545 #if LOOP_IGNORE_NODE_LIMITS
2546         DB((dbg, LEVEL_1, "WARNING: Loop node limitations ignored."));
2547 #else
2548         if (loop_info.nodes > max_loop_nodes) {
2549                 /* Only for stats */
2550                 DB((dbg, LEVEL_1, "Nodes %d > allowed nodes %d\n",
2551                         loop_info.nodes, loop_depth, max_loop_nodes));
2552                 count_stats(stats.too_large);
2553                 /* no RETURN */
2554                 /* Adaption might change it */
2555         }
2556
2557         /* Limit processing to loops smaller than given parameter. */
2558         if (loop_info.nodes > max_loop_nodes_adapted) {
2559                 DB((dbg, LEVEL_1, "Nodes %d > allowed nodes (depth %d adapted) %d\n",
2560                         loop_info.nodes, loop_depth, max_loop_nodes_adapted));
2561                 count_stats(stats.too_large_adapted);
2562                 return;
2563         }
2564
2565         if (loop_info.calls > opt_params.allowed_calls) {
2566                 DB((dbg, LEVEL_1, "Calls %d > allowed calls %d\n",
2567                         loop_info.calls, max_calls));
2568                 count_stats(stats.calls_limit);
2569                 return;
2570         }
2571 #endif
2572
2573         /* RETURN if there is no valid head */
2574         if (!loop_head || !loop_head_valid) {
2575                 DB((dbg, LEVEL_1,   "No valid loop head. Nothing done.\n"));
2576                 return;
2577         } else {
2578                 DB((dbg, LEVEL_1,   "Loophead: %N\n", loop_head));
2579         }
2580
2581         switch (loop_op) {
2582                 case loop_op_inversion:
2583                         loop_inversion();
2584                         break;
2585
2586                 case loop_op_unrolling:
2587                         unroll_loop();
2588                         break;
2589
2590                 default:
2591                         panic("Loop optimization not implemented.");
2592         }
2593         DB((dbg, LEVEL_1, "       <<<< end of loop with node %N >>>>\n",
2594                 get_loop_node(loop, 0)));
2595 }
2596
2597 /* Find innermost loops and add them to loops. */
2598 static void find_innermost_loop(ir_loop *loop)
2599 {
2600         /* descend into sons */
2601         int sons = get_loop_n_sons(loop);
2602
2603         if (sons == 0) {
2604                 ARR_APP1(ir_loop *, loops, loop);
2605         } else {
2606                 int s;
2607                 for (s=0; s<sons; s++) {
2608                         find_innermost_loop(get_loop_son(loop, s));
2609                 }
2610         }
2611 }
2612
2613 /* Assure preconditions are met and go through all loops. */
2614 void loop_optimization(ir_graph *irg)
2615 {
2616         ir_loop *loop;
2617         int     i, sons, nr;
2618
2619         /* SPEC2000: Total time 98.9% with inversion only */
2620         opt_params.max_loop_size = 100;
2621         opt_params.depth_adaption = 400;
2622         opt_params.count_phi = 0;
2623         opt_params.count_proj = 0;
2624         opt_params.allowed_calls = 0;
2625
2626         opt_params.max_cc_size = 100;
2627
2628         /* Unrolling not yet tested */
2629         opt_params.allow_const_unrolling = 1;
2630         opt_params.allow_invar_unrolling = 1;
2631
2632         /* Reset stats for this procedure */
2633         reset_stats();
2634
2635         /* Preconditions */
2636         set_current_ir_graph(irg);
2637
2638         edges_assure(irg);
2639         assure_irg_outs(irg);
2640
2641         /* NOTE: sets only the loop attribute of blocks, not nodes */
2642         /* NOTE: Kills links */
2643         assure_cf_loop(irg);
2644
2645         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
2646         collect_phiprojs(irg);
2647         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2648
2649         loop = get_irg_loop(irg);
2650         sons = get_loop_n_sons(loop);
2651
2652         loops = NEW_ARR_F(ir_loop *, 0);
2653         /* List all inner loops */
2654         for (nr = 0; nr < sons; ++nr) {
2655                 find_innermost_loop(get_loop_son(loop, nr));
2656         }
2657
2658         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
2659         /* Set all links to NULL */
2660         irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
2661
2662         for (i = 0; i < ARR_LEN(loops); ++i) {
2663                 ir_loop *loop = loops[i];
2664
2665                 count_stats(stats.loops);
2666
2667                 /* Analyze and handle loop */
2668                 init_analyze(loop);
2669
2670                 /* Copied blocks do not have their phi list yet */
2671                 collect_phiprojs(irg);
2672
2673                 /* Set links to NULL
2674                  * TODO Still necessary? */
2675                 irg_walk_graph(current_ir_graph, reset_link, NULL, NULL);
2676         }
2677
2678         print_stats();
2679
2680         DEL_ARR_F(loops);
2681         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2682         ir_free_resources(irg, IR_RESOURCE_PHI_LIST);
2683 }
2684
2685 void do_loop_unrolling(ir_graph *irg)
2686 {
2687         loop_op = loop_op_unrolling;
2688
2689         DB((dbg, LEVEL_1, " >>> unrolling (Startnode %N) <<<\n",
2690                                 get_irg_start(irg)));
2691
2692         loop_optimization(irg);
2693
2694         DB((dbg, LEVEL_1, " >>> unrolling done (Startnode %N) <<<\n",
2695                                 get_irg_start(irg)));
2696 }
2697
2698 void do_loop_inversion(ir_graph *irg)
2699 {
2700         loop_op = loop_op_inversion;
2701
2702         DB((dbg, LEVEL_1, " >>> inversion (Startnode %N) <<<\n",
2703                                 get_irg_start(irg)));
2704
2705         loop_optimization(irg);
2706
2707         DB((dbg, LEVEL_1, " >>> inversion done (Startnode %N) <<<\n",
2708                                 get_irg_start(irg)));
2709 }
2710
2711 void do_loop_peeling(ir_graph *irg)
2712 {
2713         loop_op = loop_op_peeling;
2714
2715         DB((dbg, LEVEL_1, " >>> peeling (Startnode %N) <<<\n",
2716                                 get_irg_start(irg)));
2717
2718         loop_optimization(irg);
2719
2720         DB((dbg, LEVEL_1, " >>> peeling done (Startnode %N) <<<\n",
2721                                 get_irg_start(irg)));
2722
2723 }
2724
2725 ir_graph_pass_t *loop_inversion_pass(const char *name)
2726 {
2727         return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion);
2728 }
2729
2730 ir_graph_pass_t *loop_unroll_pass(const char *name)
2731 {
2732         return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling);
2733 }
2734
2735 ir_graph_pass_t *loop_peeling_pass(const char *name)
2736 {
2737         return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling);
2738 }
2739
2740 void firm_init_loop_opt(void)
2741 {
2742         FIRM_DBG_REGISTER(dbg, "firm.opt.loop");
2743 }