beifg: Simplify the implementation of be_ifg_foreach_node().
[libfirm] / ir / opt / tailrec.c
1 /*
2  * This file is part of libFirm.
3  * Copyright (C) 2012 University of Karlsruhe.
4  */
5
6 /**
7  * @file
8  * @brief   Tail-recursion call optimization.
9  * @date    08.06.2004
10  * @author  Michael Beck
11  */
12 #include "config.h"
13
14 #include <string.h>
15 #include <assert.h>
16
17 #include "debug.h"
18 #include "iroptimize.h"
19 #include "scalar_replace.h"
20 #include "array_t.h"
21 #include "irprog_t.h"
22 #include "irgwalk.h"
23 #include "irgmod.h"
24 #include "irop.h"
25 #include "irnode_t.h"
26 #include "irgraph_t.h"
27 #include "ircons.h"
28 #include "irflag.h"
29 #include "trouts.h"
30 #include "irouts.h"
31 #include "irhooks.h"
32 #include "ircons_t.h"
33 #include "irpass.h"
34 #include "util.h"
35
/* Debug module handle of this file. It is registered under the name
 * "firm.opt.tailrec" by opt_tail_rec_irg() and opt_tail_recursion().
 * NOTE(review): DEBUG_ONLY presumably drops the declaration in non-debug
 * builds -- confirm against debug.h. */
DEBUG_ONLY(static firm_dbg_module_t *dbg;)
37
38 /**
39  * the environment for collecting data
40  */
41 typedef struct collect_t {
42         ir_node *proj_X;      /**< initial exec proj */
43         ir_node *block;       /**< old first block */
44         int     blk_idx;      /**< cfgpred index of the initial exec in block */
45         ir_node *proj_m;      /**< memory from start proj's */
46         ir_node *proj_data;   /**< linked list of all parameter access proj's */
47 } collect_t;
48
49 /**
50  * walker for collecting data, fills a collect_t environment
51  */
52 static void collect_data(ir_node *node, void *env)
53 {
54         collect_t *data = (collect_t*)env;
55         ir_node *pred;
56         ir_opcode opcode;
57
58         switch (get_irn_opcode(node)) {
59         case iro_Proj:
60                 pred = get_Proj_pred(node);
61
62                 opcode = (ir_opcode)get_irn_opcode(pred);
63                 if (opcode == iro_Proj) {
64                         ir_node *start = get_Proj_pred(pred);
65
66                         if (is_Start(start)) {
67                                 if (get_Proj_proj(pred) == pn_Start_T_args) {
68                                         /* found Proj(ProjT(Start)) */
69                                         set_irn_link(node, data->proj_data);
70                                         data->proj_data = node;
71                                 }
72                         }
73                 } else if (opcode == iro_Start) {
74                         if (get_Proj_proj(node) == pn_Start_X_initial_exec) {
75                                 /* found ProjX(Start) */
76                                 data->proj_X = node;
77                         }
78                 }
79                 break;
80         case iro_Block: {
81                 int i, n_pred = get_Block_n_cfgpreds(node);
82                 for (i = 0; i < n_pred; ++i) {
83                         if (get_Block_cfgpred(node, i) == data->proj_X) {
84                                 data->block   = node;
85                                 data->blk_idx = i;
86                                 break;
87                         }
88                 }
89                 break;
90         }
91         default:
92                 break;
93         }
94 }
95
/**
 * The ways a returned value may be derived from the result of the recursive
 * call. The enumerator order matters: opt_tail_rec_irg() treats every value
 * comparing >= TR_BAD as "cannot be transformed".
 */
enum tail_rec_variants {
        TR_DIRECT,  /**< direct return value, i.e. return func(). */
        TR_ADD,     /**< additive return value, i.e. return x +/- func() */
        TR_MUL,     /**< multiplicative return value, i.e. return x * func() or return -func() */
        TR_BAD,     /**< any other transformation */
        TR_UNKNOWN  /**< during construction */
};
typedef enum tail_rec_variants tail_rec_variants;
103
104 typedef struct tr_env {
105         int               n_tail_calls;  /**< number of tail calls found */
106         int               n_ress;        /**< number of return values */
107         tail_rec_variants *variants;     /**< return value variants */
108         ir_node           *rets;         /**< list of returns that can be transformed */
109 } tr_env;
110
111
112 /**
113  * do the graph reconstruction for tail-recursion elimination
114  *
115  * @param irg  the graph that will reconstructed
116  * @param env  tail recursion environment
117  */
118 static void do_opt_tail_rec(ir_graph *irg, tr_env *env)
119 {
120         ir_node *end_block = get_irg_end_block(irg);
121         ir_node *block, *jmp, *call, *calls;
122         ir_node **in;
123         ir_node **phis;
124         ir_node ***call_params;
125         ir_node *p, *n;
126         int i, j, n_params, n_locs;
127         collect_t data;
128         int rem            = get_optimize();
129         ir_entity *ent     = get_irg_entity(irg);
130         ir_type *method_tp = get_entity_type(ent);
131
132         assert(env->n_tail_calls > 0);
133
134         /* we add new blocks and change the control flow */
135         clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
136
137         /* we must build some new nodes WITHOUT CSE */
138         set_optimize(0);
139
140         /* collect needed data */
141         data.proj_X    = NULL;
142         data.block     = NULL;
143         data.blk_idx   = -1;
144         data.proj_m    = get_irg_initial_mem(irg);
145         data.proj_data = NULL;
146         irg_walk_graph(irg, NULL, collect_data, &data);
147
148         /* check number of arguments */
149         call     = (ir_node*)get_irn_link(end_block);
150         n_params = get_Call_n_params(call);
151
152         assert(data.proj_X && "Could not find initial exec from Start");
153         assert(data.block  && "Could not find first block");
154         assert(data.proj_m && "Could not find initial memory");
155         assert((data.proj_data || n_params == 0) && "Could not find Proj(ProjT(Start)) of non-void function");
156
157         /* allocate in's for phi and block construction */
158         NEW_ARR_A(ir_node *, in, env->n_tail_calls + 1);
159
160         /* build a new header block for the loop we create */
161         i = 0;
162         in[i++] = data.proj_X;
163
164         /* turn Return's into Jmp's */
165         for (p = env->rets; p; p = n) {
166                 ir_node *block = get_nodes_block(p);
167
168                 n = (ir_node*)get_irn_link(p);
169                 in[i++] = new_r_Jmp(block);
170
171                 // exchange(p, new_r_Bad(irg));
172
173                 /* we might generate an endless loop, so add
174                  * the block to the keep-alive list */
175                 add_End_keepalive(get_irg_end(irg), block);
176         }
177         assert(i == env->n_tail_calls + 1);
178
179         /* now create it */
180         block = new_r_Block(irg, i, in);
181         jmp   = new_r_Jmp(block);
182
183         /* the old first block is now the second one */
184         set_Block_cfgpred(data.block, data.blk_idx, jmp);
185
186         /* allocate phi's, position 0 contains the memory phi */
187         NEW_ARR_A(ir_node *, phis, n_params + 1);
188
189         /* build the memory phi */
190         i = 0;
191         in[i] = new_r_Proj(get_irg_start(irg), mode_M, pn_Start_M);
192         set_irg_initial_mem(irg, in[i]);
193         ++i;
194
195         for (calls = call; calls != NULL; calls = (ir_node*)get_irn_link(calls)) {
196                 in[i] = get_Call_mem(calls);
197                 ++i;
198         }
199         assert(i == env->n_tail_calls + 1);
200
201         phis[0] = new_r_Phi(block, env->n_tail_calls + 1, in, mode_M);
202
203         /* build the data Phi's */
204         if (n_params > 0) {
205                 ir_node *calls;
206                 ir_node *args;
207
208                 NEW_ARR_A(ir_node **, call_params, env->n_tail_calls);
209
210                 /* collect all parameters */
211                 for (i = 0, calls = call; calls != NULL;
212                      calls = (ir_node*)get_irn_link(calls)) {
213                         call_params[i] = get_Call_param_arr(calls);
214                         ++i;
215                 }
216
217                 /* build new Proj's and Phi's */
218                 args    = get_irg_args(irg);
219                 for (i = 0; i < n_params; ++i) {
220                         ir_mode *mode = get_type_mode(get_method_param_type(method_tp, i));
221
222                         in[0] = new_r_Proj(args, mode, i);
223                         for (j = 0; j < env->n_tail_calls; ++j)
224                                 in[j + 1] = call_params[j][i];
225
226                         phis[i + 1] = new_r_Phi(block, env->n_tail_calls + 1, in, mode);
227                 }
228         }
229
230         /*
231          * ok, we are here, so we have build and collected all needed Phi's
232          * now exchange all Projs into links to Phi
233          */
234         exchange(data.proj_m, phis[0]);
235         for (p = data.proj_data; p; p = n) {
236                 long proj = get_Proj_proj(p);
237
238                 assert(0 <= proj && proj < n_params);
239                 n = (ir_node*)get_irn_link(p);
240                 exchange(p, phis[proj + 1]);
241         }
242
243         /* tail recursion was done, all info is invalid */
244         clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
245                            | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
246         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
247
248         set_optimize(rem);
249
250         /* check if we need new values */
251         n_locs = 0;
252         for (i = 0; i < env->n_ress; ++i) {
253                 if (env->variants[i] != TR_DIRECT) {
254                         ++n_locs;
255                         break;
256                 }
257         }
258
259         if (n_locs > 0) {
260                 ir_node *start_block;
261                 ir_node **in;
262                 ir_mode **modes;
263
264                 NEW_ARR_A(ir_node *, in, env->n_ress);
265                 NEW_ARR_A(ir_mode *, modes, env->n_ress);
266                 ssa_cons_start(irg, env->n_ress);
267
268                 start_block = get_irg_start_block(irg);
269                 set_r_cur_block(irg, start_block);
270
271                 /* set the neutral elements for the iteration start */
272                 for (i = 0; i < env->n_ress; ++i) {
273                         ir_type *tp = get_method_res_type(method_tp, i);
274                         ir_mode *mode = get_type_mode(tp);
275
276                         modes[i] = mode;
277                         if (env->variants[i] == TR_ADD) {
278                                 set_r_value(irg, i, new_r_Const(irg, get_mode_null(mode)));
279                         } else if (env->variants[i] == TR_MUL) {
280                                 set_r_value(irg, i, new_r_Const(irg, get_mode_one(mode)));
281                         }
282                 }
283                 mature_immBlock(start_block);
284
285                 /* no: we can kill all returns */
286                 for (p = env->rets; p; p = n) {
287                         ir_node *block = get_nodes_block(p);
288                         ir_node *jmp, *tuple;
289
290                         set_r_cur_block(irg, block);
291                         n = (ir_node*)get_irn_link(p);
292
293                         ir_node *const call = skip_Proj(get_Return_mem(p));
294                         ir_node *const mem  = get_Call_mem(call);
295
296                         /* create a new jump, free of CSE */
297                         set_optimize(0);
298                         jmp = new_r_Jmp(block);
299                         set_optimize(rem);
300
301                         for (i = 0; i < env->n_ress; ++i) {
302                                 ir_mode *mode = modes[i];
303                                 if (env->variants[i] != TR_DIRECT) {
304                                         in[i] = get_r_value(irg, i, mode);
305                                 } else {
306                                         in[i] = new_r_Bad(irg, mode);
307                                 }
308                         }
309                         /* create a new tuple for the return values */
310                         tuple = new_r_Tuple(block, env->n_ress, in);
311
312                         ir_node *const in[] = {
313                                 [pn_Call_M]         = mem,
314                                 [pn_Call_T_result]  = tuple,
315                                 [pn_Call_X_regular] = jmp,
316                                 [pn_Call_X_except]  = new_r_Bad(irg, mode_X),
317                         };
318                         turn_into_tuple(call, ARRAY_SIZE(in), in);
319
320                         for (i = 0; i < env->n_ress; ++i) {
321                                 ir_node *res = get_Return_res(p, i);
322                                 if (env->variants[i] != TR_DIRECT) {
323                                         set_r_value(irg, i, res);
324                                 }
325                         }
326
327                         exchange(p, new_r_Bad(irg, mode_X));
328                 }
329
330                 /* finally fix all other returns */
331                 end_block = get_irg_end_block(irg);
332                 for (i = get_Block_n_cfgpreds(end_block) - 1; i >= 0; --i) {
333                         ir_node *ret = get_Block_cfgpred(end_block, i);
334                         ir_node *block;
335
336                         /* search all Returns of a block */
337                         if (! is_Return(ret))
338                                 continue;
339
340                         block = get_nodes_block(ret);
341                         set_r_cur_block(irg, block);
342                         for (j = 0; j < env->n_ress; ++j) {
343                                 ir_node *pred = get_Return_res(ret, j);
344                                 ir_node *n;
345
346                                 switch (env->variants[j]) {
347                                 case TR_DIRECT:
348                                         continue;
349
350                                 case TR_ADD:
351                                         n = get_r_value(irg, j, modes[j]);
352                                         n = new_r_Add(block, n, pred, modes[j]);
353                                         set_Return_res(ret, j, n);
354                                         break;
355
356                                 case TR_MUL:
357                                         n = get_r_value(irg, j, modes[j]);
358                                         n = new_r_Mul(block, n, pred, modes[j]);
359                                         set_Return_res(ret, j, n);
360                                         break;
361
362                                 default:
363                                         assert(!"unexpected tail recursion variant");
364                                 }
365                         }
366                 }
367                 ssa_cons_finish(irg);
368         } else {
369                 ir_node *bad = new_r_Bad(irg, mode_X);
370
371                 /* no: we can kill all returns */
372                 for (p = env->rets; p; p = n) {
373                         n = (ir_node*)get_irn_link(p);
374                         exchange(p, bad);
375                 }
376         }
377 }
378
379 /**
380  * Check the lifetime of locals in the given graph.
381  * Tail recursion can only be done, if we can prove that
382  * the lifetime of locals end with the recursive call.
383  * We do this by checking that no address of a local variable is
384  * stored or transmitted as an argument to a call.
385  *
386  * @return non-zero if it's ok to do tail recursion
387  */
388 static int check_lifetime_of_locals(ir_graph *irg)
389 {
390         ir_node *irg_frame;
391         int i;
392         ir_type *frame_tp = get_irg_frame_type(irg);
393
394         irg_frame = get_irg_frame(irg);
395         for (i = get_irn_n_outs(irg_frame) - 1; i >= 0; --i) {
396                 ir_node *succ = get_irn_out(irg_frame, i);
397
398                 if (is_Sel(succ)) {
399                         /* Check if we have compound arguments.
400                            For now, we cannot handle them, */
401                         if (get_entity_owner(get_Sel_entity(succ)) != frame_tp)
402                                 return 0;
403
404                         if (is_address_taken(succ))
405                                 return 0;
406                 }
407         }
408         return 1;
409 }
410
411 /**
412  * Examine irn and detect the recursion variant.
413  */
414 static tail_rec_variants find_variant(ir_node *irn, ir_node *call)
415 {
416         ir_node           *a, *b;
417         tail_rec_variants va, vb, res;
418
419         if (skip_Proj(skip_Proj(irn)) == call) {
420                 /* found it */
421                 return TR_DIRECT;
422         }
423         switch (get_irn_opcode(irn)) {
424         case iro_Add:
425                 /* try additive */
426                 a = get_Add_left(irn);
427                 if (get_nodes_block(a) != get_nodes_block(call)) {
428                         /* we are outside, ignore */
429                         va = TR_UNKNOWN;
430                 } else {
431                         va = find_variant(a, call);
432                         if (va == TR_BAD)
433                                 return TR_BAD;
434                 }
435                 b = get_Add_right(irn);
436                 if (get_nodes_block(b) != get_nodes_block(call)) {
437                         /* we are outside, ignore */
438                         vb = TR_UNKNOWN;
439                 } else {
440                         vb = find_variant(b, call);
441                         if (vb == TR_BAD)
442                                 return TR_BAD;
443                 }
444                 if (va == vb) {
445                         res = va;
446                 }
447                 else if (va == TR_UNKNOWN)
448                         res = vb;
449                 else if (vb == TR_UNKNOWN)
450                         res = va;
451                 else {
452                         /* they are different but none is TR_UNKNOWN -> incompatible */
453                         return TR_BAD;
454                 }
455                 if (res == TR_DIRECT || res == TR_ADD)
456                         return TR_ADD;
457                 /* not compatible */
458                 return TR_BAD;
459
460         case iro_Sub:
461                 /* try additive, but return value must be left */
462                 a = get_Sub_left(irn);
463                 if (get_nodes_block(a) != get_nodes_block(call)) {
464                         /* we are outside, ignore */
465                         va = TR_UNKNOWN;
466                 } else {
467                         va = find_variant(a, call);
468                         if (va == TR_BAD)
469                                 return TR_BAD;
470                 }
471                 b = get_Sub_right(irn);
472                 if (get_nodes_block(b) != get_nodes_block(call)) {
473                         /* we are outside, ignore */
474                         vb = TR_UNKNOWN;
475                 } else {
476                         vb = find_variant(b, call);
477                         if (vb != TR_UNKNOWN)
478                                 return TR_BAD;
479                 }
480                 res = va;
481                 if (res == TR_DIRECT || res == TR_ADD)
482                         return res;
483                 /* not compatible */
484                 return TR_BAD;
485
486         case iro_Mul:
487                 /* try multiplicative */
488                 a = get_Mul_left(irn);
489                 if (get_nodes_block(a) != get_nodes_block(call)) {
490                         /* we are outside, ignore */
491                         va = TR_UNKNOWN;
492                 } else {
493                         va = find_variant(a, call);
494                         if (va == TR_BAD)
495                                 return TR_BAD;
496                 }
497                 b = get_Mul_right(irn);
498                 if (get_nodes_block(b) != get_nodes_block(call)) {
499                         /* we are outside, ignore */
500                         vb = TR_UNKNOWN;
501                 } else {
502                         vb = find_variant(b, call);
503                         if (vb == TR_BAD)
504                                 return TR_BAD;
505                 }
506                 if (va == vb) {
507                         res = va;
508                 }
509                 else if (va == TR_UNKNOWN)
510                         res = vb;
511                 else if (vb == TR_UNKNOWN)
512                         res = va;
513                 else {
514                         /* they are different but none is TR_UNKNOWN -> incompatible */
515                         return TR_BAD;
516                 }
517                 if (res == TR_DIRECT || res == TR_MUL)
518                         return TR_MUL;
519                 /* not compatible */
520                 return TR_BAD;
521
522         case iro_Minus:
523                 /* try multiplicative */
524                 a = get_Minus_op(irn);
525                 res =  find_variant(a, call);
526                 if (res == TR_DIRECT)
527                         return TR_MUL;
528                 if (res == TR_MUL || res == TR_UNKNOWN)
529                         return res;
530                 /* not compatible */
531                 return TR_BAD;
532
533         default:
534                 return TR_UNKNOWN;
535         }
536 }
537
538
539 /*
540  * convert simple tail-calls into loops
541  */
542 void opt_tail_rec_irg(ir_graph *irg)
543 {
544         tr_env    env;
545         ir_node   *end_block;
546         int       i, n_ress, n_tail_calls = 0;
547         ir_node   *rets = NULL;
548         ir_type   *mtd_type, *call_type;
549         ir_entity *ent;
550         ir_graph  *rem;
551
552         assure_irg_properties(irg,
553                 IR_GRAPH_PROPERTY_MANY_RETURNS
554                 | IR_GRAPH_PROPERTY_NO_BADS
555                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS);
556
557         FIRM_DBG_REGISTER(dbg, "firm.opt.tailrec");
558
559         if (! check_lifetime_of_locals(irg)) {
560                 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
561                 return;
562         }
563
564         rem = current_ir_graph;
565         current_ir_graph = irg;
566
567         ent      = get_irg_entity(irg);
568         mtd_type = get_entity_type(ent);
569         n_ress   = get_method_n_ress(mtd_type);
570
571         env.variants = NULL;
572         env.n_ress   = n_ress;
573
574         if (n_ress > 0) {
575                 NEW_ARR_A(tail_rec_variants, env.variants, n_ress);
576
577                 for (i = 0; i < n_ress; ++i)
578                         env.variants[i] = TR_DIRECT;
579         }
580
581         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
582
583         end_block = get_irg_end_block(irg);
584         set_irn_link(end_block, NULL);
585
586         for (i = get_Block_n_cfgpreds(end_block) - 1; i >= 0; --i) {
587                 ir_node *ret = get_Block_cfgpred(end_block, i);
588                 ir_node *call, *call_ptr;
589                 int j;
590                 ir_node **ress;
591
592                 /* search all Returns of a block */
593                 if (! is_Return(ret))
594                         continue;
595
596                 /* check, if it's a Return self() */
597                 call = skip_Proj(get_Return_mem(ret));
598                 if (! is_Call(call))
599                         continue;
600
601                 /* the call must be in the same block as the return */
602                 if (get_nodes_block(call) != get_nodes_block(ret))
603                         continue;
604
605                 /* check if it's a recursive call */
606                 call_ptr = get_Call_ptr(call);
607
608                 if (! is_SymConst_addr_ent(call_ptr))
609                         continue;
610
611                 ent = get_SymConst_entity(call_ptr);
612                 if (!ent || get_entity_irg(ent) != irg)
613                         continue;
614
615                 /*
616                  * Check, that the types match. At least in C
617                  * this might fail.
618                  */
619                 mtd_type  = get_entity_type(ent);
620                 call_type = get_Call_type(call);
621
622                 if (mtd_type != call_type) {
623                         /*
624                          * Hmm, the types did not match, bad.
625                          * This can happen in C when no prototype is given
626                          * or K&R style is used.
627                          */
628                         DB((dbg, LEVEL_3, "  tail recursion fails because of call type mismatch: %+F != %+F\n", mtd_type, call_type));
629                         continue;
630                 }
631
632                 /* ok, mem is routed to a recursive call, check return args */
633                 ress = get_Return_res_arr(ret);
634                 for (j = get_Return_n_ress(ret) - 1; j >= 0; --j) {
635                         tail_rec_variants var = find_variant(ress[j], call);
636
637                         if (var >= TR_BAD) {
638                                 /* cannot be transformed */
639                                 break;
640                         }
641                         if (var == TR_DIRECT) {
642                                 var = env.variants[j];
643                         } else if (env.variants[j] == TR_DIRECT) {
644                                 env.variants[j] = var;
645                         }
646                         if (env.variants[j] != var) {
647                                 /* not compatible */
648                                 DB((dbg, LEVEL_3, "  tail recursion fails for %d return value of %+F\n", j, ret));
649                                 break;
650                         }
651                 }
652                 if (j >= 0)
653                         continue;
654
655                 /* here, we have found a call */
656                 set_irn_link(call, get_irn_link(end_block));
657                 set_irn_link(end_block, call);
658                 ++n_tail_calls;
659
660                 /* link all returns, we will need this */
661                 set_irn_link(ret, rets);
662                 rets = ret;
663         }
664
665         /* now, end_block->link contains the list of all tail calls */
666         if (n_tail_calls > 0) {
667                 DB((dbg, LEVEL_2, "  Performing tail recursion for graph %s and %d Calls\n",
668                     get_entity_ld_name(get_irg_entity(irg)), n_tail_calls));
669
670                 hook_tail_rec(irg, n_tail_calls);
671
672                 env.n_tail_calls = n_tail_calls;
673                 env.rets         = rets;
674                 do_opt_tail_rec(irg, &env);
675                 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
676         } else {
677                 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
678         }
679         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
680         current_ir_graph = rem;
681 }
682
683 ir_graph_pass_t *opt_tail_rec_irg_pass(const char *name)
684 {
685         return def_graph_pass(name ? name : "tailrec", opt_tail_rec_irg);
686 }
687
688 /*
689  * optimize tail recursion away
690  */
691 void opt_tail_recursion(void)
692 {
693         size_t i, n;
694
695         FIRM_DBG_REGISTER(dbg, "firm.opt.tailrec");
696
697         DB((dbg, LEVEL_1, "Performing tail recursion ...\n"));
698         for (i = 0, n = get_irp_n_irgs(); i < n; ++i) {
699                 ir_graph *irg = get_irp_irg(i);
700                 opt_tail_rec_irg(irg);
701         }
702 }
703
704 ir_prog_pass_t *opt_tail_recursion_pass(const char *name)
705 {
706         return def_prog_pass(name ? name : "tailrec", opt_tail_recursion);
707 }