First iteration to detotalise memory in loops.
[libfirm] / ir / opt / ldst2.c
1 #include <stdint.h>
2 #include "array.h"
3 #include "debug.h"
4 #include "ircons.h"
5 #include "irgraph.h"
6 #include "irgmod.h"
7 #include "irgopt.h"
8 #include "irgwalk.h"
9 #include "irmemory.h"
10 #include "irnode.h"
11 #include "irnodeset.h"
12 #include "ldst2.h"
13 #include "obst.h"
14 #include "return.h"
15
16
17 #define OPTIMISE_LOAD_AFTER_LOAD
18
19
20 #define UNIMPLEMENTED abort();
21
22
23 DEBUG_ONLY(static firm_dbg_module_t *dbg);
24
25
26 static struct obstack obst;
27 static size_t count_addrs;
28 static ir_node** addrs;
29
30
31 static void AddressCollector(ir_node* node, void* env)
32 {
33         ir_nodeset_t* addrs_set = env;
34         ir_node* addr;
35         if (is_Load(node)) {
36                 addr = get_Load_ptr(node);
37         } else if (is_Store(node)) {
38                 addr = get_Store_ptr(node);
39         } else {
40                 return;
41         }
42         ir_nodeset_insert(addrs_set, addr);
43 }
44
45
46 /* Collects all unique addresses used by load and store nodes of a graph and
47  * puts them into an array for later use */
48 static void CollectAddresses(ir_graph* irg)
49 {
50         ir_nodeset_t addrs_set;
51
52         ir_nodeset_init(&addrs_set);
53         irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
54
55         count_addrs = ir_nodeset_size(&addrs_set);
56         DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (uint)count_addrs));
57         if (count_addrs != 0) {
58                 ir_nodeset_iterator_t addr_iter;
59                 size_t i;
60
61                 addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
62                 ir_nodeset_iterator_init(&addr_iter, &addrs_set);
63                 for (i = 0; i < count_addrs; i++) {
64                         ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
65                         assert(addr != NULL);
66                         set_irn_link(addr, (void*)(uintptr_t)i);
67                         addrs[i] = addr;
68                         DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
69                 }
70         }
71 }
72
73
74 static void AliasSetAdder(ir_node* block, void* env)
75 {
76         ir_nodeset_t* alias_set;
77         size_t i;
78
79         alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
80         for (i = 0; i < count_addrs; i++) {
81                 ir_nodeset_init(&alias_set[i]);
82         }
83         set_irn_link(block, alias_set);
84 }
85
86
87 static void SetStartAddressesTop(ir_graph* irg)
88 {
89         ir_node* initial_mem;
90         ir_node* start_block;
91         ir_nodeset_t* start_addrs;
92         size_t i;
93
94         initial_mem = get_irg_initial_mem(irg);
95         start_block = get_irg_start_block(irg);
96         start_addrs = get_irn_link(start_block);
97         for (i = 0; i < count_addrs; i++) {
98                 ir_nodeset_insert(&start_addrs[i], initial_mem);
99         }
100         mark_Block_block_visited(start_block);
101 }
102
103
104 static void AliasSetDestroyer(ir_node* block, void* env)
105 {
106         ir_nodeset_t* alias_set = get_irn_link(block);
107         size_t i;
108
109         for (i = 0; i < count_addrs; i++) {
110                 ir_nodeset_destroy(&alias_set[i]);
111         }
112 }
113
114
115 static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
116 {
117         ir_node* other_addr;
118         ir_mode* other_mode;
119
120         if (is_Proj(other)) other = get_Proj_pred(other);
121
122         if (is_Load(other)) {
123                 other_addr = get_Load_ptr(other);
124         } else if (is_Store(other)) {
125                 other_addr = get_Store_ptr(other);
126         } else {
127                 return may_alias;
128         }
129
130         other_mode = get_irn_mode(other);
131         return get_alias_relation(irg, addr, mode, other_addr, other_mode);
132 }
133
134
135 static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
136 {
137         size_t set_size = ir_nodeset_size(after_set);
138         ir_nodeset_iterator_t iter;
139
140         assert(set_size != 0);
141
142         ir_nodeset_iterator_init(&iter, after_set);
143         if (set_size == 1) {
144                 return ir_nodeset_iterator_next(&iter);
145         } else {
146                 ir_node** in;
147                 size_t i;
148
149                 NEW_ARR_A(ir_node*, in, set_size);
150                 for (i = 0; i < set_size; i++) {
151                         in[i] = ir_nodeset_iterator_next(&iter);
152                 }
153                 return new_r_Sync(irg, block, set_size, in);
154         }
155 }
156
157
158 static ir_node** unfinished_phis;
159
160
161 static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
162 {
163         int unfinished = 0;
164         size_t block_n_preds = get_Block_n_cfgpreds(block);
165         ir_nodeset_t* thissets;
166         ir_node** in;
167         size_t i;
168         size_t j;
169
170         thissets = get_irn_link(block);
171         NEW_ARR_A(ir_node*, in, block_n_preds);
172         for (j = 0; j < count_addrs; j++) {
173                 ir_node* new_phi;
174
175                 for (i = 0; i < block_n_preds; i++) {
176                         ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
177                         ir_nodeset_t* predsets = get_irn_link(pred_block);
178                         size_t predset_size = ir_nodeset_size(&predsets[j]);
179
180                         if (predset_size == 0) {
181                                 in[i] = new_r_Unknown(irg, mode_M);
182                                 unfinished = 1;
183                         } else {
184                                 in[i] = GenerateSync(irg, pred_block, &predsets[j]);
185                         }
186                 }
187                 new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
188                 if (unfinished) {
189                         set_irn_link(new_phi, unfinished_phis[j]);
190                         unfinished_phis[j] = new_phi;
191                 }
192                 ir_nodeset_insert(&thissets[j], new_phi);
193         }
194 }
195
196
197 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
198
199
200 static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
201 {
202         size_t n = get_Phi_n_preds(phi);
203         size_t i;
204
205         for (i = 0; i < n; i++) {
206                 WalkMem(irg, get_Phi_pred(phi, i), block);
207         }
208
209         PlaceMemPhis(irg, block, phi);
210         exchange(phi, new_Bad());
211 }
212
213
214 static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
215 {
216         ir_node* addr = get_Load_ptr(load);
217         size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
218         ir_nodeset_t* interfere_sets = get_irn_link(block);
219         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
220         size_t size = ir_nodeset_size(interfere_set);
221         ir_nodeset_iterator_t interfere_iter;
222         size_t i;
223
224         assert(size > 0);
225         ir_nodeset_iterator_init(&interfere_iter, interfere_set);
226         if (size == 1) {
227                 ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
228                 if (is_Proj(after)) {
229                         ir_node* pred = get_Proj_pred(after);
230                         if (is_Load(pred)) {
231 #ifdef OPTIMISE_LOAD_AFTER_LOAD
232                                 if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
233                                         exchange(load, pred);
234                                         return;
235                                 }
236 #endif
237                                 after = get_Load_mem(pred);
238                         }
239                 }
240                 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
241                 set_Load_mem(load, after);
242         } else {
243                 ir_node** after_set;
244                 ir_node* sync;
245
246                 NEW_ARR_A(ir_node*, after_set, size);
247                 for (i = 0; i < size; i++) {
248                         ir_node* mem = ir_nodeset_iterator_next(&interfere_iter);
249                         if (is_Proj(mem)) {
250                                 ir_node* pred = get_Proj_pred(mem);
251                                 if (is_Load(pred)) {
252 #ifdef OPTIMISE_LOAD_AFTER_LOAD
253                                         if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
254                                                 exchange(load, pred);
255                                                 return;
256                                         }
257 #endif
258                                         mem = get_Load_mem(pred);
259                                 }
260                         }
261                         after_set[i] = mem;
262                         sync = new_r_Sync(irg, block, size, after_set);
263                 }
264                 set_Load_mem(load, sync);
265         }
266
267         for (i = 0; i < count_addrs; i++) {
268                 ir_mode* mode = get_Load_mode(load);
269                 ir_node* other_addr = addrs[i];
270                 ir_mode* other_mode = mode; // XXX second mode is nonsense
271                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
272                 ir_node* other_node;
273
274                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
275                 if (rel == no_alias) {
276                         continue;
277                 }
278                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
279
280                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
281                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
282                         if (is_Proj(other_node) && is_Load(get_Proj_pred(other_node))) continue;
283                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
284                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], load));
285                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
286                         }
287                 }
288
289                 ir_nodeset_insert(&interfere_sets[i], memory);
290         }
291 }
292
293
294 static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
295 {
296         ir_node* addr = get_Store_ptr(store);
297         size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
298         ir_nodeset_t* interfere_sets = get_irn_link(block);
299         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
300         ir_node* after;
301         size_t i;
302
303         after = GenerateSync(irg, block, interfere_set);
304         set_Store_mem(store, after);
305
306         for (i = 0; i < count_addrs; i++) {
307                 ir_nodeset_iterator_t interfere_iter;
308                 ir_mode* mode = get_irn_mode(get_Store_value(store));
309                 ir_node* other_addr = addrs[i];
310                 ir_mode* other_mode = mode; // XXX second mode is nonsense
311                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
312                 ir_node* other_node;
313
314                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
315                 if (rel == no_alias) {
316                         continue;
317                 }
318                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
319
320                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
321                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
322                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
323                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
324                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
325                         }
326                 }
327
328                 ir_nodeset_insert(&interfere_sets[i], memory);
329         }
330 }
331
332
333 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
334 {
335         int block_change = 0;
336         ir_node* block = get_nodes_block(node);
337         ir_node* pred;
338         ir_node* memory = node;
339         ir_nodeset_t* addr_sets;
340
341         if (block != last_block) {
342                 DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
343                 block_change = 1;
344                 if (Block_not_block_visited(block)) {
345                         mark_Block_block_visited(block);
346                 } else {
347                         DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
348                         return block_change;
349                 }
350         }
351
352         // Skip projs
353         if (is_Proj(node)) node = get_Proj_pred(node);
354
355         if (is_Phi(node)) {
356                 WalkMemPhi(irg, block, node);
357                 return block_change;
358         } else if (is_Sync(node)) {
359                 UNIMPLEMENTED
360         } else if (is_Return(node)) {
361                 pred = get_Return_mem(node);
362         } else {
363                 pred = get_fragile_op_mem(node);
364         }
365
366         if (WalkMem(irg, pred, block)) {
367                 // There was a block change
368                 DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
369                 size_t block_arity = get_Block_n_cfgpreds(block);
370                 if (block_arity == 1) {
371                         // Just one predecessor, inherit its alias sets
372                         ir_node* pred_block = get_nodes_block(pred);
373                         ir_nodeset_t* predsets = get_irn_link(pred_block);
374                         ir_nodeset_t* thissets = get_irn_link(block);
375                         size_t i;
376
377                         DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));
378
379                         if (ir_nodeset_size(&predsets[0]) == 0) {
380                                 ir_node* unknown;
381
382                                 DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
383                                 assert(!Block_not_block_visited(pred_block));
384
385                                 unknown = new_r_Unknown(irg, mode_M);
386                                 for (i = 0; i < count_addrs; i++) {
387                                         ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
388                                         set_irn_link(phi_unk, unfinished_phis[i]);
389                                         unfinished_phis[i] = phi_unk;
390                                         ir_nodeset_insert(&thissets[i], phi_unk);
391                                 }
392                         } else {
393                                 for (i = 0; i < count_addrs; i++) {
394                                         ir_nodeset_iterator_t prediter;
395                                         ir_node* addr;
396
397                                         ir_nodeset_iterator_init(&prediter, &predsets[i]);
398                                         while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
399                                                 ir_nodeset_insert(&thissets[i], addr);
400                                         }
401                                 }
402                         }
403                 }
404         }
405
406         DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));
407
408         addr_sets = get_irn_link(block);
409
410         if (is_Load(node)) {
411                 PlaceLoad(irg, block, node, memory);
412         } else if (is_Store(node)) {
413                 PlaceStore(irg, block, node, memory);
414         } else {
415                 ir_nodeset_t sync_set;
416                 size_t i;
417                 ir_node* after;
418
419                 DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));
420
421                 ir_nodeset_init(&sync_set);
422                 for (i = 0; i < count_addrs; i++) {
423                         ir_nodeset_iterator_t iter;
424                         ir_node* mem;
425
426                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
427                         while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
428                                 ir_nodeset_insert(&sync_set, mem);
429                         }
430                 }
431
432                 after = GenerateSync(irg, block, &sync_set);
433                 set_irn_n(node, 0, after); // XXX unnice way to set the memory input
434
435                 for (i = 0; i < count_addrs; i++) {
436                         ir_nodeset_iterator_t iter;
437                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
438                         while (ir_nodeset_iterator_next(&iter) != NULL) {
439                                 ir_nodeset_remove_iterator(&addr_sets[i], &iter);
440                         }
441                         ir_nodeset_insert(&addr_sets[i], memory);
442                 }
443         }
444
445         return block_change;
446 }
447
448
449 static void FinalisePhis(ir_graph* irg)
450 {
451         size_t i;
452
453         for (i = 0; i < count_addrs; i++) {
454                 ir_node* next_phi;
455                 ir_node* phi;
456
457                 for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
458                         ir_node* block = get_nodes_block(phi);
459                         size_t block_n_preds = get_Block_n_cfgpreds(block);
460
461                         next_phi = get_irn_link(phi);
462
463                         DB((dbg, LEVEL_4, "===> Finialising phi %+F in %+F\n", phi, block));
464
465                         if (block_n_preds == 1) {
466                                 ir_node* pred_block = get_Block_cfgpred_block(block, 0);
467                                 ir_nodeset_t* pred_sets = get_irn_link(pred_block);
468                                 ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);
469
470                                 assert(is_Unknown(get_Phi_pred(phi, 0)));
471                                 exchange(phi, after);
472                         } else {
473                                 ir_node** in;
474                                 size_t j;
475
476                                 NEW_ARR_A(ir_node*, in, block_n_preds);
477                                 for (j = 0; j < block_n_preds; j++) {
478                                         ir_node* pred_block = get_Block_cfgpred_block(block, j);
479                                         ir_nodeset_t* pred_sets = get_irn_link(pred_block);
480
481                                         if (is_Unknown(get_Phi_pred(phi, j))) {
482                                                 set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
483                                         }
484                                 }
485                         }
486                 }
487         }
488 }
489
490
491 static void Detotalise(ir_graph* irg)
492 {
493         ir_node* end_block = get_irg_end_block(irg);
494         size_t npreds = get_Block_n_cfgpreds(end_block);
495         size_t i;
496
497         unfinished_phis = xmalloc(sizeof(*unfinished_phis) * count_addrs);
498         for (i = 0; i < count_addrs; i++) {
499                 unfinished_phis[i] = NULL;
500         }
501
502         for (i = 0; i < npreds; i++) {
503                 ir_node* pred = get_Block_cfgpred(end_block, i);
504                 assert(is_Return(pred));
505                 DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
506                 WalkMem(irg, pred, NULL);
507         }
508
509         FinalisePhis(irg);
510         xfree(unfinished_phis);
511 }
512
513
514 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
515 {
516         size_t n = get_Sync_n_preds(sync);
517         size_t i;
518
519         for (i = 0; i < n; i++) {
520                 ir_node* pred = get_Sync_pred(sync, i);
521                 if (is_Sync(pred)) {
522                         AddSyncPreds(preds, pred);
523                 } else {
524                         ir_nodeset_insert(preds, pred);
525                 }
526         }
527 }
528
529
530 static void NormaliseSync(ir_node* node, void* env)
531 {
532         ir_nodeset_t preds;
533         ir_nodeset_iterator_t iter;
534         ir_node** in;
535         size_t count_preds;
536         size_t i;
537
538         if (!is_Sync(node)) return;
539
540         ir_nodeset_init(&preds);
541         AddSyncPreds(&preds, node);
542
543         count_preds = ir_nodeset_size(&preds);
544         if (count_preds != get_Sync_n_preds(node)) {
545                 NEW_ARR_A(ir_node*, in, count_preds);
546                 ir_nodeset_iterator_init(&iter, &preds);
547                 for (i = 0; i < count_preds; i++) {
548                         ir_node* pred = ir_nodeset_iterator_next(&iter);
549                         assert(pred != NULL);
550                         in[i] = pred;
551                 }
552                 set_irn_in(node, count_preds, in);
553         }
554
555         ir_nodeset_destroy(&preds);
556 }
557
558
559 void opt_ldst2(ir_graph* irg)
560 {
561         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
562         DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
563
564         normalize_one_return(irg);
565
566         obstack_init(&obst);
567
568         if (1 /* XXX */ || get_opt_alias_analysis()) {
569                 assure_irg_address_taken_computed(irg);
570                 assure_irp_globals_address_taken_computed();
571         }
572
573
574         CollectAddresses(irg);
575         if (count_addrs == 0) return;
576
577         irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
578         inc_irg_block_visited(irg);
579         SetStartAddressesTop(irg);
580         Detotalise(irg);
581
582         dump_ir_block_graph(irg, "-fluffig");
583
584         irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
585         obstack_free(&obst, NULL);
586
587         normalize_proj_nodes(irg);
588         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
589   optimize_graph_df(irg);
590         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
591 }