73bbe22f02165a6d4a40323cac53beb3909a9631
[libfirm] / ir / opt / ldst2.c
1 #include <stdint.h>
2 #include "array.h"
3 #include "debug.h"
4 #include "ircons.h"
5 #include "irgraph.h"
6 #include "irgmod.h"
7 #include "irgopt.h"
8 #include "irgwalk.h"
9 #include "irmemory.h"
10 #include "irnode.h"
11 #include "irnodeset.h"
12 #include "ldst2.h"
13 #include "obst.h"
14 #include "return.h"
15
16
/* When defined, PlaceLoad exchanges a load for an earlier load of the same
 * address and mode found in its execute-after set instead of re-wiring it. */
17 #define OPTIMISE_LOAD_AFTER_LOAD
18
19
/* Marks cases this pass does not handle: expands to a call to abort().
 * The expansion already ends in a semicolon, so uses omit their own. */
20 #define UNIMPLEMENTED abort();
21
22
/* Debug module handle; registered as "firm.opt.ldst2" in opt_ldst2. */
23 DEBUG_ONLY(static firm_dbg_module_t *dbg);
24
25
/* Obstack backing the address array and the per-block alias-set arrays. */
26 static struct obstack obst;
/* Number of unique load/store addresses found by CollectAddresses. */
27 static size_t count_addrs;
/* The unique addresses; each address node's link field holds its index here. */
28 static ir_node** addrs;
29
30
31 static void AddressCollector(ir_node* node, void* env)
32 {
33         ir_nodeset_t* addrs_set = env;
34         ir_node* addr;
35         if (is_Load(node)) {
36                 addr = get_Load_ptr(node);
37         } else if (is_Store(node)) {
38                 addr = get_Store_ptr(node);
39         } else {
40                 return;
41         }
42         ir_nodeset_insert(addrs_set, addr);
43 }
44
45
46 /* Collects all unique addresses used by load and store nodes of a graph and
47  * puts them into an array for later use */
48 static void CollectAddresses(ir_graph* irg)
49 {
50         ir_nodeset_t addrs_set;
51
52         ir_nodeset_init(&addrs_set);
53         irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
54
55         count_addrs = ir_nodeset_size(&addrs_set);
56         DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (uint)count_addrs));
57         if (count_addrs != 0) {
58                 ir_nodeset_iterator_t addr_iter;
59                 size_t i;
60
61                 addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
62                 ir_nodeset_iterator_init(&addr_iter, &addrs_set);
63                 for (i = 0; i < count_addrs; i++) {
64                         ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
65                         assert(addr != NULL);
66                         set_irn_link(addr, (void*)(uintptr_t)i);
67                         addrs[i] = addr;
68                         DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
69                 }
70         }
71 }
72
73
74 static void AliasSetAdder(ir_node* block, void* env)
75 {
76         ir_nodeset_t* alias_set;
77         size_t i;
78
79         alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
80         for (i = 0; i < count_addrs; i++) {
81                 ir_nodeset_init(&alias_set[i]);
82         }
83         set_irn_link(block, alias_set);
84 }
85
86
87 static void SetStartAddressesTop(ir_graph* irg)
88 {
89         ir_node* initial_mem;
90         ir_node* start_block;
91         ir_nodeset_t* start_addrs;
92         size_t i;
93
94         initial_mem = get_irg_initial_mem(irg);
95         start_block = get_irg_start_block(irg);
96         start_addrs = get_irn_link(start_block);
97         for (i = 0; i < count_addrs; i++) {
98                 ir_nodeset_insert(&start_addrs[i], initial_mem);
99         }
100         mark_Block_block_visited(start_block);
101 }
102
103
104 static void AliasSetDestroyer(ir_node* block, void* env)
105 {
106         ir_nodeset_t* alias_set = get_irn_link(block);
107         size_t i;
108
109         for (i = 0; i < count_addrs; i++) {
110                 ir_nodeset_destroy(&alias_set[i]);
111         }
112 }
113
114
115 static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
116 {
117         ir_node* other_addr;
118         ir_mode* other_mode;
119
120         if (is_Proj(other)) other = get_Proj_pred(other);
121
122         if (is_Load(other)) {
123                 other_addr = get_Load_ptr(other);
124         } else if (is_Store(other)) {
125                 other_addr = get_Store_ptr(other);
126         } else {
127                 return may_alias;
128         }
129
130         other_mode = get_irn_mode(other);
131         return get_alias_relation(irg, addr, mode, other_addr, other_mode);
132 }
133
134
135 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
136
137
138 static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
139 {
140         size_t n = get_Phi_n_preds(phi);
141         size_t i;
142         size_t j;
143         ir_node** in;
144         ir_nodeset_t* thissets;
145
146         for (i = 0; i < n; i++) {
147                 WalkMem(irg, get_Phi_pred(phi, i), block);
148         }
149
150         thissets = get_irn_link(block);
151         NEW_ARR_A(ir_node*, in, n);
152         for (j = 0; j < count_addrs; j++) {
153                 ir_node* new_phi;
154
155                 for (i = 0; i < n; i++) {
156                         ir_nodeset_t* predsets = get_irn_link(get_nodes_block(get_Phi_pred(phi, i)));
157                         size_t size = ir_nodeset_size(&predsets[j]);
158                         ir_nodeset_iterator_t iter;
159
160                         ir_nodeset_iterator_init(&iter, &predsets[j]);
161                         if (size == 0) {
162                                 UNIMPLEMENTED
163                         } else if (size == 1) {
164                                 in[i] = ir_nodeset_iterator_next(&iter);
165                         } else {
166                                 ir_node** sync_in;
167                                 size_t k;
168
169                                 NEW_ARR_A(ir_node*, sync_in, size);
170                                 for (k = 0; k < size; k++) {
171                                         sync_in[k] = ir_nodeset_iterator_next(&iter);
172                                 }
173                                 in[i] = new_r_Sync(irg, get_Block_cfgpred_block(block, i), size, sync_in);
174                         }
175                 }
176                 new_phi = new_r_Phi(irg, block, n, in, mode_M);
177                 ir_nodeset_insert(&thissets[j], new_phi);
178         }
179
180         exchange(phi, new_Bad());
181 }
182
183
184 static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
185 {
186         ir_node* addr = get_Load_ptr(load);
187         size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
188         ir_nodeset_t* interfere_sets = get_irn_link(block);
189         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
190         size_t size = ir_nodeset_size(interfere_set);
191         ir_nodeset_iterator_t interfere_iter;
192         size_t i;
193
194         assert(size > 0);
195         ir_nodeset_iterator_init(&interfere_iter, interfere_set);
196         if (size == 1) {
197                 ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
198                 if (is_Proj(after)) {
199                         ir_node* pred = get_Proj_pred(after);
200                         if (is_Load(pred)) {
201 #ifdef OPTIMISE_LOAD_AFTER_LOAD
202                                 if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
203                                         exchange(load, pred);
204                                         return;
205                                 }
206 #endif
207                                 after = get_Load_mem(pred);
208                         }
209                 }
210                 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
211                 set_Load_mem(load, after);
212         } else {
213                 ir_node** after_set;
214                 ir_node* sync;
215
216                 NEW_ARR_A(ir_node*, after_set, size);
217                 for (i = 0; i < size; i++) {
218                         ir_node* mem = ir_nodeset_iterator_next(&interfere_iter);
219                         if (is_Proj(mem)) {
220                                 ir_node* pred = get_Proj_pred(mem);
221                                 if (is_Load(pred)) {
222 #ifdef OPTIMISE_LOAD_AFTER_LOAD
223                                         if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
224                                                 exchange(load, pred);
225                                                 return;
226                                         }
227 #endif
228                                         mem = get_Load_mem(pred);
229                                 }
230                         }
231                         after_set[i] = mem;
232                         sync = new_r_Sync(irg, block, size, after_set);
233                 }
234                 set_Load_mem(load, sync);
235         }
236
237         for (i = 0; i < count_addrs; i++) {
238                 ir_mode* mode = get_Load_mode(load);
239                 ir_node* other_addr = addrs[i];
240                 ir_mode* other_mode = mode; // XXX second mode is nonsense
241                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
242                 ir_node* other_node;
243
244                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
245                 if (rel == no_alias) {
246                         continue;
247                 }
248                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
249
250                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
251                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
252                         if (is_Proj(other_node) && is_Load(get_Proj_pred(other_node))) continue;
253                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
254                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], load));
255                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
256                         }
257                 }
258
259                 ir_nodeset_insert(&interfere_sets[i], memory);
260         }
261 }
262
263
264 static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
265 {
266         ir_node* addr = get_Store_ptr(store);
267         size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
268         ir_nodeset_t* interfere_sets = get_irn_link(block);
269         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
270         size_t size = ir_nodeset_size(interfere_set);
271         ir_nodeset_iterator_t interfere_iter;
272         size_t i;
273
274         assert(size > 0);
275         ir_nodeset_iterator_init(&interfere_iter, interfere_set);
276         if (size == 1) {
277                 ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
278                 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", store, after));
279                 set_Store_mem(store, after);
280         } else {
281                 ir_node** after_set;
282                 ir_node* sync;
283
284                 NEW_ARR_A(ir_node*, after_set, size);
285                 for (i = 0; i < size; i++) {
286                         after_set[i] = ir_nodeset_iterator_next(&interfere_iter);
287                         sync = new_r_Sync(irg, block, size, after_set);
288                 }
289                 set_Store_mem(store, sync);
290         }
291
292         for (i = 0; i < count_addrs; i++) {
293                 ir_mode* mode = get_irn_mode(get_Store_value(store));
294                 ir_node* other_addr = addrs[i];
295                 ir_mode* other_mode = mode; // XXX second mode is nonsense
296                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
297                 ir_node* other_node;
298
299                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
300                 if (rel == no_alias) {
301                         continue;
302                 }
303                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
304
305                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
306                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
307                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
308                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
309                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
310                         }
311                 }
312
313                 ir_nodeset_insert(&interfere_sets[i], memory);
314         }
315 }
316
317
318 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
319 {
320         int block_change = 0;
321         ir_node* block = get_nodes_block(node);
322         ir_node* pred;
323         ir_node* memory = node;
324         ir_nodeset_t* addr_sets;
325
326         if (block != last_block) {
327                 block_change = 1;
328                 if (Block_not_block_visited(block)) {
329                         mark_Block_block_visited(block);
330                 } else {
331                         DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
332                         return block_change;
333                 }
334         }
335
336         // Skip projs
337         if (is_Proj(node)) node = get_Proj_pred(node);
338
339         if (is_Phi(node)) {
340                 WalkMemPhi(irg, block, node);
341                 return 0;
342         } else if (is_Sync(node)) {
343                 UNIMPLEMENTED
344         } else if (is_Return(node)) {
345                 pred = get_Return_mem(node);
346         } else {
347                 pred = get_fragile_op_mem(node);
348         }
349
350         if (WalkMem(irg, pred, block)) {
351                 // There was a block change
352                 DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
353                 if (get_Block_n_cfgpreds(block) == 1) {
354                         // Just one predecessor, inherit its alias sets
355                         ir_nodeset_t* predsets = get_irn_link(get_nodes_block(pred));
356                         ir_nodeset_t* thissets = get_irn_link(block);
357                         size_t i;
358
359                         DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));
360
361                         for (i = 0; i < count_addrs; i++) {
362                                 ir_nodeset_iterator_t prediter;
363                                 ir_node* addr;
364
365                                 ir_nodeset_iterator_init(&prediter, &predsets[i]);
366                                 while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
367                                         ir_nodeset_insert(&thissets[i], addr);
368                                 }
369                         }
370                 }
371         }
372
373         DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));
374
375         addr_sets = get_irn_link(block);
376
377         if (is_Load(node)) {
378                 PlaceLoad(irg, block, node, memory);
379         } else if (is_Store(node)) {
380                 PlaceStore(irg, block, node, memory);
381         } else {
382                 ir_nodeset_t sync_set;
383                 size_t i;
384                 size_t sync_arity;
385                 ir_nodeset_iterator_t sync_set_iter;
386                 ir_node* after;
387
388                 DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));
389
390                 ir_nodeset_init(&sync_set);
391                 for (i = 0; i < count_addrs; i++) {
392                         ir_nodeset_iterator_t iter;
393                         ir_node* mem;
394
395                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
396                         while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
397                                 ir_nodeset_insert(&sync_set, mem);
398                         }
399                 }
400
401                 sync_arity = ir_nodeset_size(&sync_set);
402                 ir_nodeset_iterator_init(&sync_set_iter, &sync_set);
403                 if (sync_arity == 1) {
404                         after = ir_nodeset_iterator_next(&sync_set_iter);
405                 } else {
406                         ir_node** sync_in;
407
408                         NEW_ARR_A(ir_node*, sync_in, sync_arity);
409                         for (i = 0; i < sync_arity; i++) {
410                                 sync_in[i] = ir_nodeset_iterator_next(&sync_set_iter);
411                         }
412                         after = new_r_Sync(irg, block, sync_arity, sync_in);
413                 }
414                 set_irn_n(node, 0, after); // XXX unnice way to set the memory input
415
416                 for (i = 0; i < count_addrs; i++) {
417                         ir_nodeset_iterator_t iter;
418                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
419                         while (ir_nodeset_iterator_next(&iter) != NULL) {
420                                 ir_nodeset_remove_iterator(&addr_sets[i], &iter);
421                         }
422                         ir_nodeset_insert(&addr_sets[i], memory);
423                 }
424         }
425
426         return block_change;
427 }
428
429
430 static void Detotalise(ir_graph* irg)
431 {
432         ir_node* end_block = get_irg_end_block(irg);
433         size_t npreds = get_Block_n_cfgpreds(end_block);
434         size_t i;
435
436         for (i = 0; i < npreds; i++) {
437                 ir_node* pred = get_Block_cfgpred(end_block, i);
438                 assert(is_Return(pred));
439                 DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
440                 WalkMem(irg, pred, NULL);
441         }
442 }
443
444
445 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
446 {
447         size_t n = get_Sync_n_preds(sync);
448         size_t i;
449
450         for (i = 0; i < n; i++) {
451                 ir_node* pred = get_Sync_pred(sync, i);
452                 if (is_Sync(pred)) {
453                         AddSyncPreds(preds, pred);
454                 } else {
455                         ir_nodeset_insert(preds, pred);
456                 }
457         }
458 }
459
460
461 static void NormaliseSync(ir_node* node, void* env)
462 {
463         ir_nodeset_t preds;
464         ir_nodeset_iterator_t iter;
465         ir_node** in;
466         size_t count_preds;
467         size_t i;
468
469         if (!is_Sync(node)) return;
470
471         ir_nodeset_init(&preds);
472         AddSyncPreds(&preds, node);
473
474         count_preds = ir_nodeset_size(&preds);
475         if (count_preds != get_Sync_n_preds(node)) {
476                 NEW_ARR_A(ir_node*, in, count_preds);
477                 ir_nodeset_iterator_init(&iter, &preds);
478                 for (i = 0; i < count_preds; i++) {
479                         ir_node* pred = ir_nodeset_iterator_next(&iter);
480                         assert(pred != NULL);
481                         in[i] = pred;
482                 }
483                 set_irn_in(node, count_preds, in);
484         }
485
486         ir_nodeset_destroy(&preds);
487 }
488
489
490 void opt_ldst2(ir_graph* irg)
491 {
492         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
493         DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
494
495         normalize_one_return(irg);
496
497         obstack_init(&obst);
498
499         if (1 /* XXX */ || get_opt_alias_analysis()) {
500                 assure_irg_address_taken_computed(irg);
501                 assure_irp_globals_address_taken_computed();
502         }
503
504
505         CollectAddresses(irg);
506         if (count_addrs == 0) return;
507
508         irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
509         inc_irg_block_visited(irg);
510         SetStartAddressesTop(irg);
511         Detotalise(irg);
512
513         irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
514         obstack_free(&obst, NULL);
515
516         normalize_proj_nodes(irg);
517         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
518   optimize_graph_df(irg);
519         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
520 }