c9bfcf5181001c9bad0799b6de2a6493563aa97c
[libfirm] / ir / opt / ldst2.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   parallelizing Load/Store optimisation
23  * @author  Christoph Mallon
24  * @version $Id$
25  */
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29
30 #include "array.h"
31 #include "debug.h"
32 #include "ircons.h"
33 #include "irgraph.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irmemory.h"
38 #include "irnode.h"
39 #include "irnodeset.h"
40 #include "ldst2.h"
41 #include "obst.h"
42 #include "return.h"
43 #include "irdump.h"
44
45
46 #define OPTIMISE_LOAD_AFTER_LOAD
47
48
49 #define UNIMPLEMENTED abort();
50
51
52 DEBUG_ONLY(static firm_dbg_module_t *dbg);
53
54
55 static struct obstack obst;
56 static size_t count_addrs;
57 static ir_node** addrs;
58
59
60 static void AddressCollector(ir_node* node, void* env)
61 {
62         ir_nodeset_t* addrs_set = env;
63         ir_node* addr;
64         if (is_Load(node)) {
65                 addr = get_Load_ptr(node);
66         } else if (is_Store(node)) {
67                 addr = get_Store_ptr(node);
68         } else {
69                 return;
70         }
71         ir_nodeset_insert(addrs_set, addr);
72 }
73
74
75 /* Collects all unique addresses used by load and store nodes of a graph and
76  * puts them into an array for later use */
77 static void CollectAddresses(ir_graph* irg)
78 {
79         ir_nodeset_t addrs_set;
80
81         ir_nodeset_init(&addrs_set);
82         irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
83
84         count_addrs = ir_nodeset_size(&addrs_set);
85         DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs));
86         if (count_addrs != 0) {
87                 ir_nodeset_iterator_t addr_iter;
88                 size_t i;
89
90                 addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
91                 ir_nodeset_iterator_init(&addr_iter, &addrs_set);
92                 for (i = 0; i < count_addrs; i++) {
93                         ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
94                         assert(addr != NULL);
95                         set_irn_link(addr, (void *)i);
96                         addrs[i] = addr;
97                         DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
98                 }
99         }
100 }
101
102
103 static void AliasSetAdder(ir_node* block, void* env)
104 {
105         ir_nodeset_t* alias_set;
106         size_t i;
107
108         alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
109         for (i = 0; i < count_addrs; i++) {
110                 ir_nodeset_init(&alias_set[i]);
111         }
112         set_irn_link(block, alias_set);
113 }
114
115
116 static void SetStartAddressesTop(ir_graph* irg)
117 {
118         ir_node* initial_mem;
119         ir_node* start_block;
120         ir_nodeset_t* start_addrs;
121         size_t i;
122
123         initial_mem = get_irg_initial_mem(irg);
124         start_block = get_irg_start_block(irg);
125         start_addrs = get_irn_link(start_block);
126         for (i = 0; i < count_addrs; i++) {
127                 ir_nodeset_insert(&start_addrs[i], initial_mem);
128         }
129         mark_Block_block_visited(start_block);
130 }
131
132
133 static void AliasSetDestroyer(ir_node* block, void* env)
134 {
135         ir_nodeset_t* alias_set = get_irn_link(block);
136         size_t i;
137
138         for (i = 0; i < count_addrs; i++) {
139                 ir_nodeset_destroy(&alias_set[i]);
140         }
141 }
142
143
144 static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
145 {
146         ir_node* other_addr;
147         ir_mode* other_mode;
148
149         if (is_Proj(other)) other = get_Proj_pred(other);
150
151         if (is_Load(other)) {
152                 other_addr = get_Load_ptr(other);
153         } else if (is_Store(other)) {
154                 other_addr = get_Store_ptr(other);
155         } else {
156                 return may_alias;
157         }
158
159         other_mode = get_irn_mode(other);
160         return get_alias_relation(irg, addr, mode, other_addr, other_mode);
161 }
162
163
164 static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
165 {
166         size_t set_size = ir_nodeset_size(after_set);
167         ir_nodeset_iterator_t iter;
168
169         assert(set_size != 0);
170
171         ir_nodeset_iterator_init(&iter, after_set);
172         if (set_size == 1) {
173                 return ir_nodeset_iterator_next(&iter);
174         } else {
175                 ir_node** in;
176                 size_t i;
177
178                 NEW_ARR_A(ir_node*, in, set_size);
179                 for (i = 0; i < set_size; i++) {
180                         in[i] = ir_nodeset_iterator_next(&iter);
181                 }
182                 return new_r_Sync(irg, block, set_size, in);
183         }
184 }
185
186
187 static ir_node** unfinished_phis;
188
189
190 static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
191 {
192         int unfinished = 0;
193         size_t block_n_preds = get_Block_n_cfgpreds(block);
194         ir_nodeset_t* thissets;
195         ir_node** in;
196         size_t i;
197         size_t j;
198
199         thissets = get_irn_link(block);
200         NEW_ARR_A(ir_node*, in, block_n_preds);
201         for (j = 0; j < count_addrs; j++) {
202                 ir_node* new_phi;
203
204                 for (i = 0; i < block_n_preds; i++) {
205                         ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
206                         ir_nodeset_t* predsets = get_irn_link(pred_block);
207                         size_t predset_size = ir_nodeset_size(&predsets[j]);
208
209                         if (predset_size == 0) {
210                                 in[i] = new_r_Unknown(irg, mode_M);
211                                 unfinished = 1;
212                         } else {
213                                 in[i] = GenerateSync(irg, pred_block, &predsets[j]);
214                         }
215                 }
216                 new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
217                 if (unfinished) {
218                         set_irn_link(new_phi, unfinished_phis[j]);
219                         unfinished_phis[j] = new_phi;
220                 }
221                 ir_nodeset_insert(&thissets[j], new_phi);
222         }
223 }
224
225
226 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
227
228
229 static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
230 {
231         size_t n = get_Phi_n_preds(phi);
232         size_t i;
233
234         for (i = 0; i < n; i++) {
235                 WalkMem(irg, get_Phi_pred(phi, i), block);
236         }
237
238         PlaceMemPhis(irg, block, phi);
239         exchange(phi, new_Bad());
240 }
241
242
243 static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
244 {
245         ir_node* addr = get_Load_ptr(load);
246         size_t addr_idx = (size_t)get_irn_link(addr);
247         ir_nodeset_t* interfere_sets = get_irn_link(block);
248         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
249         size_t size = ir_nodeset_size(interfere_set);
250         ir_nodeset_iterator_t interfere_iter;
251         size_t i;
252
253         assert(size > 0);
254         ir_nodeset_iterator_init(&interfere_iter, interfere_set);
255         if (size == 1) {
256                 ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
257                 if (is_Proj(after)) {
258                         ir_node* pred = get_Proj_pred(after);
259                         if (is_Load(pred)) {
260 #ifdef OPTIMISE_LOAD_AFTER_LOAD
261                                 if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
262                                         exchange(load, pred);
263                                         return;
264                                 }
265 #endif
266                                 after = get_Load_mem(pred);
267                         }
268                 }
269                 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
270                 set_Load_mem(load, after);
271         } else {
272                 ir_node** after_set;
273                 ir_node* sync;
274
275                 NEW_ARR_A(ir_node*, after_set, size);
276                 for (i = 0; i < size; i++) {
277                         ir_node* mem = ir_nodeset_iterator_next(&interfere_iter);
278                         if (is_Proj(mem)) {
279                                 ir_node* pred = get_Proj_pred(mem);
280                                 if (is_Load(pred)) {
281 #ifdef OPTIMISE_LOAD_AFTER_LOAD
282                                         if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
283                                                 exchange(load, pred);
284                                                 return;
285                                         }
286 #endif
287                                         mem = get_Load_mem(pred);
288                                 }
289                         }
290                         after_set[i] = mem;
291                         sync = new_r_Sync(irg, block, size, after_set);
292                 }
293                 set_Load_mem(load, sync);
294         }
295
296         for (i = 0; i < count_addrs; i++) {
297                 ir_mode* mode = get_Load_mode(load);
298                 ir_node* other_addr = addrs[i];
299                 ir_mode* other_mode = mode; // XXX second mode is nonsense
300                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
301                 ir_node* other_node;
302
303                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
304                 if (rel == no_alias) {
305                         continue;
306                 }
307                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
308
309                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
310                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
311                         if (is_Proj(other_node) && is_Load(get_Proj_pred(other_node))) continue;
312                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
313                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], load));
314                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
315                         }
316                 }
317
318                 ir_nodeset_insert(&interfere_sets[i], memory);
319         }
320 }
321
322
323 static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
324 {
325         ir_node* addr = get_Store_ptr(store);
326         size_t addr_idx = (size_t)get_irn_link(addr);
327         ir_nodeset_t* interfere_sets = get_irn_link(block);
328         ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
329         ir_node* after;
330         size_t i;
331
332         after = GenerateSync(irg, block, interfere_set);
333         set_Store_mem(store, after);
334
335         for (i = 0; i < count_addrs; i++) {
336                 ir_nodeset_iterator_t interfere_iter;
337                 ir_mode* mode = get_irn_mode(get_Store_value(store));
338                 ir_node* other_addr = addrs[i];
339                 ir_mode* other_mode = mode; // XXX second mode is nonsense
340                 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
341                 ir_node* other_node;
342
343                 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
344                 if (rel == no_alias) {
345                         continue;
346                 }
347                 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
348
349                 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
350                 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
351                         if (AliasTest(irg, addr, mode, other_node) != no_alias) {
352                                 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
353                                 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
354                         }
355                 }
356
357                 ir_nodeset_insert(&interfere_sets[i], memory);
358         }
359 }
360
361
362 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
363 {
364         int block_change = 0;
365         ir_node* block = get_nodes_block(node);
366         ir_node* pred;
367         ir_node* memory = node;
368         ir_nodeset_t* addr_sets;
369
370         if (block != last_block) {
371                 DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
372                 block_change = 1;
373                 if (Block_not_block_visited(block)) {
374                         mark_Block_block_visited(block);
375                 } else {
376                         DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
377                         return block_change;
378                 }
379         }
380
381         // Skip projs
382         if (is_Proj(node)) node = get_Proj_pred(node);
383
384         if (is_Phi(node)) {
385                 WalkMemPhi(irg, block, node);
386                 return block_change;
387         } else if (is_Sync(node)) {
388                 UNIMPLEMENTED
389         } else if (is_Return(node)) {
390                 pred = get_Return_mem(node);
391         } else {
392                 pred = get_fragile_op_mem(node);
393         }
394
395         if (WalkMem(irg, pred, block)) {
396                 // There was a block change
397                 size_t block_arity = get_Block_n_cfgpreds(block);
398
399                 DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
400                 if (block_arity == 1) {
401                         // Just one predecessor, inherit its alias sets
402                         ir_node* pred_block = get_nodes_block(pred);
403                         ir_nodeset_t* predsets = get_irn_link(pred_block);
404                         ir_nodeset_t* thissets = get_irn_link(block);
405                         size_t i;
406
407                         DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));
408
409                         if (ir_nodeset_size(&predsets[0]) == 0) {
410                                 ir_node* unknown;
411
412                                 DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
413                                 assert(!Block_not_block_visited(pred_block));
414
415                                 unknown = new_r_Unknown(irg, mode_M);
416                                 for (i = 0; i < count_addrs; i++) {
417                                         ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
418                                         set_irn_link(phi_unk, unfinished_phis[i]);
419                                         unfinished_phis[i] = phi_unk;
420                                         ir_nodeset_insert(&thissets[i], phi_unk);
421                                 }
422                         } else {
423                                 for (i = 0; i < count_addrs; i++) {
424                                         ir_nodeset_iterator_t prediter;
425                                         ir_node* addr;
426
427                                         ir_nodeset_iterator_init(&prediter, &predsets[i]);
428                                         while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
429                                                 ir_nodeset_insert(&thissets[i], addr);
430                                         }
431                                 }
432                         }
433                 }
434         }
435
436         DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));
437
438         addr_sets = get_irn_link(block);
439
440         if (is_Load(node)) {
441                 PlaceLoad(irg, block, node, memory);
442         } else if (is_Store(node)) {
443                 PlaceStore(irg, block, node, memory);
444         } else {
445                 ir_nodeset_t sync_set;
446                 size_t i;
447                 ir_node* after;
448
449                 DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));
450
451                 ir_nodeset_init(&sync_set);
452                 for (i = 0; i < count_addrs; i++) {
453                         ir_nodeset_iterator_t iter;
454                         ir_node* mem;
455
456                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
457                         while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
458                                 ir_nodeset_insert(&sync_set, mem);
459                         }
460                 }
461
462                 after = GenerateSync(irg, block, &sync_set);
463                 set_irn_n(node, 0, after); // XXX unnice way to set the memory input
464
465                 for (i = 0; i < count_addrs; i++) {
466                         ir_nodeset_iterator_t iter;
467                         ir_nodeset_iterator_init(&iter, &addr_sets[i]);
468                         while (ir_nodeset_iterator_next(&iter) != NULL) {
469                                 ir_nodeset_remove_iterator(&addr_sets[i], &iter);
470                         }
471                         ir_nodeset_insert(&addr_sets[i], memory);
472                 }
473         }
474
475         return block_change;
476 }
477
478
479 static void FinalisePhis(ir_graph* irg)
480 {
481         size_t i;
482
483         for (i = 0; i < count_addrs; i++) {
484                 ir_node* next_phi;
485                 ir_node* phi;
486
487                 for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
488                         ir_node* block = get_nodes_block(phi);
489                         size_t block_n_preds = get_Block_n_cfgpreds(block);
490
491                         next_phi = get_irn_link(phi);
492
493                         DB((dbg, LEVEL_4, "===> Finialising phi %+F in %+F\n", phi, block));
494
495                         if (block_n_preds == 1) {
496                                 ir_node* pred_block = get_Block_cfgpred_block(block, 0);
497                                 ir_nodeset_t* pred_sets = get_irn_link(pred_block);
498                                 ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);
499
500                                 assert(is_Unknown(get_Phi_pred(phi, 0)));
501                                 exchange(phi, after);
502                         } else {
503                                 ir_node** in;
504                                 size_t j;
505
506                                 NEW_ARR_A(ir_node*, in, block_n_preds);
507                                 for (j = 0; j < block_n_preds; j++) {
508                                         ir_node* pred_block = get_Block_cfgpred_block(block, j);
509                                         ir_nodeset_t* pred_sets = get_irn_link(pred_block);
510
511                                         if (is_Unknown(get_Phi_pred(phi, j))) {
512                                                 set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
513                                         }
514                                 }
515                         }
516                 }
517         }
518 }
519
520
521 static void Detotalise(ir_graph* irg)
522 {
523         ir_node* end_block = get_irg_end_block(irg);
524         size_t npreds = get_Block_n_cfgpreds(end_block);
525         size_t i;
526
527         unfinished_phis = xmalloc(sizeof(*unfinished_phis) * count_addrs);
528         for (i = 0; i < count_addrs; i++) {
529                 unfinished_phis[i] = NULL;
530         }
531
532         for (i = 0; i < npreds; i++) {
533                 ir_node* pred = get_Block_cfgpred(end_block, i);
534                 assert(is_Return(pred));
535                 DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
536                 WalkMem(irg, pred, NULL);
537         }
538
539         FinalisePhis(irg);
540         xfree(unfinished_phis);
541 }
542
543
544 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
545 {
546         size_t n = get_Sync_n_preds(sync);
547         size_t i;
548
549         for (i = 0; i < n; i++) {
550                 ir_node* pred = get_Sync_pred(sync, i);
551                 if (is_Sync(pred)) {
552                         AddSyncPreds(preds, pred);
553                 } else {
554                         ir_nodeset_insert(preds, pred);
555                 }
556         }
557 }
558
559
560 static void NormaliseSync(ir_node* node, void* env)
561 {
562         ir_nodeset_t preds;
563         ir_nodeset_iterator_t iter;
564         ir_node** in;
565         size_t count_preds;
566         size_t i;
567
568         if (!is_Sync(node)) return;
569
570         ir_nodeset_init(&preds);
571         AddSyncPreds(&preds, node);
572
573         count_preds = ir_nodeset_size(&preds);
574         if (count_preds != get_Sync_n_preds(node)) {
575                 NEW_ARR_A(ir_node*, in, count_preds);
576                 ir_nodeset_iterator_init(&iter, &preds);
577                 for (i = 0; i < count_preds; i++) {
578                         ir_node* pred = ir_nodeset_iterator_next(&iter);
579                         assert(pred != NULL);
580                         in[i] = pred;
581                 }
582                 set_irn_in(node, count_preds, in);
583         }
584
585         ir_nodeset_destroy(&preds);
586 }
587
588
589 void opt_ldst2(ir_graph* irg)
590 {
591         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
592         DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
593
594         normalize_one_return(irg);
595
596         obstack_init(&obst);
597
598         if (1 /* XXX */ || get_opt_alias_analysis()) {
599                 assure_irg_address_taken_computed(irg);
600                 assure_irp_globals_address_taken_computed();
601         }
602
603
604         CollectAddresses(irg);
605         if (count_addrs == 0) return;
606
607         irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
608         inc_irg_block_visited(irg);
609         SetStartAddressesTop(irg);
610         Detotalise(irg);
611
612         dump_ir_block_graph(irg, "-fluffig");
613
614         irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
615         obstack_free(&obst, NULL);
616
617         normalize_proj_nodes(irg);
618         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
619   optimize_graph_df(irg);
620         irg_walk_graph(irg, NormaliseSync, NULL, NULL);
621 }