ir/opt/parallelize_mem.c
/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief   parallelizing Load/Store optimisation
 * @author  Christoph Mallon
 */
#include "config.h"

#include "iroptimize.h"

#include "array_t.h"
#include "debug.h"
#include "ircons.h"
#include "irgraph.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "irmemory.h"
#include "irnode.h"
#include "irnodeset.h"
#include "obst.h"
#include "irdump.h"
#include "irflag_t.h"
#include "irprintf.h"
#include "irpass.h"
#include "iredges.h"

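/**
 * Shared state while walking the memory dependency graph above a
 * Load/Store: origin_* describe the operation being optimised,
 * user_mem collects the memory Projs it may safely execute in
 * parallel with, this_mem collects the nodes it must still depend on.
 */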
typedef struct parallelize_info
{
        ir_node      *origin_block;
        ir_node      *origin_ptr;
        ir_mode      *origin_mode;
        ir_nodeset_t  this_mem;
        ir_nodeset_t  user_mem;
} parallelize_info;

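/**
 * Classify the memory node @p irn seen above a Load: Projs of other
 * non-volatile Loads (two Loads never conflict) and of provably
 * non-aliasing Stores in the same block go into user_mem and are walked
 * through; anything else ends the walk and goes into this_mem.
 */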
static void parallelize_load(parallelize_info *pi, ir_node *irn)
{
        /* There is no point in investigating the same subgraph twice */
        if (ir_nodeset_contains(&pi->user_mem, irn))
                return;

        if (get_nodes_block(irn) == pi->origin_block) {
                if (is_Proj(irn)) {
                        ir_node *pred = get_Proj_pred(irn);
                        if (is_Load(pred) &&
                                        get_Load_volatility(pred) == volatility_non_volatile) {
                                ir_node *mem = get_Load_mem(pred);
                                ir_nodeset_insert(&pi->user_mem, irn);
                                parallelize_load(pi, mem);
                                return;
                        } else if (is_Store(pred) &&
                                        get_Store_volatility(pred) == volatility_non_volatile) {
                                ir_mode *org_mode   = pi->origin_mode;
                                ir_node *org_ptr    = pi->origin_ptr;
                                ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
                                ir_node *store_ptr  = get_Store_ptr(pred);
                                if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
                                        ir_node *mem = get_Store_mem(pred);
                                        ir_nodeset_insert(&pi->user_mem, irn);
                                        parallelize_load(pi, mem);
                                        return;
                                }
                        }
                } else if (is_Sync(irn)) {
                        int n = get_Sync_n_preds(irn);
                        int i;

                        for (i = 0; i < n; ++i) {
                                ir_node *sync_pred = get_Sync_pred(irn, i);
                                parallelize_load(pi, sync_pred);
                        }
                        return;
                }
        }
        ir_nodeset_insert(&pi->this_mem, irn);
}

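/**
 * Classify the memory node @p irn seen above a Store: Projs of
 * non-volatile Loads and Stores whose addresses provably do not alias
 * the Store's address go into user_mem and are walked through;
 * anything else ends the walk and goes into this_mem.
 */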
static void parallelize_store(parallelize_info *pi, ir_node *irn)
{
        /* There is no point in investigating the same subgraph twice */
        if (ir_nodeset_contains(&pi->user_mem, irn))
                return;

        if (get_nodes_block(irn) == pi->origin_block) {
                if (is_Proj(irn)) {
                        ir_node *pred = get_Proj_pred(irn);
                        if (is_Load(pred) &&
                                        get_Load_volatility(pred) == volatility_non_volatile) {
                                ir_mode *org_mode  = pi->origin_mode;
                                ir_node *org_ptr   = pi->origin_ptr;
                                ir_mode *load_mode = get_Load_mode(pred);
                                ir_node *load_ptr  = get_Load_ptr(pred);
                                if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
                                        ir_node *mem = get_Load_mem(pred);
                                        ir_nodeset_insert(&pi->user_mem, irn);
                                        parallelize_store(pi, mem);
                                        return;
                                }
                        } else if (is_Store(pred) &&
                                        get_Store_volatility(pred) == volatility_non_volatile) {
                                ir_mode *org_mode   = pi->origin_mode;
                                ir_node *org_ptr    = pi->origin_ptr;
                                ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
                                ir_node *store_ptr  = get_Store_ptr(pred);
                                if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
                                        ir_node *mem;

                                        ir_nodeset_insert(&pi->user_mem, irn);
                                        mem = get_Store_mem(pred);
                                        parallelize_store(pi, mem);
                                        return;
                                }
                        }
                } else if (is_Sync(irn)) {
                        int n = get_Sync_n_preds(irn);
                        int i;

                        for (i = 0; i < n; ++i) {
                                ir_node *sync_pred = get_Sync_pred(irn, i);
                                parallelize_store(pi, sync_pred);
                        }
                        return;
                }
        }
        ir_nodeset_insert(&pi->this_mem, irn);
}

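/**
 * Walker: for every memory Proj of a non-volatile Load/Store, partition
 * the memory graph above it and insert Syncs so that independent memory
 * operations no longer serialise on each other.
 */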
static void walker(ir_node *proj, void *env)
{
        ir_node          *mem_op;
        ir_node          *pred;
        ir_node          *block;
        size_t            n;
        parallelize_info  pi;

        (void)env;

        if (!is_Proj(proj)) return;
        if (get_irn_mode(proj) != mode_M) return;

        mem_op = get_Proj_pred(proj);
        if (is_Load(mem_op)) {
                if (get_Load_volatility(mem_op) != volatility_non_volatile) return;

                block = get_nodes_block(mem_op);
                pred  = get_Load_mem(mem_op);

                pi.origin_block = block;
                pi.origin_ptr   = get_Load_ptr(mem_op);
                pi.origin_mode  = get_Load_mode(mem_op);
                ir_nodeset_init(&pi.this_mem);
                ir_nodeset_init(&pi.user_mem);

                parallelize_load(&pi, pred);
        } else if (is_Store(mem_op)) {
                if (get_Store_volatility(mem_op) != volatility_non_volatile) return;

                block = get_nodes_block(mem_op);
                pred  = get_Store_mem(mem_op);

                pi.origin_block = block;
                pi.origin_ptr   = get_Store_ptr(mem_op);
                pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
                ir_nodeset_init(&pi.this_mem);
                ir_nodeset_init(&pi.user_mem);

                parallelize_store(&pi, pred);
        } else {
                return;
        }

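        /* Make every user of the Proj synchronise on the skipped operations,
         * too, then shorten the memory dependency of the operation itself. */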
        n = ir_nodeset_size(&pi.user_mem);
        if (n > 0) { /* nothing happened otherwise */
                ir_node  *sync;
                ir_node **in   = XMALLOCN(ir_node*, n+1);
                size_t    i;

                i = 0;
                in[i++] = proj;
                foreach_ir_nodeset(&pi.user_mem, node, iter) {
                        in[i++] = node;
                }
                assert(i == n+1);
                sync = new_r_Sync(block, i, in);
                xfree(in);
                edges_reroute_except(proj, sync, sync);

                n = ir_nodeset_size(&pi.this_mem);
                if (n == 1) {
                        sync = ir_nodeset_first(&pi.this_mem);
                } else {
                        in = XMALLOCN(ir_node*, n);
                        i = 0;
                        foreach_ir_nodeset(&pi.this_mem, node, iter) {
                                in[i++] = node;
                        }
                        assert(i == n);
                        sync = new_r_Sync(block, i, in);
                        xfree(in); /* do not leak the second in[] array */
                }
                set_memop_mem(mem_op, sync);
        }

        ir_nodeset_destroy(&pi.this_mem);
        ir_nodeset_destroy(&pi.user_mem);
}

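/* edges_reroute_except() needs consistent out edges; afterwards only the
 * control flow properties are known to still hold. */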
void opt_parallelize_mem(ir_graph *irg)
{
        assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES);
        irg_walk_graph(irg, NULL, walker, NULL);
        confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
}

ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
{
        return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);
}