Make spiller more deterministic.
[libfirm] / ir / opt / parallelize_mem.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   parallelizing Load/Store optimisation
23  * @author  Christoph Mallon
24  */
25 #include "config.h"
26
27 #include "iroptimize.h"
28
29 #include "array_t.h"
30 #include "debug.h"
31 #include "ircons.h"
32 #include "irgraph.h"
33 #include "irgmod.h"
34 #include "irgopt.h"
35 #include "irgwalk.h"
36 #include "irmemory.h"
37 #include "irnode.h"
38 #include "irnodeset.h"
39 #include "obst.h"
40 #include "irdump.h"
41 #include "irflag_t.h"
42 #include "irprintf.h"
43 #include "irpass.h"
44 #include "iredges.h"
45
/**
 * State for one parallelization attempt, seeded from a single non-volatile
 * Load/Store (the "origin") whose memory predecessors are being classified.
 */
typedef struct parallelize_info
{
	ir_node      *origin_block; /**< block of the origin memop; the walk never classifies nodes from other blocks as parallel */
	ir_node      *origin_ptr;   /**< address accessed by the origin (for alias queries) */
	ir_mode      *origin_mode;  /**< mode of the value accessed by the origin (for alias queries) */
	ir_nodeset_t  this_mem;     /**< memory nodes the origin must still depend on; becomes its new memory input (Sync'ed if more than one) */
	ir_nodeset_t  user_mem;     /**< memory Projs provably parallel to the origin; Sync'ed with the origin's Proj for its former users */
} parallelize_info;
54
55 static void parallelize_load(parallelize_info *pi, ir_node *irn)
56 {
57         /* There is no point in investigating the same subgraph twice */
58         if (ir_nodeset_contains(&pi->user_mem, irn))
59                 return;
60
61         if (get_nodes_block(irn) == pi->origin_block) {
62                 if (is_Proj(irn)) {
63                         ir_node *pred = get_Proj_pred(irn);
64                         if (is_Load(pred) &&
65                                         get_Load_volatility(pred) == volatility_non_volatile) {
66                                 ir_node *mem = get_Load_mem(pred);
67                                 //ir_nodeset_insert(&pi->this_mem, mem);
68                                 ir_nodeset_insert(&pi->user_mem, irn);
69                                 parallelize_load(pi, mem);
70                                 return;
71                         } else if (is_Store(pred) &&
72                                         get_Store_volatility(pred) == volatility_non_volatile) {
73                                 ir_mode *org_mode   = pi->origin_mode;
74                                 ir_node *org_ptr    = pi->origin_ptr;
75                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
76                                 ir_node *store_ptr  = get_Store_ptr(pred);
77                                 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
78                                         ir_node *mem = get_Store_mem(pred);
79                                         ir_nodeset_insert(&pi->user_mem, irn);
80                                         parallelize_load(pi, mem);
81                                         return;
82                                 }
83                         }
84                 } else if (is_Sync(irn)) {
85                         int n = get_Sync_n_preds(irn);
86                         int i;
87
88                         for (i = 0; i < n; ++i) {
89                                 ir_node *sync_pred = get_Sync_pred(irn, i);
90                                 parallelize_load(pi, sync_pred);
91                         }
92                         return;
93                 }
94         }
95         ir_nodeset_insert(&pi->this_mem, irn);
96 }
97
98 static void parallelize_store(parallelize_info *pi, ir_node *irn)
99 {
100         /* There is no point in investigating the same subgraph twice */
101         if (ir_nodeset_contains(&pi->user_mem, irn))
102                 return;
103
104         //ir_fprintf(stderr, "considering %+F\n", irn);
105         if (get_nodes_block(irn) == pi->origin_block) {
106                 if (is_Proj(irn)) {
107                         ir_node *pred = get_Proj_pred(irn);
108                         if (is_Load(pred) &&
109                                         get_Load_volatility(pred) == volatility_non_volatile) {
110                                 ir_mode *org_mode  = pi->origin_mode;
111                                 ir_node *org_ptr   = pi->origin_ptr;
112                                 ir_mode *load_mode = get_Load_mode(pred);
113                                 ir_node *load_ptr  = get_Load_ptr(pred);
114                                 if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
115                                         ir_node *mem = get_Load_mem(pred);
116                                         ir_nodeset_insert(&pi->user_mem, irn);
117                                         parallelize_store(pi, mem);
118                                         return;
119                                 }
120                         } else if (is_Store(pred) &&
121                                         get_Store_volatility(pred) == volatility_non_volatile) {
122                                 ir_mode *org_mode   = pi->origin_mode;
123                                 ir_node *org_ptr    = pi->origin_ptr;
124                                 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
125                                 ir_node *store_ptr  = get_Store_ptr(pred);
126                                 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
127                                         ir_node *mem;
128
129                                         ir_nodeset_insert(&pi->user_mem, irn);
130                                         mem = get_Store_mem(pred);
131                                         parallelize_store(pi, mem);
132                                         return;
133                                 }
134                         }
135                 } else if (is_Sync(irn)) {
136                         int n = get_Sync_n_preds(irn);
137                         int i;
138
139                         for (i = 0; i < n; ++i) {
140                                 ir_node *sync_pred = get_Sync_pred(irn, i);
141                                 parallelize_store(pi, sync_pred);
142                         }
143                         return;
144                 }
145         }
146         ir_nodeset_insert(&pi->this_mem, irn);
147 }
148
149 static void walker(ir_node *proj, void *env)
150 {
151         ir_node          *mem_op;
152         ir_node          *pred;
153         ir_node          *block;
154         size_t            n;
155         parallelize_info  pi;
156
157         (void)env;
158
159         if (!is_Proj(proj)) return;
160         if (get_irn_mode(proj) != mode_M) return;
161
162         mem_op = get_Proj_pred(proj);
163         if (is_Load(mem_op)) {
164                 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
165
166                 block = get_nodes_block(mem_op);
167                 pred  = get_Load_mem(mem_op);
168
169                 pi.origin_block = block,
170                 pi.origin_ptr   = get_Load_ptr(mem_op);
171                 pi.origin_mode  = get_Load_mode(mem_op);
172                 ir_nodeset_init(&pi.this_mem);
173                 ir_nodeset_init(&pi.user_mem);
174
175                 parallelize_load(&pi, pred);
176         } else if (is_Store(mem_op)) {
177                 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
178
179                 block = get_nodes_block(mem_op);
180                 pred  = get_Store_mem(mem_op);
181
182                 pi.origin_block = block,
183                 pi.origin_ptr   = get_Store_ptr(mem_op);
184                 pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
185                 ir_nodeset_init(&pi.this_mem);
186                 ir_nodeset_init(&pi.user_mem);
187
188                 parallelize_store(&pi, pred);
189         } else {
190                 return;
191         }
192
193         n = ir_nodeset_size(&pi.user_mem);
194         if (n > 0) { /* nothing happened otherwise */
195                 ir_node                *sync;
196                 ir_node               **in   = XMALLOCN(ir_node*, n+1);
197                 ir_node                *node;
198                 ir_nodeset_iterator_t   iter;
199                 size_t                  i;
200
201                 i = 0;
202                 in[i++] = proj;
203                 foreach_ir_nodeset(&pi.user_mem, node, iter) {
204                         in[i++] = node;
205                 }
206                 assert(i == n+1);
207                 sync = new_r_Sync(block, i, in);
208                 xfree(in);
209                 edges_reroute_except(proj, sync, sync);
210
211                 n = ir_nodeset_size(&pi.this_mem);
212                 if (n == 1) {
213                         ir_nodeset_iterator_init(&iter, &pi.this_mem);
214                         sync = ir_nodeset_iterator_next(&iter);
215                 } else {
216                         in = XMALLOCN(ir_node*, n);
217                         i = 0;
218                         foreach_ir_nodeset(&pi.this_mem, node, iter) {
219                                 in[i++] = node;
220                         }
221                         assert(i == n);
222                         sync = new_r_Sync(block, i, in);
223                 }
224                 set_memop_mem(mem_op, sync);
225         }
226
227         ir_nodeset_destroy(&pi.this_mem);
228         ir_nodeset_destroy(&pi.user_mem);
229 }
230
/* Run the Load/Store parallelization on one graph (public entry point). */
void opt_parallelize_mem(ir_graph *irg)
{
	/* walker() reroutes the users of memory Projs, which requires
	 * consistent out edges. */
	assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES);
	irg_walk_graph(irg, NULL, walker, NULL);
	/* Only control flow is untouched; memory chains were rewritten. */
	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
}
237
/* Wrap opt_parallelize_mem() as a graph pass; NULL name defaults to
 * "parallelize-mem". */
ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
{
	return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);
}