/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   parallelizing Load/Store optimisation
 * @author  Christoph Mallon
 */
28 #include "iroptimize.h"
39 #include "irnodeset.h"
45 #include "opt_manage.h"
47 typedef struct parallelize_info
49 ir_node *origin_block;
52 ir_nodeset_t this_mem;
53 ir_nodeset_t user_mem;
56 static void parallelize_load(parallelize_info *pi, ir_node *irn)
58 /* There is no point in investigating the same subgraph twice */
59 if (ir_nodeset_contains(&pi->user_mem, irn))
62 if (get_nodes_block(irn) == pi->origin_block) {
64 ir_node *pred = get_Proj_pred(irn);
66 get_Load_volatility(pred) == volatility_non_volatile) {
67 ir_node *mem = get_Load_mem(pred);
68 //ir_nodeset_insert(&pi->this_mem, mem);
69 ir_nodeset_insert(&pi->user_mem, irn);
70 parallelize_load(pi, mem);
72 } else if (is_Store(pred) &&
73 get_Store_volatility(pred) == volatility_non_volatile) {
74 ir_mode *org_mode = pi->origin_mode;
75 ir_node *org_ptr = pi->origin_ptr;
76 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
77 ir_node *store_ptr = get_Store_ptr(pred);
78 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
79 ir_node *mem = get_Store_mem(pred);
80 ir_nodeset_insert(&pi->user_mem, irn);
81 parallelize_load(pi, mem);
85 } else if (is_Sync(irn)) {
86 int n = get_Sync_n_preds(irn);
89 for (i = 0; i < n; ++i) {
90 ir_node *sync_pred = get_Sync_pred(irn, i);
91 parallelize_load(pi, sync_pred);
96 ir_nodeset_insert(&pi->this_mem, irn);
99 static void parallelize_store(parallelize_info *pi, ir_node *irn)
101 /* There is no point in investigating the same subgraph twice */
102 if (ir_nodeset_contains(&pi->user_mem, irn))
105 //ir_fprintf(stderr, "considering %+F\n", irn);
106 if (get_nodes_block(irn) == pi->origin_block) {
108 ir_node *pred = get_Proj_pred(irn);
110 get_Load_volatility(pred) == volatility_non_volatile) {
111 ir_mode *org_mode = pi->origin_mode;
112 ir_node *org_ptr = pi->origin_ptr;
113 ir_mode *load_mode = get_Load_mode(pred);
114 ir_node *load_ptr = get_Load_ptr(pred);
115 if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
116 ir_node *mem = get_Load_mem(pred);
117 ir_nodeset_insert(&pi->user_mem, irn);
118 parallelize_store(pi, mem);
121 } else if (is_Store(pred) &&
122 get_Store_volatility(pred) == volatility_non_volatile) {
123 ir_mode *org_mode = pi->origin_mode;
124 ir_node *org_ptr = pi->origin_ptr;
125 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
126 ir_node *store_ptr = get_Store_ptr(pred);
127 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
130 ir_nodeset_insert(&pi->user_mem, irn);
131 mem = get_Store_mem(pred);
132 parallelize_store(pi, mem);
136 } else if (is_Sync(irn)) {
137 int n = get_Sync_n_preds(irn);
140 for (i = 0; i < n; ++i) {
141 ir_node *sync_pred = get_Sync_pred(irn, i);
142 parallelize_store(pi, sync_pred);
147 ir_nodeset_insert(&pi->this_mem, irn);
150 static void walker(ir_node *proj, void *env)
160 if (!is_Proj(proj)) return;
161 if (get_irn_mode(proj) != mode_M) return;
163 mem_op = get_Proj_pred(proj);
164 if (is_Load(mem_op)) {
165 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
167 block = get_nodes_block(mem_op);
168 pred = get_Load_mem(mem_op);
170 pi.origin_block = block,
171 pi.origin_ptr = get_Load_ptr(mem_op);
172 pi.origin_mode = get_Load_mode(mem_op);
173 ir_nodeset_init(&pi.this_mem);
174 ir_nodeset_init(&pi.user_mem);
176 parallelize_load(&pi, pred);
177 } else if (is_Store(mem_op)) {
178 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
180 block = get_nodes_block(mem_op);
181 pred = get_Store_mem(mem_op);
183 pi.origin_block = block,
184 pi.origin_ptr = get_Store_ptr(mem_op);
185 pi.origin_mode = get_irn_mode(get_Store_value(mem_op));
186 ir_nodeset_init(&pi.this_mem);
187 ir_nodeset_init(&pi.user_mem);
189 parallelize_store(&pi, pred);
194 n = ir_nodeset_size(&pi.user_mem);
195 if (n != 0) { /* nothing happened otherwise */
196 ir_graph *irg = get_irn_irg(block);
199 ir_nodeset_iterator_t iter;
203 NEW_ARR_A(ir_node*, in, n);
205 in[i++] = new_r_Unknown(irg, mode_M);
206 ir_nodeset_iterator_init(&iter, &pi.user_mem);
208 ir_node* p = ir_nodeset_iterator_next(&iter);
209 if (p == NULL) break;
213 sync = new_r_Sync(block, n, in);
214 exchange(proj, sync);
216 assert((long)pn_Load_M == (long)pn_Store_M);
217 proj = new_r_Proj(mem_op, mode_M, pn_Load_M);
218 set_Sync_pred(sync, 0, proj);
220 n = ir_nodeset_size(&pi.this_mem);
221 ir_nodeset_iterator_init(&iter, &pi.this_mem);
223 sync = ir_nodeset_iterator_next(&iter);
225 NEW_ARR_A(ir_node*, in, n);
228 ir_node* p = ir_nodeset_iterator_next(&iter);
229 if (p == NULL) break;
233 sync = new_r_Sync(block, n, in);
235 set_memop_mem(mem_op, sync);
238 ir_nodeset_destroy(&pi.this_mem);
239 ir_nodeset_destroy(&pi.user_mem);
242 static ir_graph_state_t do_parallelize_mem(ir_graph *irg)
244 irg_walk_graph(irg, NULL, walker, NULL);
/* Optimization descriptor handed to perform_irg_optimization() below.
 * NOTE(review): the initializer's field list is truncated in this view
 * (name / requirements / transformation callback presumably follow) —
 * verify against the full file. */
249 static optdesc_t opt_parallel_mem = {
255 void opt_parallelize_mem(ir_graph *irg)
257 perform_irg_optimization(irg, &opt_parallel_mem);
260 ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
262 return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);