2 * This file is part of libFirm.
3 * Copyright (C) 2012 University of Karlsruhe.
8 * @brief parallelizing Load/Store optimisation
9 * @author Christoph Mallon
13 #include "iroptimize.h"
24 #include "irnodeset.h"
/**
 * State carried through one parallelization search, rooted at a single
 * memory operation (the "origin" Load or Store).
 * NOTE(review): the origin_ptr / origin_mode fields referenced by
 * parallelize_load()/parallelize_store() are elided from this view of
 * the file — confirm against the full source.
 */
32 typedef struct parallelize_info
/* Block of the origin operation; the search never leaves this block. */
34 ir_node *origin_block;
/* Memory values the origin operation must still depend on; walker()
 * merges them into the new memory input of the origin op. */
37 ir_nodeset_t this_mem;
/* Memory users proven independent of the origin op; walker() reroutes
 * them to a Sync placed past the origin's memory Proj. */
38 ir_nodeset_t user_mem;
/**
 * Recursively walks the memory predecessor chain of a Load and partitions
 * it: memory users that cannot conflict with the Load go into
 * pi->user_mem (they may execute in parallel with it), while memory
 * values the Load must still depend on end up in pi->this_mem.
 *
 * @param pi   search state describing the origin Load
 * @param irn  current memory value being examined
 *
 * NOTE(review): structural lines (braces, return statements, the
 * is_Proj() guard before get_Proj_pred) are elided from this view.
 */
41 static void parallelize_load(parallelize_info *pi, ir_node *irn)
43 /* There is no point in investigating the same subgraph twice */
44 if (ir_nodeset_contains(&pi->user_mem, irn))
/* Only follow memory flow inside the origin Load's own block. */
47 if (get_nodes_block(irn) == pi->origin_block) {
49 ir_node *pred = get_Proj_pred(irn);
/* A preceding non-volatile Load never conflicts with another Load
 * (loads do not modify memory), so no alias check is needed here —
 * contrast with the Store case below. */
51 get_Load_volatility(pred) == volatility_non_volatile) {
52 ir_node *mem = get_Load_mem(pred);
53 //ir_nodeset_insert(&pi->this_mem, mem);
54 ir_nodeset_insert(&pi->user_mem, irn);
/* Continue the search past the independent Load. */
55 parallelize_load(pi, mem);
/* A preceding non-volatile Store may only be bypassed when alias
 * analysis proves it cannot write the loaded address. */
57 } else if (is_Store(pred) &&
58 get_Store_volatility(pred) == volatility_non_volatile) {
59 ir_mode *org_mode = pi->origin_mode;
60 ir_node *org_ptr = pi->origin_ptr;
61 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
62 ir_node *store_ptr = get_Store_ptr(pred);
63 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
64 ir_node *mem = get_Store_mem(pred);
65 ir_nodeset_insert(&pi->user_mem, irn);
66 parallelize_load(pi, mem);
/* A Sync merges several memory values: examine each predecessor
 * independently. */
70 } else if (is_Sync(irn)) {
71 int n = get_Sync_n_preds(irn);
74 for (i = 0; i < n; ++i) {
75 ir_node *sync_pred = get_Sync_pred(irn, i);
76 parallelize_load(pi, sync_pred);
/* Search stops here: irn remains a real dependency of the Load. */
81 ir_nodeset_insert(&pi->this_mem, irn);
/**
 * Store counterpart of parallelize_load(): walks the memory predecessor
 * chain of a Store, collecting memory users that provably do not alias
 * the stored address into pi->user_mem and the remaining mandatory
 * dependencies into pi->this_mem.
 *
 * Unlike the Load case, a preceding Load DOES need an alias check here:
 * the Store must not overtake a Load of the same address.
 *
 * @param pi   search state describing the origin Store
 * @param irn  current memory value being examined
 *
 * NOTE(review): structural lines (braces, returns, the is_Proj() guard,
 * and the declaration of `mem` used at original line 116) are elided
 * from this view.
 */
84 static void parallelize_store(parallelize_info *pi, ir_node *irn)
86 /* There is no point in investigating the same subgraph twice */
87 if (ir_nodeset_contains(&pi->user_mem, irn))
90 //ir_fprintf(stderr, "considering %+F\n", irn);
/* Only follow memory flow inside the origin Store's own block. */
91 if (get_nodes_block(irn) == pi->origin_block) {
93 ir_node *pred = get_Proj_pred(irn);
/* A preceding non-volatile Load may be bypassed only when alias
 * analysis proves it reads a different address than we store to. */
95 get_Load_volatility(pred) == volatility_non_volatile) {
96 ir_mode *org_mode = pi->origin_mode;
97 ir_node *org_ptr = pi->origin_ptr;
98 ir_mode *load_mode = get_Load_mode(pred);
99 ir_node *load_ptr = get_Load_ptr(pred);
100 if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
101 ir_node *mem = get_Load_mem(pred);
102 ir_nodeset_insert(&pi->user_mem, irn);
/* Continue the search past the independent Load. */
103 parallelize_store(pi, mem);
/* Same treatment for a preceding non-volatile Store to a provably
 * different address. */
106 } else if (is_Store(pred) &&
107 get_Store_volatility(pred) == volatility_non_volatile) {
108 ir_mode *org_mode = pi->origin_mode;
109 ir_node *org_ptr = pi->origin_ptr;
110 ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
111 ir_node *store_ptr = get_Store_ptr(pred);
112 if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
115 ir_nodeset_insert(&pi->user_mem, irn);
116 mem = get_Store_mem(pred);
117 parallelize_store(pi, mem);
/* A Sync merges several memory values: examine each predecessor
 * independently. */
121 } else if (is_Sync(irn)) {
122 int n = get_Sync_n_preds(irn);
125 for (i = 0; i < n; ++i) {
126 ir_node *sync_pred = get_Sync_pred(irn, i);
127 parallelize_store(pi, sync_pred);
/* Search stops here: irn remains a real dependency of the Store. */
132 ir_nodeset_insert(&pi->this_mem, irn);
/**
 * Graph-walker callback: for each memory Proj of a non-volatile
 * Load/Store, runs the parallelization search and, when independent
 * memory users were found, rewires them through a new Sync so they no
 * longer serialize behind this memory operation.
 *
 * @param proj  candidate node (only Proj of mode M is processed)
 * @param env   unused walker environment
 *
 * NOTE(review): the declarations of mem_op, pred, block, n, pi, sync,
 * in, i and the nodeset iterator, plus several braces/returns, are
 * elided from this view of the file.
 */
135 static void walker(ir_node *proj, void *env)
/* Only memory Projs are of interest. */
145 if (!is_Proj(proj)) return;
146 if (get_irn_mode(proj) != mode_M) return;
148 mem_op = get_Proj_pred(proj);
149 if (is_Load(mem_op)) {
/* Volatile accesses must keep their exact ordering. */
150 if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
152 block = get_nodes_block(mem_op);
153 pred = get_Load_mem(mem_op);
/* NOTE(review): trailing comma operator below — harmless, but should
 * presumably be ';' for consistency with the following lines. */
155 pi.origin_block = block,
156 pi.origin_ptr = get_Load_ptr(mem_op);
157 pi.origin_mode = get_Load_mode(mem_op);
158 ir_nodeset_init(&pi.this_mem);
159 ir_nodeset_init(&pi.user_mem);
/* Classify the memory predecessors of the Load. */
161 parallelize_load(&pi, pred);
162 } else if (is_Store(mem_op)) {
163 if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
165 block = get_nodes_block(mem_op);
166 pred = get_Store_mem(mem_op);
/* Same comma-operator oddity as the Load branch above. */
168 pi.origin_block = block,
169 pi.origin_ptr = get_Store_ptr(mem_op);
/* The "mode" of a Store is the mode of the value it stores. */
170 pi.origin_mode = get_irn_mode(get_Store_value(mem_op));
171 ir_nodeset_init(&pi.this_mem);
172 ir_nodeset_init(&pi.user_mem);
174 parallelize_store(&pi, pred);
/* Rewire only if at least one independent memory user was found. */
179 n = ir_nodeset_size(&pi.user_mem);
180 if (n > 0) { /* nothing happened otherwise */
/* n+1 inputs: presumably the independent users plus this Proj itself
 * — TODO confirm against the elided fill loop. */
182 ir_node **in = XMALLOCN(ir_node*, n+1);
187 foreach_ir_nodeset(&pi.user_mem, node, iter) {
191 sync = new_r_Sync(block, i, in);
/* Redirect all users of the memory Proj to the new Sync (except the
 * Sync itself, to avoid a self-loop). */
193 edges_reroute_except(proj, sync, sync);
/* Rebuild the memory input of the origin operation from this_mem:
 * a single element is used directly, otherwise a Sync merges them. */
195 n = ir_nodeset_size(&pi.this_mem);
197 sync = ir_nodeset_first(&pi.this_mem);
199 in = XMALLOCN(ir_node*, n);
201 foreach_ir_nodeset(&pi.this_mem, node, iter) {
205 sync = new_r_Sync(block, i, in);
207 set_memop_mem(mem_op, sync);
/* Release per-operation search state. */
210 ir_nodeset_destroy(&pi.this_mem);
211 ir_nodeset_destroy(&pi.user_mem);
/**
 * Memory-parallelization optimization entry point: breaks up
 * over-serialized memory dependency chains in @p irg by rerouting
 * independent memory users through Sync nodes (see walker()).
 *
 * @param irg  the graph to optimize
 */
214 void opt_parallelize_mem(ir_graph *irg)
/* edges_reroute_except() in walker() requires consistent out edges. */
216 assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES);
217 irg_walk_graph(irg, NULL, walker, NULL);
/* Memory edges were rewritten; only control-flow properties survive. */
218 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
/**
 * Creates an ir_graph pass wrapper around opt_parallelize_mem().
 *
 * @param name  pass name, or NULL for the default "parallelize-mem"
 * @return the newly created graph pass
 */
221 ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
223 return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);