Lines 33-281
Boston, MA 02110-1301, USA. */
Link Here
|
33 |
#include "tree-gimple.h" |
33 |
#include "tree-gimple.h" |
34 |
#include "tree-dump.h" |
34 |
#include "tree-dump.h" |
35 |
#include "timevar.h" |
35 |
#include "timevar.h" |
36 |
#include "hashtab.h" |
|
|
37 |
#include "tree-iterator.h" |
36 |
#include "tree-iterator.h" |
38 |
#include "tree-pass.h" |
37 |
#include "tree-pass.h" |
|
|
38 |
#include "alloc-pool.h" |
39 |
#include "vec.h" |
40 |
#include "langhooks.h" |
39 |
|
41 |
|
40 |
/* This is a simple global reassociation pass that uses a combination |
42 |
/* This is a simple global reassociation pass. It is, in part, based |
41 |
of heuristics and a hashtable to try to expose more operations to |
43 |
on the LLVM pass of the same name (They do some things more/less |
42 |
CSE. |
44 |
than we do, in different orders, etc). |
43 |
|
|
|
44 |
The basic idea behind the heuristic is to rank expressions by |
45 |
depth of the computation tree and loop depth, and try to produce |
46 |
expressions consisting of small rank operations, as they are more |
47 |
likely to reoccur. In addition, we use a hashtable to try to see |
48 |
if we can transpose an operation into something we have seen |
49 |
before. |
50 |
|
51 |
Note that the way the hashtable is structured will sometimes find |
52 |
matches that will not expose additional redundancies, since it is |
53 |
not unwound as we traverse back up one branch of the dominator |
54 |
tree and down another. However, the cost of improving this is |
55 |
probably not worth the additional benefits it will bring. */ |
56 |
|
45 |
|
57 |
/* Statistics */ |
46 |
It consists of five steps: |
58 |
static struct |
|
|
59 |
{ |
60 |
int reassociated_by_rank; |
61 |
int reassociated_by_match; |
62 |
} reassociate_stats; |
63 |
|
47 |
|
|
|
48 |
1. Breaking up subtract operations into addition + negate, where |
49 |
it would promote the reassociation of adds. |
64 |
|
50 |
|
|
|
51 |
2. Left linearization of the expression trees, so that (A+B)+(C+D) |
52 |
becomes (((A+B)+C)+D), which is easier for us to rewrite later. |
53 |
During linearization, we place the operands of the binary |
54 |
expressions into the a vector of operand_entry_t |
65 |
|
55 |
|
66 |
/* Seen binary operator hashtable. */ |
56 |
3. Optimization of the operand lists, eliminating things like a + |
67 |
static htab_t seen_binops; |
57 |
-a, a & a, etc. |
68 |
|
58 |
|
69 |
/* Binary operator struct. */ |
59 |
4. Rewrite the expression trees we linearized and optimized so |
|
|
60 |
they are in proper rank order. |
70 |
|
61 |
|
71 |
typedef struct seen_binop_d |
62 |
5. Repropagate negates, as nothing else will clean it up ATM. |
72 |
{ |
|
|
73 |
tree op1; |
74 |
tree op2; |
75 |
} *seen_binop_t; |
76 |
|
63 |
|
77 |
/* Return a SEEN_BINOP_T if we have seen an associative binary |
64 |
A bit of theory on #4, since nobody seems to write anything down |
78 |
operator with OP1 and OP2 in it. */ |
65 |
about why it makes sense to do it the way they do it: |
79 |
|
66 |
|
80 |
static seen_binop_t |
67 |
We could do this much nicer theoretically, but don't (for reasons |
81 |
find_seen_binop (tree op1, tree op2) |
68 |
explained after how to do it theoretically nice :P). |
82 |
{ |
|
|
83 |
void **slot; |
84 |
struct seen_binop_d sbd; |
85 |
sbd.op1 = op1; |
86 |
sbd.op2 = op2; |
87 |
slot = htab_find_slot (seen_binops, &sbd, NO_INSERT); |
88 |
if (!slot) |
89 |
return NULL; |
90 |
return ((seen_binop_t) *slot); |
91 |
} |
92 |
|
69 |
|
93 |
/* Insert a binary operator consisting of OP1 and OP2 into the |
70 |
In order to promote the most redundancy elimination, you want |
94 |
SEEN_BINOP table. */ |
71 |
binary expressions whose operands are the same rank (or |
|
|
72 |
preferrably, the same value) exposed to the redundancy eliminator, |
73 |
for possible elimination. |
95 |
|
74 |
|
96 |
static void |
75 |
So the way to do this if we really cared, is to build the new op |
97 |
insert_seen_binop (tree op1, tree op2) |
76 |
tree from the leaves to the roots, merging as you go, and putting the |
98 |
{ |
77 |
new op on the end of the worklist, until you are left with one |
99 |
void **slot; |
78 |
thing on the worklist. |
100 |
seen_binop_t new_pair = xmalloc (sizeof (*new_pair)); |
|
|
101 |
new_pair->op1 = op1; |
102 |
new_pair->op2 = op2; |
103 |
slot = htab_find_slot (seen_binops, new_pair, INSERT); |
104 |
if (*slot != NULL) |
105 |
free (*slot); |
106 |
*slot = new_pair; |
107 |
} |
108 |
|
79 |
|
109 |
/* Return the hash value for a seen binop structure pointed to by P. |
80 |
IE if you have to rewrite the following set of operands (listed with |
110 |
Because all the binops we consider are associative, we just add the |
81 |
rank in parentheses), with opcode PLUS_EXPR: |
111 |
hash value for op1 and op2. */ |
|
|
112 |
|
82 |
|
113 |
static hashval_t |
83 |
a (1), b (1), c (1), d (2), e (2) |
114 |
seen_binop_hash (const void *p) |
|
|
115 |
{ |
116 |
const seen_binop_t sb = (seen_binop_t) p; |
117 |
return iterative_hash_expr (sb->op1, 0) + iterative_hash_expr (sb->op2, 0); |
118 |
} |
119 |
|
84 |
|
120 |
/* Return true if two seen binop structures pointed to by P1 and P2 are equal. |
|
|
121 |
We have to check the operators both ways because we don't know what |
122 |
order they appear in the table. */ |
123 |
|
85 |
|
124 |
static int |
86 |
We start with our merge worklist empty, and the ops list with all of |
125 |
seen_binop_eq (const void *p1, const void *p2) |
87 |
those on it. |
126 |
{ |
88 |
|
127 |
const seen_binop_t sb1 = (seen_binop_t) p1; |
89 |
You want to first merge all leaves of the same rank, as much as |
128 |
const seen_binop_t sb2 = (seen_binop_t) p2; |
90 |
possible. |
129 |
return (sb1->op1 == sb2->op1 && sb1->op2 == sb2->op2) |
91 |
|
130 |
|| (sb1->op2 == sb2->op1 && sb1->op1 == sb2->op2); |
92 |
So first build a binary op of |
131 |
} |
93 |
|
|
|
94 |
mergetmp = a + b, and put "mergetmp" on the merge worklist. |
95 |
|
96 |
Because there is no three operand form of PLUS_EXPR, c is not going to |
97 |
be exposed to redundancy elimination as a rank 1 operand. |
98 |
|
99 |
So you might as well throw it on the merge worklist (you could also |
100 |
consider it to now be a rank two operand, and merge it with d and e, |
101 |
but in this case, you then have evicted e from a binary op. So at |
102 |
least in this situation, you can't win.) |
103 |
|
104 |
Then build a binary op of d + e |
105 |
mergetmp2 = d + e |
106 |
|
107 |
and put mergetmp2 on the merge worklist. |
108 |
|
109 |
so merge worklist = {mergetmp, c, mergetmp2} |
110 |
|
111 |
Continue building binary ops of these operations until you have only |
112 |
one operation left on the worklist. |
113 |
|
114 |
So we have |
115 |
|
116 |
build binary op |
117 |
mergetmp3 = mergetmp + c |
132 |
|
118 |
|
133 |
/* Value rank structure. */ |
119 |
worklist = {mergetmp2, mergetmp3} |
134 |
|
120 |
|
135 |
typedef struct valrank_d |
121 |
mergetmp4 = mergetmp2 + mergetmp3 |
|
|
122 |
|
123 |
worklist = {mergetmp4} |
124 |
|
125 |
because we have one operation left, we can now just set the original |
126 |
statement equal to the result of that operation. |
127 |
|
128 |
This will at least expose a + b and d + e to redundancy elimination |
129 |
as binary operations. |
130 |
|
131 |
For extra points, you can reuse the old statements to build the |
132 |
mergetmps, since you shouldn't run out. |
133 |
|
134 |
|
135 |
So why don't we do this? |
136 |
|
137 |
Because it's expensive, and rarely will help. Most trees we are |
138 |
reassociating have 3 or less ops. If they have 2 ops, they already |
139 |
will be written into a nice single binary op. If you have 3 ops, a |
140 |
single simple check suffices to tell you whether the first two are of the |
141 |
same rank. If so, you know to order it |
142 |
|
143 |
mergetmp = op1 + op2 |
144 |
newstmt = mergetmp + op3 |
145 |
|
146 |
instead of |
147 |
mergetmp = op2 + op3 |
148 |
newstmt = mergetmp + op1 |
149 |
|
150 |
If all three are of the same rank, you can't expose them all in a |
151 |
single binary operator anyway, so the above is *still* the best you |
152 |
can do. |
153 |
|
154 |
Thus, this is what we do. When we have three ops left, we check to see |
155 |
what order to put them in, and call it a day. As a nod to vector sum |
156 |
reduction, we check if any of ops are a really a phi node that is a |
157 |
destructive update for the associating op, and keep the destructive |
158 |
update together for vector sum reduction recognition. */ |
159 |
|
160 |
|
161 |
/* Statistics */ |
162 |
static struct |
163 |
{ |
164 |
int linearized; |
165 |
int constants_eliminated; |
166 |
int ops_eliminated; |
167 |
int rewritten; |
168 |
} reassociate_stats; |
169 |
|
170 |
/* Operator, rank pair. */ |
171 |
typedef struct operand_entry |
136 |
{ |
172 |
{ |
137 |
tree e; |
173 |
unsigned int rank; |
138 |
unsigned int rank; |
174 |
tree op; |
139 |
} *valrank_t; |
175 |
} *operand_entry_t; |
|
|
176 |
|
177 |
static alloc_pool operand_entry_pool; |
178 |
|
140 |
|
179 |
|
141 |
/* Starting rank number for a given basic block, so that we can rank |
180 |
/* Starting rank number for a given basic block, so that we can rank |
142 |
operations using unmovable instructions in that BB based on the bb |
181 |
operations using unmovable instructions in that BB based on the bb |
143 |
depth. */ |
182 |
depth. */ |
144 |
static unsigned int *bb_rank; |
183 |
static unsigned int *bb_rank; |
145 |
|
184 |
|
146 |
/* Value rank hashtable. */ |
185 |
/* Operand->rank hashtable. */ |
147 |
static htab_t value_rank; |
186 |
static htab_t operand_rank; |
148 |
|
187 |
|
149 |
|
188 |
|
150 |
/* Look up the value rank structure for expression E. */ |
189 |
/* Look up the operand rank structure for expression E. */ |
151 |
|
190 |
|
152 |
static valrank_t |
191 |
static operand_entry_t |
153 |
find_value_rank (tree e) |
192 |
find_operand_rank (tree e) |
154 |
{ |
193 |
{ |
155 |
void **slot; |
194 |
void **slot; |
156 |
struct valrank_d vrd; |
195 |
struct operand_entry vrd; |
157 |
vrd.e = e; |
196 |
|
158 |
slot = htab_find_slot (value_rank, &vrd, NO_INSERT); |
197 |
vrd.op = e; |
|
|
198 |
slot = htab_find_slot (operand_rank, &vrd, NO_INSERT); |
159 |
if (!slot) |
199 |
if (!slot) |
160 |
return NULL; |
200 |
return NULL; |
161 |
return ((valrank_t) *slot); |
201 |
return ((operand_entry_t) *slot); |
162 |
} |
202 |
} |
163 |
|
203 |
|
164 |
/* Insert {E,RANK} into the value rank hashtable. */ |
204 |
/* Insert {E,RANK} into the operand rank hashtable. */ |
165 |
|
205 |
|
166 |
static void |
206 |
static void |
167 |
insert_value_rank (tree e, unsigned int rank) |
207 |
insert_operand_rank (tree e, unsigned int rank) |
168 |
{ |
208 |
{ |
169 |
void **slot; |
209 |
void **slot; |
170 |
valrank_t new_pair = xmalloc (sizeof (*new_pair)); |
210 |
operand_entry_t new_pair = pool_alloc (operand_entry_pool); |
171 |
new_pair->e = e; |
211 |
|
|
|
212 |
new_pair->op = e; |
172 |
new_pair->rank = rank; |
213 |
new_pair->rank = rank; |
173 |
slot = htab_find_slot (value_rank, new_pair, INSERT); |
214 |
slot = htab_find_slot (operand_rank, new_pair, INSERT); |
174 |
gcc_assert (*slot == NULL); |
215 |
gcc_assert (*slot == NULL); |
175 |
*slot = new_pair; |
216 |
*slot = new_pair; |
176 |
|
|
|
177 |
} |
217 |
} |
178 |
|
218 |
|
179 |
|
219 |
/* Return the hash value for a operand rank structure */ |
180 |
/* Return the hash value for a value rank structure */ |
|
|
181 |
|
220 |
|
182 |
static hashval_t |
221 |
static hashval_t |
183 |
valrank_hash (const void *p) |
222 |
operand_entry_hash (const void *p) |
184 |
{ |
223 |
{ |
185 |
const valrank_t vr = (valrank_t) p; |
224 |
const operand_entry_t vr = (operand_entry_t) p; |
186 |
return iterative_hash_expr (vr->e, 0); |
225 |
return iterative_hash_expr (vr->op, 0); |
187 |
} |
226 |
} |
188 |
|
227 |
|
189 |
/* Return true if two value rank structures are equal. */ |
228 |
/* Return true if two operand rank structures are equal. */ |
190 |
|
229 |
|
191 |
static int |
230 |
static int |
192 |
valrank_eq (const void *p1, const void *p2) |
231 |
operand_entry_eq (const void *p1, const void *p2) |
193 |
{ |
|
|
194 |
const valrank_t vr1 = (valrank_t) p1; |
195 |
const valrank_t vr2 = (valrank_t) p2; |
196 |
return vr1->e == vr2->e; |
197 |
} |
198 |
|
199 |
|
200 |
/* Initialize the reassociation pass. */ |
201 |
|
202 |
static void |
203 |
init_reassoc (void) |
204 |
{ |
232 |
{ |
205 |
int i; |
233 |
const operand_entry_t vr1 = (operand_entry_t) p1; |
206 |
unsigned int rank = 2; |
234 |
const operand_entry_t vr2 = (operand_entry_t) p2; |
207 |
|
235 |
return vr1->op == vr2->op; |
208 |
tree param; |
|
|
209 |
int *bbs = xmalloc ((last_basic_block + 1) * sizeof (int)); |
210 |
|
211 |
memset (&reassociate_stats, 0, sizeof (reassociate_stats)); |
212 |
|
213 |
/* Reverse RPO (Reverse Post Order) will give us something where |
214 |
deeper loops come later. */ |
215 |
flow_reverse_top_sort_order_compute (bbs); |
216 |
bb_rank = xcalloc (last_basic_block + 1, sizeof (unsigned int)); |
217 |
value_rank = htab_create (511, valrank_hash, |
218 |
valrank_eq, free); |
219 |
seen_binops = htab_create (511, seen_binop_hash, |
220 |
seen_binop_eq, free); |
221 |
|
222 |
/* Give each argument a distinct rank. */ |
223 |
for (param = DECL_ARGUMENTS (current_function_decl); |
224 |
param; |
225 |
param = TREE_CHAIN (param)) |
226 |
{ |
227 |
if (default_def (param) != NULL) |
228 |
{ |
229 |
tree def = default_def (param); |
230 |
insert_value_rank (def, ++rank); |
231 |
} |
232 |
} |
233 |
/* Give the chain decl a distinct rank. */ |
234 |
if (cfun->static_chain_decl != NULL) |
235 |
{ |
236 |
tree def = default_def (cfun->static_chain_decl); |
237 |
if (def != NULL) |
238 |
insert_value_rank (def, ++rank); |
239 |
} |
240 |
|
241 |
/* Set up rank for each BB */ |
242 |
for (i = 0; i < n_basic_blocks; i++) |
243 |
bb_rank[bbs[i]] = ++rank << 16; |
244 |
|
245 |
free (bbs); |
246 |
calculate_dominance_info (CDI_DOMINATORS); |
247 |
|
248 |
} |
236 |
} |
249 |
|
237 |
|
250 |
/* Cleanup after the reassociation pass, and print stats if |
|
|
251 |
requested. */ |
252 |
|
253 |
static void |
254 |
fini_reassoc (void) |
255 |
{ |
256 |
|
257 |
if (dump_file && (dump_flags & TDF_STATS)) |
258 |
{ |
259 |
fprintf (dump_file, "Reassociation stats:\n"); |
260 |
fprintf (dump_file, "Reassociated by rank: %d\n", reassociate_stats.reassociated_by_rank); |
261 |
fprintf (dump_file, "Reassociated by match: %d\n", reassociate_stats.reassociated_by_match); |
262 |
} |
263 |
htab_delete (value_rank); |
264 |
htab_delete (seen_binops); |
265 |
free (bb_rank); |
266 |
} |
267 |
|
238 |
|
268 |
/* Given an expression E, return the rank of the expression. */ |
239 |
/* Given an expression E, return the rank of the expression. */ |
269 |
|
240 |
|
270 |
static unsigned int |
241 |
static unsigned int |
271 |
get_rank (tree e) |
242 |
get_rank (tree e) |
272 |
{ |
243 |
{ |
273 |
valrank_t vr; |
244 |
operand_entry_t vr; |
274 |
|
245 |
|
275 |
/* Constants have rank 0. */ |
246 |
/* Constants have rank 0. */ |
276 |
if (is_gimple_min_invariant (e)) |
247 |
if (is_gimple_min_invariant (e)) |
277 |
return 0; |
248 |
return 0; |
278 |
|
249 |
|
279 |
/* SSA_NAME's have the rank of the expression they are the result |
250 |
/* SSA_NAME's have the rank of the expression they are the result |
280 |
of. |
251 |
of. |
281 |
For globals and uninitialized values, the rank is 0. |
252 |
For globals and uninitialized values, the rank is 0. |
Lines 290-313
get_rank (tree e)
Link Here
|
290 |
if (TREE_CODE (e) == SSA_NAME) |
261 |
if (TREE_CODE (e) == SSA_NAME) |
291 |
{ |
262 |
{ |
292 |
tree stmt; |
263 |
tree stmt; |
293 |
tree rhs; |
264 |
tree rhs; |
294 |
unsigned int rank, maxrank; |
265 |
unsigned int rank, maxrank; |
295 |
int i; |
266 |
int i; |
296 |
|
267 |
|
297 |
if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL |
268 |
if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL |
298 |
&& e == default_def (SSA_NAME_VAR (e))) |
269 |
&& e == default_def (SSA_NAME_VAR (e))) |
299 |
return find_value_rank (e)->rank; |
270 |
return find_operand_rank (e)->rank; |
300 |
|
271 |
|
301 |
stmt = SSA_NAME_DEF_STMT (e); |
272 |
stmt = SSA_NAME_DEF_STMT (e); |
302 |
if (bb_for_stmt (stmt) == NULL) |
273 |
if (bb_for_stmt (stmt) == NULL) |
303 |
return 0; |
274 |
return 0; |
304 |
|
275 |
|
305 |
if (TREE_CODE (stmt) != MODIFY_EXPR |
276 |
if (TREE_CODE (stmt) != MODIFY_EXPR |
306 |
|| !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS)) |
277 |
|| !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS)) |
307 |
return bb_rank[bb_for_stmt (stmt)->index]; |
278 |
return bb_rank[bb_for_stmt (stmt)->index]; |
308 |
|
279 |
|
309 |
/* If we already have a rank for this expression, use that. */ |
280 |
/* If we already have a rank for this expression, use that. */ |
310 |
vr = find_value_rank (e); |
281 |
vr = find_operand_rank (e); |
311 |
if (vr) |
282 |
if (vr) |
312 |
return vr->rank; |
283 |
return vr->rank; |
313 |
|
284 |
|
Lines 318-341
get_rank (tree e)
Link Here
|
318 |
rhs = TREE_OPERAND (stmt, 1); |
289 |
rhs = TREE_OPERAND (stmt, 1); |
319 |
if (TREE_CODE_LENGTH (TREE_CODE (rhs)) == 0) |
290 |
if (TREE_CODE_LENGTH (TREE_CODE (rhs)) == 0) |
320 |
rank = MAX (rank, get_rank (rhs)); |
291 |
rank = MAX (rank, get_rank (rhs)); |
321 |
else |
292 |
else |
322 |
{ |
293 |
{ |
323 |
for (i = 0; |
294 |
for (i = 0; |
324 |
i < TREE_CODE_LENGTH (TREE_CODE (rhs)) |
295 |
i < TREE_CODE_LENGTH (TREE_CODE (rhs)) |
325 |
&& TREE_OPERAND (rhs, i) |
296 |
&& TREE_OPERAND (rhs, i) |
326 |
&& rank != maxrank; i++) |
297 |
&& rank != maxrank; |
|
|
298 |
i++) |
327 |
rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i))); |
299 |
rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i))); |
328 |
} |
300 |
} |
329 |
|
301 |
|
330 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
302 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
331 |
{ |
303 |
{ |
332 |
fprintf (dump_file, "Rank for "); |
304 |
fprintf (dump_file, "Rank for "); |
333 |
print_generic_expr (dump_file, e, 0); |
305 |
print_generic_expr (dump_file, e, 0); |
334 |
fprintf (dump_file, " is %d\n", (rank + 1)); |
306 |
fprintf (dump_file, " is %d\n", (rank + 1)); |
335 |
} |
307 |
} |
336 |
|
308 |
|
337 |
/* Note the rank in the hashtable so we don't recompute it. */ |
309 |
/* Note the rank in the hashtable so we don't recompute it. */ |
338 |
insert_value_rank (e, (rank + 1)); |
310 |
insert_operand_rank (e, (rank + 1)); |
339 |
return (rank + 1); |
311 |
return (rank + 1); |
340 |
} |
312 |
} |
341 |
|
313 |
|
Lines 343-624
get_rank (tree e)
Link Here
|
343 |
return 0; |
315 |
return 0; |
344 |
} |
316 |
} |
345 |
|
317 |
|
|
|
318 |
DEF_VEC_P(operand_entry_t); |
319 |
DEF_VEC_ALLOC_P(operand_entry_t, heap); |
320 |
|
321 |
/* We want integer ones to end up last no matter what, since they are |
322 |
the ones we can do the most with. */ |
323 |
#define INTEGER_CONST_TYPE 1 << 3 |
324 |
#define FLOAT_CONST_TYPE 1 << 2 |
325 |
#define OTHER_CONST_TYPE 1 << 1 |
326 |
|
327 |
/* Classify an invariant tree into integer, float, or other, so that |
328 |
we can sort them to be near other constants of the same type. */ |
329 |
static inline int |
330 |
constant_type (tree t) |
331 |
{ |
332 |
if (INTEGRAL_TYPE_P (TREE_TYPE (t))) |
333 |
return INTEGER_CONST_TYPE; |
334 |
else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) |
335 |
return FLOAT_CONST_TYPE; |
336 |
else |
337 |
return OTHER_CONST_TYPE; |
338 |
} |
339 |
|
340 |
/* qsort comparison function to sort operand entries PA and PB by rank |
341 |
so that the sorted array is ordered by rank in decreasing order. */ |
342 |
static int |
343 |
sort_by_operand_rank (const void *pa, const void *pb) |
344 |
{ |
345 |
const operand_entry_t oea = *(const operand_entry_t *)pa; |
346 |
const operand_entry_t oeb = *(const operand_entry_t *)pb; |
347 |
|
348 |
/* It's nicer for optimize_expression if constants that are likely |
349 |
to fold when added/multiplied//whatever are put next to each |
350 |
other. Since all constants have rank 0, order them by type. */ |
351 |
if (oeb->rank == 0 && oea->rank == 0) |
352 |
return constant_type (oeb->op) - constant_type (oea->op); |
353 |
|
354 |
/* Lastly, make sure the versions that are the same go next to each |
355 |
other. We use SSA_NAME_VERSION because it's stable. */ |
356 |
if ((oeb->rank - oea->rank == 0) |
357 |
&& TREE_CODE (oea->op) == SSA_NAME |
358 |
&& TREE_CODE (oeb->op) == SSA_NAME) |
359 |
return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); |
360 |
|
361 |
return oeb->rank - oea->rank; |
362 |
} |
363 |
|
364 |
/* Add an operand entry to *OPS for the tree operand OP. */ |
365 |
|
366 |
static void |
367 |
add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op) |
368 |
{ |
369 |
operand_entry_t oe = pool_alloc (operand_entry_pool); |
370 |
|
371 |
oe->op = op; |
372 |
oe->rank = get_rank (op); |
373 |
VEC_safe_push (operand_entry_t, heap, *ops, oe); |
374 |
} |
346 |
|
375 |
|
347 |
/* Decide whether we should transpose RHS and some operand of |
376 |
/* Return true if STMT is reassociable operation containing a binary |
348 |
LHSDEFOP. |
377 |
operation with tree code CODE. */ |
349 |
If yes, then return true and set TAKEOP to the operand number of LHSDEFOP to |
|
|
350 |
switch RHS for. |
351 |
Otherwise, return false. */ |
352 |
|
378 |
|
353 |
static bool |
379 |
static bool |
354 |
should_transpose (tree rhs ATTRIBUTE_UNUSED, |
380 |
is_reassociable_op (tree stmt, enum tree_code code) |
355 |
unsigned int rhsrank, |
381 |
{ |
356 |
tree lhsdefop, unsigned int *takeop) |
382 |
if (!IS_EMPTY_STMT (stmt) |
357 |
{ |
383 |
&& TREE_CODE (stmt) == MODIFY_EXPR |
358 |
/* Attempt to expose the low ranked |
384 |
&& TREE_CODE (TREE_OPERAND (stmt, 1)) == code |
359 |
arguments to CSE if we have something like: |
385 |
&& has_single_use (TREE_OPERAND (stmt, 0))) |
360 |
a = <rank 2> + c (rank 1) |
|
|
361 |
b = a (rank 3) + d (rank 1) |
362 |
We want to transform this into: |
363 |
a = c + d |
364 |
b = <rank 2> + <rank 3> |
365 |
|
366 |
The op finding part wouldn't be necessary if |
367 |
we could swap the operands above and not have |
368 |
update_stmt change them back on us. |
369 |
*/ |
370 |
unsigned int lowrankop; |
371 |
unsigned int lowrank; |
372 |
unsigned int highrank; |
373 |
unsigned int highrankop; |
374 |
unsigned int temp; |
375 |
|
376 |
lowrankop = 0; |
377 |
*takeop = 1; |
378 |
lowrank = get_rank (TREE_OPERAND (lhsdefop, 0)); |
379 |
temp = get_rank (TREE_OPERAND (lhsdefop, 1)); |
380 |
highrank = temp; |
381 |
highrankop = 1; |
382 |
if (temp < lowrank) |
383 |
{ |
384 |
lowrankop = 1; |
385 |
highrankop = 0; |
386 |
*takeop = 0; |
387 |
highrank = lowrank; |
388 |
lowrank = temp; |
389 |
} |
390 |
|
391 |
/* If highrank == lowrank, then we had something |
392 |
like: |
393 |
a = <rank 1> + <rank 1> |
394 |
already, so there is no guarantee that |
395 |
swapping our argument in is going to be |
396 |
better. |
397 |
If we run reassoc twice, we could probably |
398 |
have a flag that switches this behavior on, |
399 |
so that we try once without it, and once with |
400 |
it, so that redundancy elimination sees it |
401 |
both ways. |
402 |
*/ |
403 |
|
404 |
if (lowrank == rhsrank && highrank != lowrank) |
405 |
return true; |
386 |
return true; |
406 |
|
|
|
407 |
/* Also, see if the LHS's high ranked op should be switched with our |
408 |
RHS simply because it is greater in rank than our current RHS. */ |
409 |
if (TREE_CODE (TREE_OPERAND (lhsdefop, highrankop)) == SSA_NAME) |
410 |
{ |
411 |
tree iop = SSA_NAME_DEF_STMT (TREE_OPERAND (lhsdefop, highrankop)); |
412 |
if (TREE_CODE (iop) == MODIFY_EXPR) |
413 |
iop = TREE_OPERAND (iop, 1); |
414 |
if (TREE_CODE (iop) == TREE_CODE (lhsdefop)) |
415 |
*takeop = 1; |
416 |
if (rhsrank < get_rank (TREE_OPERAND (lhsdefop, *takeop))) |
417 |
return true; |
418 |
} |
419 |
|
420 |
return false; |
387 |
return false; |
421 |
} |
388 |
} |
422 |
|
389 |
|
423 |
/* Attempt to reassociate the associative binary operator BEXPR, which |
390 |
|
424 |
is in the statement pointed to by CURRBSI. Return true if we |
391 |
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the |
425 |
changed the statement. */ |
392 |
operand of the negate operation. Otherwise, return NULL. */ |
|
|
393 |
|
394 |
static tree |
395 |
get_unary_op (tree name, enum tree_code opcode) |
396 |
{ |
397 |
tree stmt = SSA_NAME_DEF_STMT (name); |
398 |
tree rhs; |
399 |
|
400 |
if (TREE_CODE (stmt) != MODIFY_EXPR) |
401 |
return NULL_TREE; |
402 |
|
403 |
rhs = TREE_OPERAND (stmt, 1); |
404 |
if (TREE_CODE (rhs) == opcode) |
405 |
return TREE_OPERAND (rhs, 0); |
406 |
return NULL_TREE; |
407 |
} |
408 |
|
409 |
/* If CURR and LAST are a pair of ops that OPCODE allows us to |
410 |
eliminate through equivalences, do so, remove them from OPS, and |
411 |
return true. Otherwise, return false. */ |
426 |
|
412 |
|
427 |
static bool |
413 |
static bool |
428 |
reassociate_expr (tree bexpr, block_stmt_iterator *currbsi) |
414 |
eliminate_duplicate_pair (enum tree_code opcode, |
|
|
415 |
VEC (operand_entry_t, heap) **ops, |
416 |
bool *all_done, |
417 |
unsigned int i, |
418 |
operand_entry_t curr, |
419 |
operand_entry_t last) |
429 |
{ |
420 |
{ |
430 |
tree lhs = TREE_OPERAND (bexpr, 0); |
421 |
|
431 |
tree rhs = TREE_OPERAND (bexpr, 1); |
422 |
/* If we have two of the same op, and the opcode is & or |, we can |
432 |
tree lhsdef; |
423 |
eliminate one of them. |
433 |
tree lhsi; |
424 |
If we have two of the same op, and the opcode is ^, we can |
434 |
bool changed = false; |
425 |
eliminate both of them. */ |
435 |
unsigned int lhsrank = get_rank (lhs); |
426 |
|
436 |
unsigned int rhsrank = get_rank (rhs); |
427 |
if (last && last->op == curr->op) |
437 |
|
|
|
438 |
/* If unsafe math optimizations we can do reassociation for non-integral |
439 |
types. */ |
440 |
if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
441 |
|| !INTEGRAL_TYPE_P (TREE_TYPE (rhs))) |
442 |
&& (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs)) |
443 |
|| !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs)) |
444 |
|| !flag_unsafe_math_optimizations)) |
445 |
return false; |
446 |
|
447 |
/* We want the greater ranked operand to be our "LHS" for simplicity |
448 |
sake. There is no point in actually modifying the expression, as |
449 |
update_stmt will simply resort the operands anyway. */ |
450 |
if (lhsrank < rhsrank) |
451 |
{ |
428 |
{ |
452 |
tree temp; |
429 |
switch (opcode) |
453 |
unsigned int temp1; |
430 |
{ |
454 |
temp = lhs; |
431 |
case BIT_IOR_EXPR: |
455 |
lhs = rhs; |
432 |
case BIT_AND_EXPR: |
456 |
rhs = temp; |
433 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
457 |
temp1 = lhsrank; |
434 |
{ |
458 |
lhsrank = rhsrank; |
435 |
fprintf (dump_file, "Equivalence: "); |
459 |
rhsrank = temp1; |
436 |
print_generic_expr (dump_file, curr->op, 0); |
460 |
} |
437 |
fprintf (dump_file, " [&|] "); |
461 |
|
438 |
print_generic_expr (dump_file, last->op, 0); |
462 |
/* If the high ranked operand is an SSA_NAME, and the binary |
439 |
fprintf (dump_file, " -> "); |
463 |
operator is not something we've already seen somewhere else |
440 |
print_generic_stmt (dump_file, last->op, 0); |
464 |
(i.e., it may be redundant), attempt to reassociate it. |
441 |
} |
465 |
|
442 |
|
466 |
We can't reassociate expressions unless the expression we are |
443 |
VEC_ordered_remove (operand_entry_t, *ops, i); |
467 |
going to reassociate with is only used in our current expression, |
444 |
reassociate_stats.ops_eliminated ++; |
468 |
or else we may screw up other computations, like so: |
445 |
|
469 |
|
446 |
return true; |
470 |
a = b + c |
447 |
|
471 |
e = a + d |
448 |
case BIT_XOR_EXPR: |
472 |
|
449 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
473 |
g = a + f |
450 |
{ |
474 |
|
451 |
fprintf (dump_file, "Equivalence: "); |
475 |
We cannot reassociate and rewrite the "a = ..." , |
452 |
print_generic_expr (dump_file, curr->op, 0); |
476 |
because that would change the value of the computation of |
453 |
fprintf (dump_file, " ^ "); |
477 |
"g = a + f". */ |
454 |
print_generic_expr (dump_file, last->op, 0); |
478 |
if (TREE_CODE (lhs) == SSA_NAME && !find_seen_binop (lhs, rhs)) |
455 |
fprintf (dump_file, " -> nothing\n"); |
479 |
{ |
|
|
480 |
lhsdef = SSA_NAME_DEF_STMT (lhs); |
481 |
if (TREE_CODE (lhsdef) == MODIFY_EXPR) |
482 |
{ |
483 |
lhsi = TREE_OPERAND (lhsdef, 1); |
484 |
if (TREE_CODE (lhsi) == TREE_CODE (bexpr)) |
485 |
{ |
486 |
use_operand_p use; |
487 |
tree usestmt; |
488 |
if (single_imm_use (lhs, &use, &usestmt)) |
489 |
{ |
490 |
unsigned int takeop = 0; |
491 |
unsigned int otherop = 1; |
492 |
bool foundmatch = false; |
493 |
bool foundrank = false; |
494 |
|
495 |
/* If we can easily transpose this into an operation |
496 |
we've already seen, let's do that. |
497 |
otherwise, let's try to expose low ranked ops to |
498 |
CSE. */ |
499 |
if (find_seen_binop (TREE_OPERAND (lhsi, 1), rhs)) |
500 |
{ |
501 |
takeop = 0; |
502 |
otherop = 1; |
503 |
foundmatch = true; |
504 |
} |
505 |
else if (find_seen_binop (TREE_OPERAND (lhsi, 0), |
506 |
rhs)) |
507 |
{ |
508 |
takeop = 1; |
509 |
otherop = 0; |
510 |
foundmatch = true; |
511 |
} |
512 |
else if (should_transpose (rhs, rhsrank, lhsi, |
513 |
&takeop)) |
514 |
{ |
515 |
foundrank = true; |
516 |
} |
517 |
if (foundmatch || foundrank) |
518 |
{ |
519 |
block_stmt_iterator lhsbsi = bsi_for_stmt (lhsdef); |
520 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
521 |
{ |
522 |
fprintf (dump_file, "Reassociating by %s\n", |
523 |
foundmatch ? "match" : "rank"); |
524 |
fprintf (dump_file, "Before LHS:"); |
525 |
print_generic_stmt (dump_file, lhsi, 0); |
526 |
fprintf (dump_file, "Before curr expr:"); |
527 |
print_generic_stmt (dump_file, bexpr, 0); |
528 |
} |
529 |
TREE_OPERAND (bexpr, 0) = TREE_OPERAND (lhsi, takeop); |
530 |
TREE_OPERAND (lhsi, takeop) = rhs; |
531 |
TREE_OPERAND (bexpr, 1) = TREE_OPERAND (lhsdef, 0); |
532 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
533 |
{ |
534 |
fprintf (dump_file, "After LHS:"); |
535 |
print_generic_stmt (dump_file, lhsi, 0); |
536 |
fprintf (dump_file, "After curr expr:"); |
537 |
print_generic_stmt (dump_file, bexpr, 0); |
538 |
} |
539 |
bsi_move_before (&lhsbsi, currbsi); |
540 |
update_stmt (lhsdef); |
541 |
update_stmt (bsi_stmt (*currbsi)); |
542 |
lhsbsi = bsi_for_stmt (lhsdef); |
543 |
update_stmt (bsi_stmt (lhsbsi)); |
544 |
|
545 |
/* If update_stmt didn't reorder our operands, |
546 |
we'd like to recurse on the expression we |
547 |
just reassociated and reassociate it |
548 |
top-down, exposing further opportunities. |
549 |
Unfortunately, update_stmt does reorder them, |
550 |
so we can't do this cheaply. */ |
551 |
if (!foundmatch) |
552 |
reassociate_stats.reassociated_by_rank++; |
553 |
else |
554 |
reassociate_stats.reassociated_by_match++; |
555 |
return true; |
556 |
} |
557 |
} |
558 |
} |
456 |
} |
|
|
457 |
|
458 |
reassociate_stats.ops_eliminated += 2; |
459 |
|
460 |
if (VEC_length (operand_entry_t, *ops) == 2) |
461 |
{ |
462 |
VEC_free (operand_entry_t, heap, *ops); |
463 |
*ops = NULL; |
464 |
add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op), |
465 |
integer_zero_node)); |
466 |
*all_done = true; |
467 |
} |
468 |
else |
469 |
{ |
470 |
VEC_ordered_remove (operand_entry_t, *ops, i-1); |
471 |
VEC_ordered_remove (operand_entry_t, *ops, i-1); |
472 |
} |
473 |
|
474 |
return true; |
475 |
|
476 |
default: |
477 |
break; |
559 |
} |
478 |
} |
560 |
} |
479 |
} |
561 |
return changed; |
480 |
return false; |
562 |
} |
481 |
} |
563 |
|
482 |
|
564 |
/* Reassociate expressions in basic block BB and its dominator as |
483 |
/* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression, |
565 |
children , return true if any |
484 |
look in OPS for a corresponding positive operation to cancel it |
566 |
expressions changed. */ |
485 |
out. If we find one, remove the other from OPS, replace |
|
|
486 |
OPS[CURRINDEX] with 0, and return true. Otherwise, return |
487 |
false. */ |
567 |
|
488 |
|
568 |
static bool |
489 |
static bool |
569 |
reassociate_bb (basic_block bb) |
490 |
eliminate_plus_minus_pair (enum tree_code opcode, |
|
|
491 |
VEC (operand_entry_t, heap) **ops, |
492 |
unsigned int currindex, |
493 |
operand_entry_t curr) |
570 |
{ |
494 |
{ |
571 |
bool changed = false; |
495 |
tree negateop; |
572 |
block_stmt_iterator bsi; |
496 |
unsigned int i; |
573 |
basic_block son; |
497 |
operand_entry_t oe; |
574 |
|
498 |
|
575 |
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) |
499 |
if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) |
|
|
500 |
return false; |
501 |
|
502 |
negateop = get_unary_op (curr->op, NEGATE_EXPR); |
503 |
if (negateop == NULL_TREE) |
504 |
return false; |
505 |
|
506 |
/* Any non-negated version will have a rank that is one less than |
507 |
the current rank. So once we hit those ranks, if we don't find |
508 |
one, we can stop. */ |
509 |
|
510 |
for (i = currindex; |
511 |
VEC_iterate (operand_entry_t, *ops, i, oe) && oe->rank >= curr->rank - 1 ; |
512 |
i++) |
576 |
{ |
513 |
{ |
577 |
tree stmt = bsi_stmt (bsi); |
514 |
if (oe->op == negateop && i != currindex) |
578 |
|
|
|
579 |
if (TREE_CODE (stmt) == MODIFY_EXPR) |
580 |
{ |
515 |
{ |
581 |
tree rhs = TREE_OPERAND (stmt, 1); |
516 |
|
582 |
if (associative_tree_code (TREE_CODE (rhs))) |
517 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
583 |
{ |
518 |
{ |
584 |
if (reassociate_expr (rhs, &bsi)) |
519 |
fprintf (dump_file, "Equivalence: "); |
585 |
{ |
520 |
print_generic_expr (dump_file, negateop, 0); |
586 |
changed = true; |
521 |
fprintf (dump_file, " + -"); |
587 |
update_stmt (stmt); |
522 |
print_generic_expr (dump_file, oe->op, 0); |
588 |
} |
523 |
fprintf (dump_file, " -> 0\n"); |
589 |
insert_seen_binop (TREE_OPERAND (rhs, 0), |
|
|
590 |
TREE_OPERAND (rhs, 1)); |
591 |
} |
524 |
} |
|
|
525 |
|
526 |
VEC_ordered_remove (operand_entry_t, *ops, i); |
527 |
add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op), |
528 |
integer_zero_node)); |
529 |
VEC_ordered_remove (operand_entry_t, *ops, currindex); |
530 |
reassociate_stats.ops_eliminated ++; |
531 |
|
532 |
return true; |
592 |
} |
533 |
} |
593 |
} |
534 |
} |
594 |
for (son = first_dom_son (CDI_DOMINATORS, bb); |
535 |
|
595 |
son; |
536 |
return false; |
596 |
son = next_dom_son (CDI_DOMINATORS, son)) |
|
|
597 |
{ |
598 |
changed |= reassociate_bb (son); |
599 |
} |
600 |
return changed; |
601 |
} |
537 |
} |
602 |
|
538 |
|
603 |
|
539 |
/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a |
|
|
540 |
bitwise not expression, look in OPS for a corresponding operand to |
541 |
cancel it out. If we find one, remove the other from OPS, replace |
542 |
OPS[CURRINDEX] with 0, and return true. Otherwise, return |
543 |
false. */ |
544 |
|
604 |
static bool |
545 |
static bool |
605 |
do_reassoc (void) |
546 |
eliminate_not_pairs (enum tree_code opcode, |
606 |
{ |
547 |
VEC (operand_entry_t, heap) **ops, |
607 |
bool changed = false; |
548 |
unsigned int currindex, |
608 |
|
549 |
operand_entry_t curr) |
609 |
changed = reassociate_bb (ENTRY_BLOCK_PTR); |
550 |
{ |
|
|
551 |
tree notop; |
552 |
unsigned int i; |
553 |
operand_entry_t oe; |
554 |
|
555 |
if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) |
556 |
|| TREE_CODE (curr->op) != SSA_NAME) |
557 |
return false; |
558 |
|
559 |
notop = get_unary_op (curr->op, BIT_NOT_EXPR); |
560 |
if (notop == NULL_TREE) |
561 |
return false; |
562 |
|
563 |
/* Any non-not version will have a rank that is one less than |
564 |
the current rank. So once we hit those ranks, if we don't find |
565 |
one, we can stop. */ |
566 |
|
567 |
for (i = currindex; |
568 |
VEC_iterate (operand_entry_t, *ops, i, oe) && oe->rank >= curr->rank - 1 ; |
569 |
i++) |
570 |
{ |
571 |
if (oe->op == notop && i != currindex) |
572 |
{ |
573 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
574 |
{ |
575 |
fprintf (dump_file, "Equivalence: "); |
576 |
print_generic_expr (dump_file, notop, 0); |
577 |
if (opcode == BIT_AND_EXPR) |
578 |
fprintf (dump_file, " & ~"); |
579 |
else if (opcode == BIT_IOR_EXPR) |
580 |
fprintf (dump_file, " | ~"); |
581 |
print_generic_expr (dump_file, oe->op, 0); |
582 |
if (opcode == BIT_AND_EXPR) |
583 |
fprintf (dump_file, " -> 0\n"); |
584 |
else if (opcode == BIT_IOR_EXPR) |
585 |
fprintf (dump_file, " -> -1\n"); |
586 |
} |
587 |
|
588 |
if (opcode == BIT_AND_EXPR) |
589 |
oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node); |
590 |
else if (opcode == BIT_IOR_EXPR) |
591 |
oe->op = build_low_bits_mask (TREE_TYPE (oe->op), |
592 |
TYPE_PRECISION (TREE_TYPE (oe->op))); |
593 |
|
594 |
reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1; |
595 |
VEC_free (operand_entry_t, heap, *ops); |
596 |
*ops = NULL; |
597 |
VEC_safe_push (operand_entry_t, heap, *ops, oe); |
598 |
return true; |
599 |
} |
600 |
} |
610 |
|
601 |
|
611 |
return changed; |
602 |
return false; |
612 |
} |
603 |
} |
613 |
|
604 |
|
|
|
605 |
/* Use constant value that may be present in OPS to try to eliminate |
606 |
operands. Note that this function is only really used when we've |
607 |
eliminated ops for other reasons, or merged constants. Across |
608 |
single statements, fold already does all of this, plus more. There |
609 |
is little point in duplicating logic, so I've only included the |
610 |
identities that I could ever construct testcases to trigger. */ |
614 |
|
611 |
|
615 |
/* Gate and execute functions for Reassociation. */ |
612 |
static void |
|
|
613 |
eliminate_using_constants (enum tree_code opcode, |
614 |
VEC(operand_entry_t, heap) **ops) |
615 |
{ |
616 |
operand_entry_t oelast = VEC_last (operand_entry_t, *ops); |
617 |
|
618 |
if (oelast->rank == 0 && INTEGRAL_TYPE_P (TREE_TYPE (oelast->op))) |
619 |
{ |
620 |
switch (opcode) |
621 |
{ |
622 |
case BIT_AND_EXPR: |
623 |
if (integer_zerop (oelast->op)) |
624 |
{ |
625 |
if (VEC_length (operand_entry_t, *ops) != 1) |
626 |
{ |
627 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
628 |
fprintf (dump_file, "Found & 0, removing all other ops\n"); |
629 |
reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1; |
630 |
VEC_free (operand_entry_t, heap, *ops); |
631 |
*ops = NULL; |
632 |
VEC_safe_push (operand_entry_t, heap, *ops, oelast); |
633 |
return; |
634 |
} |
635 |
} |
636 |
/* FALLTHRU */ |
637 |
case BIT_IOR_EXPR: |
638 |
if (integer_all_onesp (oelast->op)) |
639 |
{ |
640 |
if (VEC_length (operand_entry_t, *ops) != 1) |
641 |
{ |
642 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
643 |
fprintf (dump_file, "Found [&|] -1, removing\n"); |
644 |
VEC_pop (operand_entry_t, *ops); |
645 |
reassociate_stats.ops_eliminated++; |
646 |
} |
647 |
} |
648 |
break; |
649 |
case MULT_EXPR: |
650 |
if (integer_zerop (oelast->op)) |
651 |
{ |
652 |
if (VEC_length (operand_entry_t, *ops) != 1) |
653 |
{ |
654 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
655 |
fprintf (dump_file, "Found * 0, removing all other ops\n"); |
656 |
reassociate_stats.ops_eliminated += VEC_length (operand_entry_t, *ops) - 1; |
657 |
VEC_free (operand_entry_t, heap, *ops); |
658 |
*ops = NULL; |
659 |
VEC_safe_push (operand_entry_t, heap, *ops, oelast); |
660 |
return; |
661 |
} |
662 |
} |
663 |
else if (integer_onep (oelast->op)) |
664 |
{ |
665 |
if (VEC_length (operand_entry_t, *ops) != 1) |
666 |
{ |
667 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
668 |
fprintf (dump_file, "Found * 1, removing\n"); |
669 |
VEC_pop (operand_entry_t, *ops); |
670 |
reassociate_stats.ops_eliminated++; |
671 |
return; |
672 |
} |
673 |
} |
674 |
break; |
675 |
case BIT_XOR_EXPR: |
676 |
case PLUS_EXPR: |
677 |
case MINUS_EXPR: |
678 |
if (integer_zerop (oelast->op)) |
679 |
{ |
680 |
if (VEC_length (operand_entry_t, *ops) != 1) |
681 |
{ |
682 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
683 |
fprintf (dump_file, "Found [|^+] 0, removing\n"); |
684 |
VEC_pop (operand_entry_t, *ops); |
685 |
reassociate_stats.ops_eliminated++; |
686 |
return; |
687 |
} |
688 |
} |
689 |
break; |
690 |
default: |
691 |
break; |
692 |
} |
693 |
} |
694 |
} |
695 |
|
696 |
/* Perform various identities and other optimizations on the list of |
697 |
operand entries, stored in OPS. The tree code for the binary |
698 |
operation between all the operands is OPCODE. */ |
616 |
|
699 |
|
617 |
static void |
700 |
static void |
618 |
execute_reassoc (void) |
701 |
optimize_ops_list (enum tree_code opcode, |
|
|
702 |
VEC (operand_entry_t, heap) **ops) |
619 |
{ |
703 |
{ |
620 |
init_reassoc (); |
704 |
unsigned int length = VEC_length (operand_entry_t, *ops); |
621 |
do_reassoc (); |
705 |
unsigned int i; |
|
|
706 |
operand_entry_t oe; |
707 |
int fconstcount = 0; |
708 |
int iconstcount = 0; |
709 |
int oconstcount = 0; |
710 |
operand_entry_t oelast = NULL; |
711 |
bool iterate = false; |
712 |
|
713 |
if (length == 1) |
714 |
return; |
715 |
|
716 |
oelast = VEC_last (operand_entry_t, *ops); |
717 |
|
718 |
/* If the last two are constants, pop the constants off, merge them |
719 |
and try the next two. */ |
720 |
if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) |
721 |
{ |
722 |
operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2); |
723 |
|
724 |
if (oelm1->rank == 0 |
725 |
&& is_gimple_min_invariant (oelm1->op) |
726 |
&& lang_hooks.types_compatible_p (TREE_TYPE (oelm1->op), |
727 |
TREE_TYPE (oelast->op))) |
728 |
{ |
729 |
tree folded = fold_build2 (opcode, TREE_TYPE (oelm1->op), |
730 |
oelm1->op, oelast->op); |
731 |
|
732 |
if (is_gimple_min_invariant (folded)) |
733 |
{ |
734 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
735 |
fprintf (dump_file, "Merging constants\n"); |
736 |
|
737 |
VEC_pop (operand_entry_t, *ops); |
738 |
VEC_pop (operand_entry_t, *ops); |
739 |
|
740 |
add_to_ops_vec (ops, folded); |
741 |
reassociate_stats.constants_eliminated++; |
742 |
|
743 |
optimize_ops_list (opcode, ops); |
744 |
return; |
745 |
} |
746 |
} |
747 |
} |
748 |
|
749 |
eliminate_using_constants (opcode, ops); |
750 |
oelast = NULL; |
751 |
|
752 |
for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);) |
753 |
{ |
754 |
bool done = false; |
755 |
|
756 |
if (eliminate_not_pairs (opcode, ops, i, oe)) |
757 |
return; |
758 |
if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) |
759 |
|| (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))) |
760 |
{ |
761 |
if (done) |
762 |
return; |
763 |
iterate = true; |
764 |
oelast = NULL; |
765 |
continue; |
766 |
} |
767 |
if (oe->rank == 0 && is_gimple_min_invariant (oe->op)) |
768 |
{ |
769 |
switch (constant_type (oe->op)) |
770 |
{ |
771 |
case INTEGER_CONST_TYPE: |
772 |
iconstcount++; |
773 |
break; |
774 |
|
775 |
case FLOAT_CONST_TYPE: |
776 |
fconstcount++; |
777 |
break; |
778 |
case OTHER_CONST_TYPE: |
779 |
oconstcount++; |
780 |
break; |
781 |
|
782 |
default: |
783 |
break; |
784 |
} |
785 |
} |
786 |
oelast = oe; |
787 |
i++; |
788 |
} |
789 |
if (iconstcount > 0 && dump_file && (dump_flags & TDF_DETAILS)) |
790 |
fprintf (dump_file, "iconstcount is %d\n", iconstcount); |
791 |
if (fconstcount > 0 && dump_file && (dump_flags & TDF_DETAILS)) |
792 |
fprintf (dump_file, "fconstcount is %d\n", fconstcount); |
793 |
if (oconstcount> 0 && dump_file && (dump_flags & TDF_DETAILS)) |
794 |
fprintf (dump_file, "oconstcount is %d\n", oconstcount); |
795 |
|
796 |
length = VEC_length (operand_entry_t, *ops); |
797 |
oelast = VEC_last (operand_entry_t, *ops); |
798 |
|
799 |
if (iterate) |
800 |
optimize_ops_list (opcode, ops); |
801 |
} |
802 |
|
803 |
#if 0 |
804 |
static tree |
805 |
merge_two_ops (enum tree_code code, tree op1, tree op2) |
806 |
{ |
807 |
tree temp; |
808 |
tree name; |
809 |
tree newexpr; |
810 |
|
811 |
temp = create_tmp_var (TREE_TYPE (op1), "mergetmp"); |
812 |
add_referenced_tmp_var (temp); |
813 |
newexpr = fold_build2 (code, TREE_TYPE (op1), op1, op2); |
814 |
newexpr = build (MODIFY_EXPR, TREE_TYPE (op1), temp, newexpr); |
815 |
name = make_ssa_name (temp, newexpr); |
816 |
TREE_OPERAND (newexpr, 0) = name; |
817 |
return name; |
818 |
} |
819 |
|
820 |
static void |
821 |
merge_leaves_of_rank (enum tree_code opcode, VEC (tree, heap) *worklist, |
822 |
VEC (tree, heap) **results) |
823 |
{ |
824 |
unsigned int length = VEC_length (tree, worklist); |
825 |
if (length % 2 == 1) |
826 |
{ |
827 |
VEC_safe_push (tree, heap, *results, VEC_pop (tree, worklist)); |
828 |
length--; |
829 |
} |
830 |
while (length != 0) |
831 |
{ |
832 |
tree oe1 = VEC_pop (tree, worklist); |
833 |
tree oe2 = VEC_pop (tree, worklist); |
834 |
length -= 2; |
835 |
VEC_safe_push (tree, heap, *results, merge_two_ops (opcode, oe1, oe2)); |
836 |
} |
837 |
VEC_free (tree, heap, worklist); |
838 |
|
839 |
} |
840 |
|
841 |
static void |
842 |
rewrite_expr_tree_new (tree stmt, enum tree_code opcode, |
843 |
VEC (operand_entry_t, heap) **ops) |
844 |
{ |
845 |
VEC(tree, heap) *initial_merge_worklist = NULL; |
846 |
VEC(tree, heap) *merge_worklist = NULL; |
847 |
VEC(tree, heap) *stmtsforsale = NULL; |
848 |
operand_entry_t oe; |
849 |
unsigned int opslength = VEC_length (operand_entry_t, *ops); |
850 |
|
851 |
if (opslength < 4) |
852 |
return; |
853 |
|
854 |
/* Initialize the worklist */ |
855 |
while (VEC_length (operand_entry_t, *ops) != 0) |
856 |
{ |
857 |
initial_merge_worklist = NULL; |
858 |
oe = VEC_pop (operand_entry_t, *ops); |
859 |
while (!VEC_empty (operand_entry_t, *ops) |
860 |
&& VEC_last (operand_entry_t, *ops)->rank == oe->rank) |
861 |
{ |
862 |
VEC_safe_push (tree, heap, initial_merge_worklist, oe->op); |
863 |
oe = VEC_pop (operand_entry_t, *ops); |
864 |
} |
865 |
VEC_safe_push (tree, heap, initial_merge_worklist, oe->op); |
866 |
merge_leaves_of_rank (opcode, initial_merge_worklist, &merge_worklist); |
867 |
} |
868 |
|
869 |
gcc_assert (VEC_length (tree, merge_worklist) <= opslength); |
870 |
|
871 |
while (VEC_length (tree, merge_worklist) != 0) |
872 |
add_to_ops_vec (ops, VEC_pop (tree, merge_worklist)); |
873 |
} |
874 |
#endif |
875 |
|
876 |
static bool |
877 |
is_phi_for_stmt (tree stmt, tree operand) |
878 |
{ |
879 |
tree def_stmt; |
880 |
tree lhs = TREE_OPERAND (stmt, 0); |
881 |
use_operand_p arg_p; |
882 |
ssa_op_iter i; |
883 |
|
884 |
if (TREE_CODE (operand) != SSA_NAME) |
885 |
return false; |
886 |
|
887 |
def_stmt = SSA_NAME_DEF_STMT (operand); |
888 |
if (TREE_CODE (def_stmt) != PHI_NODE) |
889 |
return false; |
890 |
|
891 |
FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) |
892 |
if (lhs == USE_FROM_PTR (arg_p)) |
893 |
return true; |
894 |
return false; |
895 |
} |
896 |
|
897 |
/* Recursively rewrite our linearized statements so that the operators |
898 |
match those in OPS[OPINDEX], putting the computation in rank |
899 |
order. */ |
900 |
|
901 |
static void |
902 |
rewrite_expr_tree (tree stmt, unsigned int opindex, |
903 |
VEC(operand_entry_t, heap) * ops) |
904 |
{ |
905 |
tree rhs = TREE_OPERAND (stmt, 1); |
906 |
operand_entry_t oe; |
907 |
|
908 |
/* If we have three operands left, then we want to make sure the one |
909 |
that gets the double binary op are the ones with the same rank. |
910 |
|
911 |
The alternative we try is to see if this is a destructive |
912 |
update style statement, which is like: |
913 |
b = phi (a, ...) |
914 |
a = c + b; |
915 |
In that case, we want to use the destructive update form to |
916 |
expose the possible vectorizer sum reduction opportunity. |
917 |
In that case, the third operand will be the phi node. |
918 |
|
919 |
We could, of course, try to be better as noted above, and do a |
920 |
lot of work to try to find these opportunities in >3 operand |
921 |
cases, but it is unlikely to be worth it. */ |
922 |
if (opindex + 3 == VEC_length (operand_entry_t, ops)) |
923 |
{ |
924 |
operand_entry_t oe1, oe2, oe3; |
925 |
|
926 |
oe1 = VEC_index (operand_entry_t, ops, opindex); |
927 |
oe2 = VEC_index (operand_entry_t, ops, opindex + 1); |
928 |
oe3 = VEC_index (operand_entry_t, ops, opindex + 2); |
929 |
|
930 |
if ((oe1->rank == oe2->rank |
931 |
&& oe2->rank != oe3->rank) |
932 |
|| (is_phi_for_stmt (stmt, oe3->op) |
933 |
&& !is_phi_for_stmt (stmt, oe1->op) |
934 |
&& !is_phi_for_stmt (stmt, oe2->op))) |
935 |
{ |
936 |
struct operand_entry temp = *oe3; |
937 |
oe3->op = oe1->op; |
938 |
oe3->rank = oe1->rank; |
939 |
oe1->op = temp.op; |
940 |
oe1->rank= temp.rank; |
941 |
} |
942 |
} |
943 |
|
944 |
/* The final recursion case for this function is that you have |
945 |
exactly two operations left. |
946 |
If we had one exactly one op in the entire list to start with, we |
947 |
would have never called this function, and the tail recursion |
948 |
rewrites them one at a time. */ |
949 |
if (opindex + 2 == VEC_length (operand_entry_t, ops)) |
950 |
{ |
951 |
operand_entry_t oe1, oe2; |
952 |
|
953 |
oe1 = VEC_index (operand_entry_t, ops, opindex); |
954 |
oe2 = VEC_index (operand_entry_t, ops, opindex + 1); |
955 |
|
956 |
if (TREE_OPERAND (rhs, 0) != oe1->op |
957 |
|| TREE_OPERAND (rhs, 1) != oe2->op) |
958 |
{ |
959 |
|
960 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
961 |
{ |
962 |
fprintf (dump_file, "Transforming "); |
963 |
print_generic_expr (dump_file, rhs, 0); |
964 |
} |
965 |
|
966 |
TREE_OPERAND (rhs, 0) = oe1->op; |
967 |
TREE_OPERAND (rhs, 1) = oe2->op; |
968 |
update_stmt (stmt); |
969 |
|
970 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
971 |
{ |
972 |
fprintf (dump_file, " into "); |
973 |
print_generic_stmt (dump_file, rhs, 0); |
974 |
} |
975 |
|
976 |
} |
977 |
return; |
978 |
} |
979 |
|
980 |
/* If we hit here, we should have 3 or more ops left. */ |
981 |
gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops)); |
982 |
|
983 |
/* Rewrite the next operator. */ |
984 |
oe = VEC_index (operand_entry_t, ops, opindex); |
985 |
|
986 |
if (oe->op != TREE_OPERAND (rhs, 1)) |
987 |
{ |
988 |
|
989 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
990 |
{ |
991 |
fprintf (dump_file, "Transforming "); |
992 |
print_generic_expr (dump_file, rhs, 0); |
993 |
} |
994 |
|
995 |
TREE_OPERAND (rhs, 1) = oe->op; |
996 |
update_stmt (stmt); |
997 |
|
998 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
999 |
{ |
1000 |
fprintf (dump_file, " into "); |
1001 |
print_generic_stmt (dump_file, rhs, 0); |
1002 |
} |
1003 |
} |
1004 |
/* Recurse on the LHS of the binary operator, which is guaranteed to |
1005 |
be the non-leaf side. */ |
1006 |
rewrite_expr_tree (SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0)), |
1007 |
opindex + 1, ops); |
1008 |
} |
1009 |
|
1010 |
/* Transform STMT, which is really (A +B) + (C + D) into the left |
1011 |
linear form, ((A+B)+C)+D. |
1012 |
Recurse on D if necessary. */ |
1013 |
|
1014 |
static void |
1015 |
linearize_expr (tree stmt) |
1016 |
{ |
1017 |
block_stmt_iterator bsinow, bsirhs; |
1018 |
tree rhs = TREE_OPERAND (stmt, 1); |
1019 |
enum tree_code rhscode = TREE_CODE (rhs); |
1020 |
tree binrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1)); |
1021 |
tree binlhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0)); |
1022 |
tree newbinrhs = NULL_TREE; |
1023 |
|
1024 |
gcc_assert (is_reassociable_op (binlhs, TREE_CODE (rhs)) |
1025 |
&& is_reassociable_op (binrhs, TREE_CODE (rhs))); |
1026 |
|
1027 |
bsinow = bsi_for_stmt (stmt); |
1028 |
bsirhs = bsi_for_stmt (binrhs); |
1029 |
bsi_move_before (&bsirhs, &bsinow); |
1030 |
|
1031 |
TREE_OPERAND (rhs, 1) = TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0); |
1032 |
if (TREE_CODE (TREE_OPERAND (rhs, 1)) == SSA_NAME) |
1033 |
newbinrhs = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 1)); |
1034 |
TREE_OPERAND (TREE_OPERAND (binrhs, 1), 0) = TREE_OPERAND (binlhs, 0); |
1035 |
TREE_OPERAND (rhs, 0) = TREE_OPERAND (binrhs, 0); |
1036 |
|
1037 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
1038 |
{ |
1039 |
fprintf (dump_file, "Linearized: "); |
1040 |
print_generic_stmt (dump_file, rhs, 0); |
1041 |
} |
1042 |
|
1043 |
reassociate_stats.linearized++; |
1044 |
update_stmt (binrhs); |
1045 |
update_stmt (binlhs); |
1046 |
update_stmt (stmt); |
1047 |
TREE_VISITED (binrhs) = 1; |
1048 |
TREE_VISITED (binlhs) = 1; |
1049 |
TREE_VISITED (stmt) = 1; |
1050 |
|
1051 |
/* Tail recurse on the new rhs if it still needs reassociation. */ |
1052 |
if (newbinrhs && is_reassociable_op (newbinrhs, rhscode)) |
1053 |
linearize_expr (stmt); |
1054 |
|
1055 |
} |
1056 |
|
1057 |
/* If LHS has a single immediate use that is a MODIFY_EXPR, return |
1058 |
it. Otherwise, return NULL. */ |
1059 |
|
1060 |
static tree |
1061 |
get_single_immediate_use (tree lhs) |
1062 |
{ |
1063 |
use_operand_p immuse; |
1064 |
tree immusestmt; |
1065 |
|
1066 |
if (TREE_CODE (lhs) == SSA_NAME |
1067 |
&& single_imm_use (lhs, &immuse, &immusestmt)) |
1068 |
{ |
1069 |
if (TREE_CODE (immusestmt) == RETURN_EXPR) |
1070 |
immusestmt = TREE_OPERAND (immusestmt, 0); |
1071 |
if (TREE_CODE (immusestmt) == MODIFY_EXPR) |
1072 |
return immusestmt; |
1073 |
} |
1074 |
return NULL_TREE; |
1075 |
} |
1076 |
static VEC(tree, heap) *broken_up_subtracts; |
1077 |
|
1078 |
|
1079 |
/* Recursively negate the value of TONEGATE, and return the SSA_NAME |
1080 |
representing the negated value. Insertions of any necessary |
1081 |
instructions go before BSI. |
1082 |
This function is recursive in that, if you hand it "a_5" as the |
1083 |
value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will |
1084 |
transform b_3 + b_4 into a_5 = -b_3 + -b_4. */ |
1085 |
|
1086 |
static tree |
1087 |
negate_value (tree tonegate, block_stmt_iterator *bsi) |
1088 |
{ |
1089 |
tree negatedef = tonegate; |
1090 |
tree resultofnegate; |
1091 |
|
1092 |
if (TREE_CODE (tonegate) == SSA_NAME) |
1093 |
negatedef = SSA_NAME_DEF_STMT (tonegate); |
1094 |
|
1095 |
/* If we are trying to negate a name, defined by an add, negate the |
1096 |
add operands instead. */ |
1097 |
if (TREE_CODE (tonegate) == SSA_NAME |
1098 |
&& TREE_CODE (negatedef) == MODIFY_EXPR |
1099 |
&& TREE_CODE (TREE_OPERAND (negatedef, 0)) == SSA_NAME |
1100 |
&& num_imm_uses (TREE_OPERAND (negatedef, 0)) == 1 |
1101 |
&& TREE_CODE (TREE_OPERAND (negatedef, 1)) == PLUS_EXPR) |
1102 |
{ |
1103 |
block_stmt_iterator bsi; |
1104 |
tree binop = TREE_OPERAND (negatedef, 1); |
1105 |
|
1106 |
bsi = bsi_for_stmt (negatedef); |
1107 |
TREE_OPERAND (binop, 0) = negate_value (TREE_OPERAND (binop, 0), |
1108 |
&bsi); |
1109 |
bsi = bsi_for_stmt (negatedef); |
1110 |
TREE_OPERAND (binop, 1) = negate_value (TREE_OPERAND (binop, 1), |
1111 |
&bsi); |
1112 |
update_stmt (negatedef); |
1113 |
return TREE_OPERAND (negatedef, 0); |
1114 |
} |
1115 |
|
1116 |
tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate); |
1117 |
resultofnegate = force_gimple_operand_bsi (bsi, tonegate, true, |
1118 |
NULL_TREE); |
1119 |
VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate); |
1120 |
return resultofnegate; |
1121 |
|
1122 |
} |
1123 |
|
1124 |
/* Return true if we should break up the subtract in STMT into an add |
1125 |
with negate. This is true when we the subtract operands are really |
1126 |
adds, or the subtract itself is used in an add expression. In |
1127 |
either case, breaking up the subtract into an add with negate |
1128 |
exposes the adds to reassociation. */ |
1129 |
|
1130 |
static bool |
1131 |
should_break_up_subtract (tree stmt) |
1132 |
{ |
1133 |
|
1134 |
tree lhs = TREE_OPERAND (stmt, 0); |
1135 |
tree rhs = TREE_OPERAND (stmt, 1); |
1136 |
tree binlhs = TREE_OPERAND (rhs, 0); |
1137 |
tree binrhs = TREE_OPERAND (rhs, 1); |
1138 |
tree immusestmt; |
1139 |
|
1140 |
if (TREE_CODE (binlhs) == SSA_NAME |
1141 |
&& is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR)) |
1142 |
return true; |
1143 |
|
1144 |
if (TREE_CODE (binrhs) == SSA_NAME |
1145 |
&& is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR)) |
1146 |
return true; |
1147 |
|
1148 |
if (TREE_CODE (lhs) == SSA_NAME |
1149 |
&& (immusestmt = get_single_immediate_use (lhs)) |
1150 |
&& TREE_CODE (TREE_OPERAND (immusestmt, 1)) == PLUS_EXPR) |
1151 |
return true; |
1152 |
return false; |
1153 |
|
1154 |
} |
1155 |
|
1156 |
/* Transform STMT from A - B into A + -B. */ |
1157 |
|
1158 |
static void |
1159 |
break_up_subtract (tree stmt, block_stmt_iterator *bsi) |
1160 |
{ |
1161 |
tree rhs = TREE_OPERAND (stmt, 1); |
1162 |
|
1163 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
1164 |
{ |
1165 |
fprintf (dump_file, "Breaking up subtract "); |
1166 |
print_generic_stmt (dump_file, stmt, 0); |
1167 |
} |
1168 |
|
1169 |
TREE_SET_CODE (TREE_OPERAND (stmt, 1), PLUS_EXPR); |
1170 |
TREE_OPERAND (rhs, 1) = negate_value (TREE_OPERAND (rhs, 1), bsi); |
1171 |
|
1172 |
update_stmt (stmt); |
1173 |
} |
1174 |
|
1175 |
/* Recursively linearize a binary expression that is the RHS of STMT. |
1176 |
Place the operands of the expression tree in the vector named OPS. */ |
1177 |
|
1178 |
static void |
1179 |
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, tree stmt) |
1180 |
{ |
1181 |
block_stmt_iterator bsinow, bsilhs; |
1182 |
tree rhs = TREE_OPERAND (stmt, 1); |
1183 |
tree binrhs = TREE_OPERAND (rhs, 1); |
1184 |
tree binlhs = TREE_OPERAND (rhs, 0); |
1185 |
tree binlhsdef, binrhsdef; |
1186 |
bool binlhsisreassoc = false; |
1187 |
bool binrhsisreassoc = false; |
1188 |
enum tree_code rhscode = TREE_CODE (rhs); |
1189 |
|
1190 |
TREE_VISITED (stmt) = 1; |
1191 |
|
1192 |
if (TREE_CODE (binlhs) == SSA_NAME) |
1193 |
{ |
1194 |
binlhsdef = SSA_NAME_DEF_STMT (binlhs); |
1195 |
binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode); |
1196 |
} |
1197 |
|
1198 |
if (TREE_CODE (binrhs) == SSA_NAME) |
1199 |
{ |
1200 |
binrhsdef = SSA_NAME_DEF_STMT (binrhs); |
1201 |
binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode); |
1202 |
} |
1203 |
|
1204 |
/* If the LHS is not reassociable, but the RHS is, we need to swap |
1205 |
them. If neither is reassociable, there is nothing we can do, so |
1206 |
just put them in the ops vector. If the LHS is reassociable, |
1207 |
linearize it. If both are reassociable, then linearize the RHS |
1208 |
and the LHS. */ |
1209 |
|
1210 |
if (!binlhsisreassoc) |
1211 |
{ |
1212 |
tree temp; |
1213 |
|
1214 |
if (!binrhsisreassoc) |
1215 |
{ |
1216 |
add_to_ops_vec (ops, binrhs); |
1217 |
add_to_ops_vec (ops, binlhs); |
1218 |
return; |
1219 |
} |
1220 |
|
1221 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
1222 |
{ |
1223 |
fprintf (dump_file, "swapping operands of "); |
1224 |
print_generic_expr (dump_file, stmt, 0); |
1225 |
} |
1226 |
|
1227 |
swap_tree_operands (stmt, &TREE_OPERAND (rhs, 0), |
1228 |
&TREE_OPERAND (rhs, 1)); |
1229 |
update_stmt (stmt); |
1230 |
|
1231 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
1232 |
{ |
1233 |
fprintf (dump_file, " is now "); |
1234 |
print_generic_stmt (dump_file, stmt, 0); |
1235 |
} |
1236 |
|
1237 |
/* We want to make it so the lhs is always the reassociative op, |
1238 |
so swap. */ |
1239 |
temp = binlhs; |
1240 |
binlhs = binrhs; |
1241 |
binrhs = temp; |
1242 |
} |
1243 |
else if (binrhsisreassoc) |
1244 |
{ |
1245 |
linearize_expr (stmt); |
1246 |
gcc_assert (rhs == TREE_OPERAND (stmt, 1)); |
1247 |
binlhs = TREE_OPERAND (rhs, 0); |
1248 |
binrhs = TREE_OPERAND (rhs, 1); |
1249 |
} |
1250 |
|
1251 |
gcc_assert (TREE_CODE (binrhs) != SSA_NAME |
1252 |
|| !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), rhscode)); |
1253 |
bsinow = bsi_for_stmt (stmt); |
1254 |
bsilhs = bsi_for_stmt (SSA_NAME_DEF_STMT (binlhs)); |
1255 |
bsi_move_before (&bsilhs, &bsinow); |
1256 |
linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs)); |
1257 |
add_to_ops_vec (ops, binrhs); |
1258 |
} |
1259 |
|
1260 |
/* Repropagate the negates back into subtracts, since no other pass |
1261 |
currently does it. */ |
1262 |
|
1263 |
static void |
1264 |
repropagate_negates (void) |
1265 |
{ |
1266 |
unsigned int i = 0; |
1267 |
tree negate; |
1268 |
|
1269 |
for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++) |
1270 |
{ |
1271 |
tree user = get_single_immediate_use (negate); |
1272 |
|
1273 |
/* Due to linearization, the negate operand should now be an RHS |
1274 |
leaf of some PLUS expression. I.E. |
1275 |
|
1276 |
d = -c |
1277 |
e = a + d |
1278 |
|
1279 |
So just repropagate it, transforming the PLUS_EXPR back into |
1280 |
a MINUS_EXPR. */ |
1281 |
|
1282 |
if (user |
1283 |
&& TREE_CODE (user) == MODIFY_EXPR |
1284 |
&& TREE_CODE (TREE_OPERAND (user, 1)) == PLUS_EXPR |
1285 |
&& TREE_OPERAND (TREE_OPERAND (user, 1), 1) == negate) |
1286 |
{ |
1287 |
tree rhs = TREE_OPERAND (user, 1); |
1288 |
TREE_SET_CODE (rhs, MINUS_EXPR); |
1289 |
TREE_OPERAND (rhs, 1) = get_unary_op (negate, NEGATE_EXPR); |
1290 |
update_stmt (user); |
1291 |
} |
1292 |
} |
1293 |
} |
1294 |
|
1295 |
/* Break up subtract operations in block BB. |
1296 |
|
1297 |
We do this top down because we don't know whether the subtract is |
1298 |
part of a possible chain of reassociation except at the top. |
1299 |
|
1300 |
IE given |
1301 |
d = f + g |
1302 |
c = a + e |
1303 |
b = c - d |
1304 |
q = b - r |
1305 |
k = t - q |
1306 |
|
1307 |
we want to break up k = t - q, but we won't until we've transformed q |
1308 |
= b - r, which won't be broken up until we transform b = c - d. */ |
1309 |
|
1310 |
static void |
1311 |
break_up_subtract_bb (basic_block bb) |
1312 |
{ |
1313 |
block_stmt_iterator bsi; |
1314 |
basic_block son; |
1315 |
|
1316 |
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) |
1317 |
{ |
1318 |
tree stmt = bsi_stmt (bsi); |
1319 |
|
1320 |
if (TREE_CODE (stmt) == MODIFY_EXPR) |
1321 |
{ |
1322 |
tree lhs = TREE_OPERAND (stmt, 0); |
1323 |
tree rhs = TREE_OPERAND (stmt, 1); |
1324 |
|
1325 |
TREE_VISITED (stmt) = 0; |
1326 |
/* If unsafe math optimizations we can do reassociation for |
1327 |
non-integral types. */ |
1328 |
if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
1329 |
|| !INTEGRAL_TYPE_P (TREE_TYPE (rhs))) |
1330 |
&& (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs)) |
1331 |
|| !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs)) |
1332 |
|| !flag_unsafe_math_optimizations)) |
1333 |
continue; |
1334 |
|
1335 |
/* Check for a subtract used only in an addition. If this |
1336 |
is the case, transform it into add of a negate for better |
1337 |
reassociation. IE transform C = A-B into C = A + -B if C |
1338 |
is only used in an addition. */ |
1339 |
if (TREE_CODE (rhs) == MINUS_EXPR) |
1340 |
if (should_break_up_subtract (stmt)) |
1341 |
break_up_subtract (stmt, &bsi); |
1342 |
} |
1343 |
} |
1344 |
for (son = first_dom_son (CDI_DOMINATORS, bb); |
1345 |
son; |
1346 |
son = next_dom_son (CDI_DOMINATORS, son)) |
1347 |
break_up_subtract_bb (son); |
1348 |
} |
1349 |
|
1350 |
/* Reassociate expressions in basic block BB and its post-dominator as |
1351 |
children. */ |
1352 |
|
1353 |
static void |
1354 |
reassociate_bb (basic_block bb) |
1355 |
{ |
1356 |
block_stmt_iterator bsi; |
1357 |
basic_block son; |
1358 |
|
1359 |
for (bsi = bsi_last (bb); !bsi_end_p (bsi); bsi_prev (&bsi)) |
1360 |
{ |
1361 |
tree stmt = bsi_stmt (bsi); |
1362 |
|
1363 |
if (TREE_CODE (stmt) == MODIFY_EXPR) |
1364 |
{ |
1365 |
tree lhs = TREE_OPERAND (stmt, 0); |
1366 |
tree rhs = TREE_OPERAND (stmt, 1); |
1367 |
|
1368 |
/* If this was part of an already processed tree, we don't |
1369 |
need to touch it again. */ |
1370 |
if (TREE_VISITED (stmt)) |
1371 |
continue; |
1372 |
|
1373 |
/* If unsafe math optimizations we can do reassociation for |
1374 |
non-integral types. */ |
1375 |
if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
1376 |
|| !INTEGRAL_TYPE_P (TREE_TYPE (rhs))) |
1377 |
&& (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs)) |
1378 |
|| !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs)) |
1379 |
|| !flag_unsafe_math_optimizations)) |
1380 |
continue; |
1381 |
|
1382 |
if (associative_tree_code (TREE_CODE (rhs))) |
1383 |
{ |
1384 |
VEC(operand_entry_t, heap) *ops = NULL; |
1385 |
|
1386 |
/* There may be no immediate uses left by the time we |
1387 |
get here because we may have eliminated them all. */ |
1388 |
if (TREE_CODE (lhs) == SSA_NAME && num_imm_uses (lhs) == 0) |
1389 |
continue; |
1390 |
|
1391 |
TREE_VISITED (stmt) = 1; |
1392 |
linearize_expr_tree (&ops, stmt); |
1393 |
qsort (VEC_address (operand_entry_t, ops), |
1394 |
VEC_length (operand_entry_t, ops), |
1395 |
sizeof (operand_entry_t), |
1396 |
sort_by_operand_rank); |
1397 |
optimize_ops_list (TREE_CODE (rhs), &ops); |
1398 |
|
1399 |
if (VEC_length (operand_entry_t, ops) == 1) |
1400 |
{ |
1401 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
1402 |
{ |
1403 |
fprintf (dump_file, "Transforming "); |
1404 |
print_generic_expr (dump_file, rhs, 0); |
1405 |
} |
1406 |
TREE_OPERAND (stmt, 1) = VEC_last (operand_entry_t, ops)->op; |
1407 |
update_stmt (stmt); |
1408 |
|
1409 |
if (dump_file && (dump_flags & TDF_DETAILS)) |
1410 |
{ |
1411 |
fprintf (dump_file, " into "); |
1412 |
print_generic_stmt (dump_file, |
1413 |
TREE_OPERAND (stmt, 1), 0); |
1414 |
} |
1415 |
} |
1416 |
else |
1417 |
{ |
1418 |
/*rewrite_expr_tree_new (stmt, TREE_CODE (rhs), &ops);*/ |
1419 |
rewrite_expr_tree (stmt, 0, ops); |
1420 |
} |
1421 |
|
1422 |
VEC_free (operand_entry_t, heap, ops); |
1423 |
} |
1424 |
} |
1425 |
} |
1426 |
for (son = first_dom_son (CDI_POST_DOMINATORS, bb); |
1427 |
son; |
1428 |
son = next_dom_son (CDI_POST_DOMINATORS, son)) |
1429 |
reassociate_bb (son); |
1430 |
} |
1431 |
|
1432 |
void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
void debug_ops_vector (VEC (operand_entry_t, heap) *ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
{
  operand_entry_t oe;
  unsigned int i;

  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
    {
      /* I is unsigned, so it must be printed with %u; %d on an
	 unsigned argument is a format-specifier mismatch.  */
      fprintf (file, "Op %u -> rank: %d, tree: ", i, oe->rank);
      print_generic_stmt (file, oe->op, 0);
    }
}
1449 |
|
1450 |
/* Dump the operand entry vector OPS to STDERR. */ |
1451 |
|
1452 |
void |
1453 |
debug_ops_vector (VEC (operand_entry_t, heap) *ops) |
1454 |
{ |
1455 |
dump_ops_vector (stderr, ops); |
1456 |
} |
1457 |
|
1458 |
static void |
1459 |
do_reassoc (void) |
1460 |
{ |
1461 |
break_up_subtract_bb (ENTRY_BLOCK_PTR); |
1462 |
reassociate_bb (EXIT_BLOCK_PTR); |
1463 |
} |
1464 |
|
1465 |
/* Initialize the reassociation pass. */ |
1466 |
|
1467 |
static void |
1468 |
init_reassoc (void) |
1469 |
{ |
1470 |
int i; |
1471 |
unsigned int rank = 2; |
1472 |
tree param; |
1473 |
int *bbs = xmalloc ((last_basic_block + 1) * sizeof (int)); |
1474 |
|
1475 |
memset (&reassociate_stats, 0, sizeof (reassociate_stats)); |
1476 |
|
1477 |
operand_entry_pool = create_alloc_pool ("operand entry pool", |
1478 |
sizeof (struct operand_entry), 30); |
1479 |
|
1480 |
/* Reverse RPO (Reverse Post Order) will give us something where |
1481 |
deeper loops come later. */ |
1482 |
flow_depth_first_order_compute (NULL, bbs); |
1483 |
bb_rank = xcalloc (last_basic_block + 1, sizeof (unsigned int)); |
1484 |
|
1485 |
operand_rank = htab_create (511, operand_entry_hash, |
1486 |
operand_entry_eq, 0); |
1487 |
|
1488 |
/* Give each argument a distinct rank. */ |
1489 |
for (param = DECL_ARGUMENTS (current_function_decl); |
1490 |
param; |
1491 |
param = TREE_CHAIN (param)) |
1492 |
{ |
1493 |
if (default_def (param) != NULL) |
1494 |
{ |
1495 |
tree def = default_def (param); |
1496 |
insert_operand_rank (def, ++rank); |
1497 |
} |
1498 |
} |
1499 |
|
1500 |
/* Give the chain decl a distinct rank. */ |
1501 |
if (cfun->static_chain_decl != NULL) |
1502 |
{ |
1503 |
tree def = default_def (cfun->static_chain_decl); |
1504 |
if (def != NULL) |
1505 |
insert_operand_rank (def, ++rank); |
1506 |
} |
1507 |
|
1508 |
/* Set up rank for each BB */ |
1509 |
for (i = 0; i < n_basic_blocks; i++) |
1510 |
bb_rank[bbs[i]] = ++rank << 16; |
1511 |
|
1512 |
free (bbs); |
1513 |
calculate_dominance_info (CDI_DOMINATORS); |
1514 |
calculate_dominance_info (CDI_POST_DOMINATORS); |
1515 |
broken_up_subtracts = NULL; |
1516 |
} |
1517 |
|
1518 |
/* Cleanup after the reassociation pass, and print stats if |
1519 |
requested. */ |
1520 |
|
1521 |
static void |
1522 |
fini_reassoc (void) |
1523 |
{ |
1524 |
|
1525 |
if (dump_file && (dump_flags & TDF_STATS)) |
1526 |
{ |
1527 |
fprintf (dump_file, "Reassociation stats:\n"); |
1528 |
fprintf (dump_file, "Linearized: %d\n", reassociate_stats.linearized); |
1529 |
fprintf (dump_file, "Constants eliminated: %d\n", |
1530 |
reassociate_stats.constants_eliminated); |
1531 |
fprintf (dump_file, "Ops eliminated: %d\n", |
1532 |
reassociate_stats.ops_eliminated); |
1533 |
fprintf (dump_file, "Statements rewritten: %d\n", |
1534 |
reassociate_stats.rewritten); |
1535 |
} |
1536 |
htab_delete (operand_rank); |
1537 |
|
1538 |
free_alloc_pool (operand_entry_pool); |
1539 |
free (bb_rank); |
1540 |
VEC_free (tree, heap, broken_up_subtracts); |
1541 |
} |
1542 |
|
1543 |
/* Gate and execute functions for Reassociation.  */

static void
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  /* fini_reassoc must run exactly once: it deletes the operand_rank
     hashtable and frees the operand entry pool, so a second call
     would double-free both.  */
  fini_reassoc ();
}
624 |
|
1555 |
|
Lines 635-641
struct tree_opt_pass pass_reassoc =
Link Here
|
635 |
0, /* properties_provided */ |
1566 |
0, /* properties_provided */ |
636 |
0, /* properties_destroyed */ |
1567 |
0, /* properties_destroyed */ |
637 |
0, /* todo_flags_start */ |
1568 |
0, /* todo_flags_start */ |
638 |
TODO_update_ssa | TODO_dump_func |
1569 |
TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */ |
639 |
| TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */ |
|
|
640 |
0 /* letter */ |
1570 |
0 /* letter */ |
641 |
}; |
1571 |
}; |