1/* Loop invariant motion.
2 Copyright (C) 2003-2026 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "tree.h"
25#include "gimple.h"
26#include "cfghooks.h"
27#include "tree-pass.h"
28#include "ssa.h"
29#include "gimple-pretty-print.h"
30#include "fold-const.h"
31#include "cfganal.h"
32#include "tree-eh.h"
33#include "gimplify.h"
34#include "gimple-iterator.h"
35#include "tree-cfg.h"
36#include "tree-ssa-loop-manip.h"
37#include "tree-ssa-loop.h"
38#include "tree-into-ssa.h"
39#include "cfgloop.h"
40#include "tree-affine.h"
41#include "tree-ssa-propagate.h"
42#include "trans-mem.h"
43#include "gimple-fold.h"
44#include "tree-scalar-evolution.h"
45#include "tree-ssa-loop-niter.h"
46#include "alias.h"
47#include "builtins.h"
48#include "tree-dfa.h"
49#include "tree-ssa.h"
50#include "dbgcnt.h"
51#include "insn-codes.h"
52#include "optabs-tree.h"
53
54/* TODO: Support for predicated code motion. I.e.
55
56 while (1)
57 {
58 if (cond)
59 {
60 a = inv;
61 something;
62 }
63 }
64
65 Where COND and INV are invariants, but evaluating INV may trap or be
66 invalid from some other reason if !COND. This may be transformed to
67
68 if (cond)
69 a = inv;
70 while (1)
71 {
72 if (cond)
73 something;
74 } */
75
/* The auxiliary data kept for each statement considered for invariant
   motion.  Allocated lazily (see init_lim_data) and looked up through
   lim_aux_data_map.  */

struct lim_aux_data
{
  class loop *max_loop;		/* The outermost loop in that the statement
				   is invariant.  */

  class loop *tgt_loop;		/* The loop out of that we want to move the
				   invariant.  */

  class loop *always_executed_in;
				/* The outermost loop for that we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  unsigned ref;			/* The simple_mem_ref in this stmt or 0.  */

  vec<gimple *> depends;	/* Vector of statements that must be also
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  Released by
				   free_lim_aux_data.  */
};
102
103/* Maps statements to their lim_aux_data. */
104
105static hash_map<gimple *, lim_aux_data *> *lim_aux_data_map;
106
/* Description of a memory reference location: one concrete occurrence
   of a memory reference inside a statement.  */

struct mem_ref_loc
{
  tree *ref;			/* Pointer to the reference itself (so the
				   operand can be replaced in place).  */
  gimple *stmt;			/* The statement in that it occurs.  */
};
114
115
/* Description of a memory reference.  One instance exists per distinct
   reference collected over all loops; id 0 is the shared unanalyzable
   reference (see UNANALYZABLE_MEM_ID).  */

class im_mem_ref
{
public:
  unsigned id : 30;		/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
  unsigned ref_decomposed : 1;  /* Whether the ref was hashed from mem.  */
  hashval_t hash;		/* Its hash value.  */

  /* The memory access itself and associated caching of alias-oracle
     query meta-data.  We are using mem.ref == error_mark_node for the
     case the reference is represented by its single access stmt
     in accesses_in_loop[0].  */
  ao_ref mem;

  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  bitmap loaded;		/* The set of loops in that this memory location
				   is loaded from.  */
  vec<mem_ref_loc>		accesses_in_loop;
				/* The locations of the accesses.  */

  /* The following set is computed on demand.  Encodes, per loop, the
     cached dep_kind x dep_state answers; see record_loop_dependence.  */
  bitmap_head dep_loop;		/* The set of loops in that the memory
				   reference is {in,}dependent in
				   different modes.  */
};
145
/* Whether we run as part of the loop optimizer pipeline.
   NOTE(review): set outside this chunk — confirm against the pass setup.  */
static bool in_loop_pipeline;

/* We use six bits per loop in the ref->dep_loop bitmap to record
   the dep_kind x dep_state combinations.  */

/* Kinds of dependence queries; the sm_* kinds are presumably used by
   store motion (write-after-read / write-after-write) — verify at the
   ref_indep_loop_p callers.  */
enum dep_kind { lim_raw, sm_war, sm_waw };
/* Tri-state answer of a cached dependence query.  */
enum dep_state { dep_unknown, dep_independent, dep_dependent };

/* coldest outermost loop for given loop.  */
vec<class loop *> coldest_outermost_loop;
/* hotter outer loop nearest to given loop.  */
vec<class loop *> hotter_than_inner_loop;
158
159/* Populate the loop dependence cache of REF for LOOP, KIND with STATE. */
160
161static void
162record_loop_dependence (class loop *loop, im_mem_ref *ref,
163 dep_kind kind, dep_state state)
164{
165 gcc_assert (state != dep_unknown);
166 unsigned bit = 6 * loop->num + kind * 2 + state == dep_dependent ? 1 : 0;
167 bitmap_set_bit (&ref->dep_loop, bit);
168}
169
170/* Query the loop dependence cache of REF for LOOP, KIND. */
171
172static dep_state
173query_loop_dependence (class loop *loop, im_mem_ref *ref, dep_kind kind)
174{
175 unsigned first_bit = 6 * loop->num + kind * 2;
176 if (bitmap_bit_p (&ref->dep_loop, first_bit))
177 return dep_independent;
178 else if (bitmap_bit_p (&ref->dep_loop, first_bit + 1))
179 return dep_dependent;
180 return dep_unknown;
181}
182
/* Mem_ref hashtable helpers.  Entries are im_mem_ref pointers owned
   elsewhere (nofree); lookups compare against an ao_ref key.  */

struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
{
  typedef ao_ref *compare_type;
  static inline hashval_t hash (const im_mem_ref *);
  static inline bool equal (const im_mem_ref *, const ao_ref *);
};
191
/* A hash function for class im_mem_ref object OBJ.  The hash value is
   precomputed and cached in the im_mem_ref itself.  */

inline hashval_t
mem_ref_hasher::hash (const im_mem_ref *mem)
{
  return mem->hash;
}
199
/* An equality function for class im_mem_ref object MEM1 with
   memory reference OBJ2.  When OBJ2 has a known max size, compare the
   decomposed form (base/offset/size plus alias metadata); otherwise
   fall back to structural equality of the trees.  */

inline bool
mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
{
  if (obj2->max_size_known_p ())
    /* Bases match either as MEM_REFs with equal pointer operand and
       combined constant offset, or directly with equal offsets.  */
    return (mem1->ref_decomposed
	    && ((TREE_CODE (mem1->mem.base) == MEM_REF
		 && TREE_CODE (obj2->base) == MEM_REF
		 && operand_equal_p (TREE_OPERAND (mem1->mem.base, 0),
				     TREE_OPERAND (obj2->base, 0), flags: 0)
		 && known_eq (mem_ref_offset (mem1->mem.base) * BITS_PER_UNIT + mem1->mem.offset,
			      mem_ref_offset (obj2->base) * BITS_PER_UNIT + obj2->offset))
		|| (operand_equal_p (mem1->mem.base, obj2->base, flags: 0)
		    && known_eq (mem1->mem.offset, obj2->offset)))
	    && known_eq (mem1->mem.size, obj2->size)
	    && known_eq (mem1->mem.max_size, obj2->max_size)
	    && mem1->mem.volatile_p == obj2->volatile_p
	    && (mem1->mem.ref_alias_set == obj2->ref_alias_set
		/* We are not canonicalizing alias-sets but for the
		   special-case we didn't canonicalize yet and the
		   incoming ref is a alias-set zero MEM we pick
		   the correct one already.  */
		|| (!mem1->ref_canonical
		    && (TREE_CODE (obj2->ref) == MEM_REF
			|| TREE_CODE (obj2->ref) == TARGET_MEM_REF)
		    && obj2->ref_alias_set == 0)
		/* Likewise if there's a canonical ref with alias-set zero.  */
		|| (mem1->ref_canonical && mem1->mem.ref_alias_set == 0))
	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
				   TREE_TYPE (obj2->ref)));
  else
    return operand_equal_p (mem1->mem.ref, obj2->ref, flags: 0);
}
235
236
/* Description of memory accesses in loops.  Singleton pass-global
   state; the per-loop vectors are indexed by loop->num.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  hash_table<mem_ref_hasher> *refs;

  /* The list of memory references, indexed by im_mem_ref::id.  */
  vec<im_mem_ref *> refs_list;

  /* The set of memory references accessed in each loop.  */
  vec<bitmap_head> refs_loaded_in_loop;

  /* The set of memory references stored in each loop.  */
  vec<bitmap_head> refs_stored_in_loop;

  /* The set of memory references stored in each loop, including subloops .  */
  vec<bitmap_head> all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  hash_map<tree, name_expansion *> *ttae_cache;
} memory_accesses;
259
/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;
/* Obstack the im_mem_ref objects are allocated from.  */
static obstack mem_ref_obstack;

/* Forward declarations of dependence helpers defined later in the file.  */
static bool ref_indep_loop_p (class loop *, im_mem_ref *, dep_kind);
static bool ref_always_accessed_p (class loop *, im_mem_ref *, bool);
static bool refs_independent_p (im_mem_ref *, im_mem_ref *, bool = true);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) param_lim_expensive)

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  Stored in the basic block's aux field.  */
#define ALWAYS_EXECUTED_IN(BB) ((class loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* ID of the shared unanalyzable mem.  */
#define UNANALYZABLE_MEM_ID 0

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
281
282static struct lim_aux_data *
283init_lim_data (gimple *stmt)
284{
285 lim_aux_data *p = XCNEW (struct lim_aux_data);
286 lim_aux_data_map->put (k: stmt, v: p);
287
288 return p;
289}
290
291static struct lim_aux_data *
292get_lim_data (gimple *stmt)
293{
294 lim_aux_data **p = lim_aux_data_map->get (k: stmt);
295 if (!p)
296 return NULL;
297
298 return *p;
299}
300
/* Releases the memory occupied by DATA, including its dependency
   vector.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  data->depends.release ();
  free (ptr: data);
}
309
310static void
311clear_lim_data (gimple *stmt)
312{
313 lim_aux_data **p = lim_aux_data_map->get (k: stmt);
314 if (!p)
315 return;
316
317 free_lim_aux_data (data: *p);
318 *p = NULL;
319}
320
321
/* The possibilities of statement movement, ordered from most to least
   restrictive.  */
enum move_pos
  {
    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause the non-executed statement
				   become executed -- memory accesses, ... */
    MOVE_POSSIBLE		/* Unlimited movement.  */
  };
330
331
332/* If it is possible to hoist the statement STMT unconditionally,
333 returns MOVE_POSSIBLE.
334 If it is possible to hoist the statement STMT, but we must avoid making
335 it executed if it would not be executed in the original program (e.g.
336 because it may trap), return MOVE_PRESERVE_EXECUTION.
337 Otherwise return MOVE_IMPOSSIBLE. */
338
339static enum move_pos
340movement_possibility_1 (gimple *stmt)
341{
342 tree lhs;
343 enum move_pos ret = MOVE_POSSIBLE;
344
345 if (flag_unswitch_loops
346 && gimple_code (g: stmt) == GIMPLE_COND)
347 {
348 /* If we perform unswitching, force the operands of the invariant
349 condition to be moved out of the loop. */
350 return MOVE_POSSIBLE;
351 }
352
353 if (gimple_code (g: stmt) == GIMPLE_PHI
354 && gimple_phi_num_args (gs: stmt) <= 2
355 && !virtual_operand_p (op: gimple_phi_result (gs: stmt))
356 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
357 return MOVE_POSSIBLE;
358
359 if (gimple_get_lhs (stmt) == NULL_TREE)
360 return MOVE_IMPOSSIBLE;
361
362 if (gimple_vdef (g: stmt))
363 return MOVE_IMPOSSIBLE;
364
365 if (stmt_ends_bb_p (stmt)
366 || gimple_has_volatile_ops (stmt)
367 || gimple_has_side_effects (stmt)
368 || stmt_could_throw_p (cfun, stmt))
369 return MOVE_IMPOSSIBLE;
370
371 if (is_gimple_call (gs: stmt))
372 {
373 /* While pure or const call is guaranteed to have no side effects, we
374 cannot move it arbitrarily. Consider code like
375
376 char *s = something ();
377
378 while (1)
379 {
380 if (s)
381 t = strlen (s);
382 else
383 t = 0;
384 }
385
386 Here the strlen call cannot be moved out of the loop, even though
387 s is invariant. In addition to possibly creating a call with
388 invalid arguments, moving out a function call that is not executed
389 may cause performance regressions in case the call is costly and
390 not executed at all. */
391 ret = MOVE_PRESERVE_EXECUTION;
392 lhs = gimple_call_lhs (gs: stmt);
393 }
394 else if (is_gimple_assign (gs: stmt))
395 lhs = gimple_assign_lhs (gs: stmt);
396 else
397 return MOVE_IMPOSSIBLE;
398
399 if (TREE_CODE (lhs) == SSA_NAME
400 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
401 return MOVE_IMPOSSIBLE;
402
403 if (TREE_CODE (lhs) != SSA_NAME
404 || gimple_could_trap_p (stmt))
405 return MOVE_PRESERVE_EXECUTION;
406
407 if (is_gimple_assign (gs: stmt))
408 {
409 auto code = gimple_assign_rhs_code (gs: stmt);
410 tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
411 /* For shifts and rotates and possibly out-of-bound shift operands
412 we currently cannot rewrite them into something unconditionally
413 well-defined. */
414 if ((code == LSHIFT_EXPR
415 || code == RSHIFT_EXPR
416 || code == LROTATE_EXPR
417 || code == RROTATE_EXPR)
418 && (TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
419 /* We cannot use ranges at 'stmt' here. */
420 || wi::ltu_p (x: wi::to_wide (t: gimple_assign_rhs2 (gs: stmt)),
421 y: element_precision (type))))
422 ret = MOVE_PRESERVE_EXECUTION;
423 }
424
425 /* Non local loads in a transaction cannot be hoisted out. Well,
426 unless the load happens on every path out of the loop, but we
427 don't take this into account yet. */
428 if (flag_tm
429 && gimple_in_transaction (stmt)
430 && gimple_assign_single_p (gs: stmt))
431 {
432 tree rhs = gimple_assign_rhs1 (gs: stmt);
433 if (DECL_P (rhs) && is_global_var (t: rhs))
434 {
435 if (dump_file)
436 {
437 fprintf (stream: dump_file, format: "Cannot hoist conditional load of ");
438 print_generic_expr (dump_file, rhs, TDF_SLIM);
439 fprintf (stream: dump_file, format: " because it is in a transaction.\n");
440 }
441 return MOVE_IMPOSSIBLE;
442 }
443 }
444
445 return ret;
446}
447
448static enum move_pos
449movement_possibility (gimple *stmt)
450{
451 enum move_pos pos = movement_possibility_1 (stmt);
452 if (pos == MOVE_POSSIBLE)
453 {
454 use_operand_p use_p;
455 ssa_op_iter ssa_iter;
456 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, ssa_iter, SSA_OP_USE)
457 if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME
458 && ssa_name_maybe_undef_p (USE_FROM_PTR (use_p)))
459 return MOVE_PRESERVE_EXECUTION;
460 }
461 return pos;
462}
463
464
465/* Compare the profile count inequality of bb and loop's preheader, it is
466 three-state as stated in profile-count.h, FALSE is returned if inequality
467 cannot be decided. */
468bool
469bb_colder_than_loop_preheader (basic_block bb, class loop *loop)
470{
471 gcc_assert (bb && loop);
472 return bb->count < loop_preheader_edge (loop)->src->count;
473}
474
/* Check coldest loop between OUTERMOST_LOOP and LOOP by comparing profile
   count.
   It does three steps check:
   1) Check whether CURR_BB is cold in it's own loop_father, if it is cold, just
   return NULL which means it should not be moved out at all;
   2) CURR_BB is NOT cold, check if pre-computed COLDEST_LOOP is outside of
   OUTERMOST_LOOP, if it is inside of OUTERMOST_LOOP, return the COLDEST_LOOP;
   3) If COLDEST_LOOP is outside of OUTERMOST_LOOP, check whether there is a
   hotter loop between OUTERMOST_LOOP and loop in pre-computed
   HOTTER_THAN_INNER_LOOP, return it's nested inner loop, otherwise return
   OUTERMOST_LOOP.
   At last, the coldest_loop is inside of OUTERMOST_LOOP, just return it as
   the hoist target.  */

static class loop *
get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
		      basic_block curr_bb)
{
  gcc_assert (outermost_loop == loop
	      || flow_loop_nested_p (outermost_loop, loop));

  /* If bb_colder_than_loop_preheader returns false due to three-state
     comparision, OUTERMOST_LOOP is returned finally to preserve the behavior.
     Otherwise, return the coldest loop between OUTERMOST_LOOP and LOOP.  */
  if (curr_bb && bb_colder_than_loop_preheader (bb: curr_bb, loop))
    return NULL;

  class loop *coldest_loop = coldest_outermost_loop[loop->num];
  /* Depth comparison: a smaller depth means COLDEST_LOOP encloses
     OUTERMOST_LOOP, i.e. lies outside of it.  */
  if (loop_depth (loop: coldest_loop) < loop_depth (loop: outermost_loop))
    {
      class loop *hotter_loop = hotter_than_inner_loop[loop->num];
      if (!hotter_loop
	  || loop_depth (loop: hotter_loop) < loop_depth (loop: outermost_loop))
	return outermost_loop;

      /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
	 [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
	 hotter_loop, second_coldest_loop, ..., loop]
	 return second_coldest_loop to be the hoist target.  */
      class loop *aloop;
      for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
	if (aloop == loop || flow_loop_nested_p (aloop, loop))
	  return aloop;
    }
  return coldest_loop;
}
521
/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  Returns NULL if DEF is not invariant outside
   of LOOP.  */

static class loop *
outermost_invariant_loop (tree def, class loop *loop)
{
  gimple *def_stmt;
  basic_block def_bb;
  class loop *max_loop;
  struct lim_aux_data *lim_data;

  /* No operand at all: invariant everywhere, hoistable to the
     outermost real loop.  */
  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      /* Constants and other minimal invariants are invariant
	 everywhere.  */
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (g: def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  /* If the definition itself is planned to be hoisted, DEF becomes
     invariant one level further out than its max_loop.  */
  lim_data = get_lim_data (stmt: def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (loop: lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (loop: max_loop) + 1);

  return max_loop;
}
561
/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e. we need to hoist it out of the loop if we want
   to hoist the statement represented by DATA), record the statement in that
   DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
   add the cost of the computation of DEF to the DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
		bool add_cost)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (g: def_stmt);
  class loop *max_loop;
  struct lim_aux_data *def_data;

  /* Default definitions have no defining block and constrain nothing.  */
  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  /* Tighten DATA->max_loop to the innermost of the two constraints.  */
  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (stmt: def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  data->depends.safe_push (obj: def_stmt);

  return true;
}
609
/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.
   Note the order of the early-outs matters: e.g. a builtin_constant_p
   call must be recognized before the generic memory-reference check.  */

static unsigned
stmt_cost (gimple *stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (g: stmt) == GIMPLE_COND
      || gimple_code (g: stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (gs: stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (gs: stmt);
      if (fndecl && fndecl_built_in_p (node: fndecl, name1: BUILT_IN_CONSTANT_P))
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
    return 1;

  enum tree_code code = gimple_assign_rhs_code (gs: stmt);
  switch (code)
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case COND_EXPR:
    case VEC_COND_EXPR:
      /* Conditionals are expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
	 of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
	 should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      /* Comparisons are usually expensive.  */
      if (TREE_CODE_CLASS (code) == tcc_comparison)
	return LIM_EXPENSIVE;
      return 1;
    }
}
695
696/* Finds the outermost loop between OUTER and LOOP in that the memory reference
697 REF is independent. If REF is not independent in LOOP, NULL is returned
698 instead. */
699
700static class loop *
701outermost_indep_loop (class loop *outer, class loop *loop, im_mem_ref *ref)
702{
703 class loop *aloop;
704
705 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
706 return NULL;
707
708 for (aloop = outer;
709 aloop != loop;
710 aloop = superloop_at_depth (loop, loop_depth (loop: aloop) + 1))
711 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
712 && ref_indep_loop_p (aloop, ref, lim_raw))
713 return aloop;
714
715 if (ref_indep_loop_p (loop, ref, lim_raw))
716 return loop;
717 else
718 return NULL;
719}
720
721/* If there is a simple load or store to a memory reference in STMT, returns
722 the location of the memory reference, and sets IS_STORE according to whether
723 it is a store or load. Otherwise, returns NULL. */
724
725static tree *
726simple_mem_ref_in_stmt (gimple *stmt, bool *is_store)
727{
728 tree *lhs, *rhs;
729
730 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
731 if (!gimple_assign_single_p (gs: stmt))
732 return NULL;
733
734 lhs = gimple_assign_lhs_ptr (gs: stmt);
735 rhs = gimple_assign_rhs1_ptr (gs: stmt);
736
737 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (g: stmt))
738 {
739 *is_store = false;
740 return rhs;
741 }
742 else if (gimple_vdef (g: stmt)
743 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
744 {
745 *is_store = true;
746 return lhs;
747 }
748 else
749 return NULL;
750}
751
752/* From a controlling predicate in DOM determine the arguments from
753 the PHI node PHI that are chosen if the predicate evaluates to
754 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
755 they are non-NULL. Returns true if the arguments can be determined,
756 else return false. */
757
758static bool
759extract_true_false_args_from_phi (basic_block dom, gphi *phi,
760 tree *true_arg_p, tree *false_arg_p)
761{
762 edge te, fe;
763 if (! extract_true_false_controlled_edges (dom, gimple_bb (g: phi),
764 &te, &fe))
765 return false;
766
767 if (true_arg_p)
768 *true_arg_p = PHI_ARG_DEF (phi, te->dest_idx);
769 if (false_arg_p)
770 *false_arg_p = PHI_ARG_DEF (phi, fe->dest_idx);
771
772 return true;
773}
774
/* Determine the outermost loop to that it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in that the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple *stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (g: stmt);
  class loop *loop = bb->loop_father;
  class loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = get_coldest_out_loop (outermost_loop: level, loop, curr_bb: bb);
  if (!lim_data->max_loop)
    return false;

  /* PHIs get special cost treatment; see below.  */
  if (gphi *phi = dyn_cast <gphi *> (p: stmt))
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);

	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      /* Assign const 1 to constants.  */
	      min_cost = MIN (min_cost, 1);
	      total_cost += 1;
	      continue;
	    }
	  if (!add_dependency (def: val, data: lim_data, loop, add_cost: false))
	    return false;

	  gimple *def_stmt = SSA_NAME_DEF_STMT (val);
	  if (gimple_bb (g: def_stmt)
	      && gimple_bb (g: def_stmt)->loop_father == loop)
	    {
	      def_data = get_lim_data (stmt: def_stmt);
	      if (def_data)
		{
		  min_cost = MIN (min_cost, def_data->cost);
		  total_cost += def_data->cost;
		}
	    }
	}

      min_cost = MIN (min_cost, total_cost);
      lim_data->cost += min_cost;

      /* A multi-argument PHI additionally needs its controlling
	 condition to be hoistable.  */
      if (gimple_phi_num_args (gs: phi) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond;
	  if (gsi_end_p (i: gsi_last_bb (bb: dom)))
	    return false;
	  cond = gsi_stmt (i: gsi_last_bb (bb: dom));
	  if (gimple_code (g: cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
	    return false;

	  /* Check if one of the dependent statement is a vector compare whether
	     the target supports it, otherwise it's invalid to hoist it out of
	     the gcond it belonged to.  */
	  if (VECTOR_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond))))
	    {
	      tree type = TREE_TYPE (gimple_cond_lhs (cond));
	      auto code = gimple_cond_code (gs: cond);
	      if (!target_supports_op_p (type, code, optab_vector))
		return false;
	    }

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (def: val, data: lim_data, loop, add_cost: false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		lim_data->cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ??? We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }

  /* A stmt that receives abnormal edges cannot be hoisted.  */
  if (is_a <gcall *> (p: stmt)
      && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
    if (!add_dependency (def: val, data: lim_data, loop, add_cost: true))
      return false;

  /* A memory access additionally constrains max_loop via the alias
     oracle (analyzable ref) or the virtual use chain (otherwise).  */
  if (gimple_vuse (g: stmt))
    {
      im_mem_ref *ref
	= lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
      if (ref
	  && MEM_ANALYZABLE (ref))
	{
	  lim_data->max_loop = outermost_indep_loop (outer: lim_data->max_loop,
						     loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else if (! add_dependency (def: gimple_vuse (g: stmt), data: lim_data, loop, add_cost: false))
	return false;
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}
928
/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  Recurses over
   STMT's recorded dependencies.  */

static void
set_level (gimple *stmt, class loop *orig_loop, class loop *level)
{
  class loop *stmt_loop = gimple_bb (g: stmt)->loop_father;
  struct lim_aux_data *lim_data;
  gimple *dep_stmt;
  unsigned i;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (loop: lim_data->tgt_loop));
  /* Already hoisted far enough: nothing to do.  */
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  lim_data->tgt_loop = level;
  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
    set_level (stmt: dep_stmt, orig_loop, level);
}
957
958/* Determines an outermost loop from that we want to hoist the statement STMT.
959 For now we chose the outermost possible loop. TODO -- use profiling
960 information to set it more sanely. */
961
962static void
963set_profitable_level (gimple *stmt)
964{
965 set_level (stmt, orig_loop: gimple_bb (g: stmt)->loop_father, level: get_lim_data (stmt)->max_loop);
966}
967
968/* Returns true if STMT is a call that has side effects. */
969
970static bool
971nonpure_call_p (gimple *stmt)
972{
973 if (gimple_code (g: stmt) != GIMPLE_CALL)
974 return false;
975
976 return gimple_has_side_effects (stmt);
977}
978
979/* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
980
981static gimple *
982rewrite_reciprocal (gimple_stmt_iterator *bsi)
983{
984 gassign *stmt, *stmt1, *stmt2;
985 tree name, lhs, type;
986 tree real_one;
987 gimple_stmt_iterator gsi;
988
989 stmt = as_a <gassign *> (p: gsi_stmt (i: *bsi));
990 lhs = gimple_assign_lhs (gs: stmt);
991 type = TREE_TYPE (lhs);
992
993 real_one = build_one_cst (type);
994
995 name = make_temp_ssa_name (type, NULL, name: "reciptmp");
996 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
997 gimple_assign_rhs2 (gs: stmt));
998 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
999 gimple_assign_rhs1 (gs: stmt));
1000
1001 /* Replace division stmt with reciprocal and multiply stmts.
1002 The multiply stmt is not invariant, so update iterator
1003 and avoid rescanning. */
1004 gsi = *bsi;
1005 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1006 gsi_replace (&gsi, stmt2, true);
1007
1008 /* Continue processing with invariant reciprocal statement. */
1009 return stmt1;
1010}
1011
/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.
   This makes the mask (1 << B) loop invariant when B is, so it can be
   hoisted, saving a shift on each iteration.  Returns the new invariant
   statement to continue processing with, or the unchanged statement at
   *BSI when the pattern does not match.  */

static gimple *
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gassign *stmt;
  gimple *stmt1;
  gassign *stmt2;
  gimple *use_stmt;
  gcond *cond_stmt;
  tree lhs, name, t, a, b;
  use_operand_p use;

  stmt = as_a <gassign *> (p: gsi_stmt (i: *bsi));
  lhs = gimple_assign_lhs (gs: stmt);

  /* Verify that the single use of lhs is a comparison against zero. */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (var: lhs, use_p: &use, stmt: &use_stmt))
    return stmt;
  cond_stmt = dyn_cast <gcond *> (p: use_stmt);
  if (!cond_stmt)
    return stmt;
  if (gimple_cond_lhs (gs: cond_stmt) != lhs
      || (gimple_cond_code (gs: cond_stmt) != NE_EXPR
	  && gimple_cond_code (gs: cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (gs: cond_stmt)))
    return stmt;

  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (g: stmt1) != GIMPLE_ASSIGN)
    return stmt;

  /* There is a conversion in between possibly inserted by fold.
     Look through it, but only when it is the single use.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
    {
      t = gimple_assign_rhs1 (gs: stmt1);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (var: t))
	return stmt;
      stmt1 = SSA_NAME_DEF_STMT (t);
      if (gimple_code (g: stmt1) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* Verify that B is loop invariant but A is not.  Verify that with
     all the stmt walking we are still in the same loop.  */
  if (gimple_assign_rhs_code (gs: stmt1) != RSHIFT_EXPR
      || loop_containing_stmt (stmt: stmt1) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (gs: stmt1);
  b = gimple_assign_rhs2 (gs: stmt1);

  /* The rewrite only pays off when the mask 1 << B is invariant but
     A >> B was not (otherwise the original form would hoist as-is).  */
  if (outermost_invariant_loop (def: b, loop: loop_containing_stmt (stmt: stmt1)) != NULL
      && outermost_invariant_loop (def: a, loop: loop_containing_stmt (stmt: stmt1)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "shifttmp");
      stmt1 = gimple_build_assign (name, t);

      /* A & (1 << B) */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "shifttmp");
      stmt2 = gimple_build_assign (name, t);

      /* Replace the SSA_NAME we compare against zero.  Adjust
	 the type of zero accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (gs: cond_stmt,
			   rhs: build_int_cst_type (TREE_TYPE (name),
						0));

      /* Don't use gsi_replace here, none of the new assignments sets
	 the variable originally set in stmt.  Move bsi to stmt1, and
	 then remove the original stmt, so that we get a chance to
	 retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
      gimple *to_release = gsi_stmt (i: rsi);
      gsi_remove (&rsi, true);
      release_defs (to_release);

      /* Continue processing with the invariant shift statement.  */
      return stmt1;
    }

  return stmt;
}
1107
/* Determine the outermost loops in that statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the
   statements.  Also performs the reciprocal and bittest strength
   reductions that expose additional invariant statements. */

static void
compute_invariantness (basic_block bb)
{
  enum move_pos pos;
  gimple_stmt_iterator bsi;
  gimple *stmt;
  /* When BB is not always executed in its innermost loop a hoisted
     statement might be executed where it originally was not; start
     pessimistic in that case.  */
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  class loop *outermost = ALWAYS_EXECUTED_IN (bb);
  struct lim_aux_data *lim_data;

  /* Nothing to hoist from blocks outside of any loop.  */
  if (!loop_outer (loop: bb->loop_father))
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, loop_depth (loop: bb->loop_father));

  /* Look at PHI nodes, but only if there is at most two.
     ??? We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again. */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (i: bsi)
      && ((gsi_next (i: &bsi), gsi_end_p (i: bsi))
	  || (gsi_next (i: &bsi), gsi_end_p (i: bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
      {
	stmt = gsi_stmt (i: bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = get_lim_data (stmt);
	if (! lim_data)
	  lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	/* A failed movement analysis leaves max_loop NULL so the PHI
	   is never moved.  */
	if (!determine_max_movement (stmt, must_preserve_exec: false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2);
	    fprintf (stream: dump_file, format: "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (loop: lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

  for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
    {
      stmt = gsi_stmt (i: bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  /* A call with side effects may not return; anything after it
	     in this block is no longer guaranteed to execute.  */
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
	      struct lim_aux_data *lim_data = get_lim_data (stmt);
	      if (! lim_data)
		lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
	    }
	  continue;
	}

      if (is_gimple_assign (gs: stmt)
	  && (get_gimple_rhs_class (code: gimple_assign_rhs_code (gs: stmt))
	      == GIMPLE_BINARY_RHS))
	{
	  tree op0 = gimple_assign_rhs1 (gs: stmt);
	  tree op1 = gimple_assign_rhs2 (gs: stmt);
	  class loop *ol1 = outermost_invariant_loop (def: op1,
					loop: loop_containing_stmt (stmt));

	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
	      && ol1 != NULL
	      && outermost_invariant_loop (def: op0, loop: ol1) == NULL)
	    stmt = rewrite_reciprocal (bsi: &bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
	      && gimple_assign_rhs_code (gs: stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (var: op0))
	    stmt = rewrite_bittest (bsi: &bsi);
	}

      lim_data = get_lim_data (stmt);
      if (! lim_data)
	lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;

      /* Statements that must preserve their execution cannot be moved
	 once execution is no longer guaranteed in this block.  */
      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, must_preserve_exec: pos == MOVE_PRESERVE_EXECUTION))
	{
	  lim_data->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_gimple_stmt (dump_file, stmt, 2);
	  fprintf (stream: dump_file, format: "  invariant up to level %d, cost %d.\n\n",
		   loop_depth (loop: lim_data->max_loop),
		   lim_data->cost);
	}

      if (lim_data->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
      /* When we run before PRE and PRE is active hoist all expressions
	 to the always executed loop since PRE would do so anyway
	 and we can preserve range info while PRE cannot.  */
      else if (flag_tree_pre && !in_loop_pipeline
	       && outermost)
	{
	  class loop *mloop = lim_data->max_loop;
	  if (loop_depth (loop: outermost) > loop_depth (loop: mloop))
	    {
	      mloop = outermost;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (stream: dump_file, format: "  constraining to loop depth %d\n\n\n",
			 loop_depth (loop: mloop));
	    }
	  set_level (stmt, orig_loop: bb->loop_father, level: mloop);
	}
    }
}
1264
/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  Returns a set of TODO_* flags (possibly
   TODO_cleanup_cfg when a PHI was turned into a COND_EXPR).  */

unsigned int
move_computations_worker (basic_block bb)
{
  class loop *level;
  unsigned cost = 0;
  struct lim_aux_data *lim_data;
  unsigned int todo = 0;

  /* Blocks outside any loop have nothing to hoist.  */
  if (!loop_outer (loop: bb->loop_father))
    return todo;

  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); )
    {
      gassign *new_stmt;
      gphi *stmt = bsi.phi ();

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (i: &bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      /* A NULL target loop means this PHI is not being moved.  */
      if (!level)
	{
	  gsi_next (i: &bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (stream: dump_file, format: "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      if (gimple_phi_num_args (gs: stmt) == 1)
	{
	  /* Degenerate PHI: a plain copy of its only argument.  */
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign (gimple_phi_result (gs: stmt),
					  TREE_CODE (arg), arg);
	}
      else
	{
	  /* Two-argument PHI controlled by the condition ending the
	     immediate dominator block; materialize it as a COND_EXPR.  */
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *cond = gsi_stmt (i: gsi_last_bb (bb: dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, phi: stmt, true_arg_p: &arg0, false_arg_p: &arg1);
	  gcc_assert (arg0 && arg1);
	  /* For `bool_val != 0`, reuse bool_val. */
	  if (gimple_cond_code (gs: cond) == NE_EXPR
	      && integer_zerop (gimple_cond_rhs (gs: cond))
	      && types_compatible_p (TREE_TYPE (gimple_cond_lhs (cond)),
				     boolean_type_node))
	    {
	      t = gimple_cond_lhs (gs: cond);
	    }
	  else
	    {
	      /* Otherwise compute the condition into a fresh boolean on
		 the preheader edge of the target loop.  */
	      t = make_ssa_name (boolean_type_node);
	      new_stmt = gimple_build_assign (t, gimple_cond_code (gs: cond),
					      gimple_cond_lhs (gs: cond),
					      gimple_cond_rhs (gs: cond));
	      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
	    }
	  new_stmt = gimple_build_assign (gimple_phi_result (gs: stmt),
					  COND_EXPR, t, arg0, arg1);
	  todo |= TODO_cleanup_cfg;
	}
      /* Flow-sensitive info on the result may not hold at the hoisted
	 position unless BB is always executed in the target loop.  */
      if (!ALWAYS_EXECUTED_IN (bb)
	  || (ALWAYS_EXECUTED_IN (bb) != level
	      && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))
	reset_flow_sensitive_info (gimple_assign_lhs (gs: new_stmt));
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); )
    {
      edge e;

      gimple *stmt = gsi_stmt (i: bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (i: &bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      /* A NULL target loop means this statement is not being moved.  */
      if (!level)
	{
	  gsi_next (i: &bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed it here to force its operands to be moved if necessary. */
      if (gimple_code (g: stmt) == GIMPLE_COND)
	{
	  gsi_next (i: &bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Moving statement ");
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (stream: dump_file, format: "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      e = loop_preheader_edge (level);
      /* Only statements without a virtual definition can be moved.  */
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (g: stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gphi_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (i: gsi2); gsi_next (i: &gsi2))
	    {
	      gphi *phi = gsi2.phi ();
	      if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
		{
		  SET_USE (gimple_vuse_op (stmt),
			   PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
      gsi_remove (&bsi, false);
      /* As for PHIs, flow-sensitive info on the lhs may be invalid at
	 the new location.  */
      if (gimple_has_lhs (stmt)
	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	reset_flow_sensitive_info (gimple_get_lhs (stmt));
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow rewrite it to
	 unsigned arithmetic.  */
      if (gimple_needing_rewrite_undefined (stmt)
	  && (!ALWAYS_EXECUTED_IN (bb)
	      || !(ALWAYS_EXECUTED_IN (bb) == level
		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
	gsi_insert_seq_on_edge (e, rewrite_to_defined_unconditional (stmt));
      else
	gsi_insert_on_edge (e, stmt);
    }

  return todo;
}
1433
1434/* Checks whether the statement defining variable *INDEX can be hoisted
1435 out of the loop passed in DATA. Callback for for_each_index. */
1436
1437static bool
1438may_move_till (tree ref, tree *index, void *data)
1439{
1440 class loop *loop = (class loop *) data, *max_loop;
1441
1442 /* If REF is an array reference, check also that the step and the lower
1443 bound is invariant in LOOP. */
1444 if (TREE_CODE (ref) == ARRAY_REF)
1445 {
1446 tree step = TREE_OPERAND (ref, 3);
1447 tree lbound = TREE_OPERAND (ref, 2);
1448
1449 max_loop = outermost_invariant_loop (def: step, loop);
1450 if (!max_loop)
1451 return false;
1452
1453 max_loop = outermost_invariant_loop (def: lbound, loop);
1454 if (!max_loop)
1455 return false;
1456 }
1457
1458 max_loop = outermost_invariant_loop (def: *index, loop);
1459 if (!max_loop)
1460 return false;
1461
1462 return true;
1463}
1464
1465/* If OP is SSA NAME, force the statement that defines it to be
1466 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
1467
1468static void
1469force_move_till_op (tree op, class loop *orig_loop, class loop *loop)
1470{
1471 gimple *stmt;
1472
1473 if (!op
1474 || is_gimple_min_invariant (op))
1475 return;
1476
1477 gcc_assert (TREE_CODE (op) == SSA_NAME);
1478
1479 stmt = SSA_NAME_DEF_STMT (op);
1480 if (gimple_nop_p (g: stmt))
1481 return;
1482
1483 set_level (stmt, orig_loop, level: loop);
1484}
1485
/* Forces statement defining invariants in REF (and *INDEX) to be moved out of
   the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
   for_each_index. */

struct fmt_data
{
  /* The loop that defining statements are forced out of.  */
  class loop *loop;
  /* The loop in which the reference is used.  */
  class loop *orig_loop;
};
1495
1496static bool
1497force_move_till (tree ref, tree *index, void *data)
1498{
1499 struct fmt_data *fmt_data = (struct fmt_data *) data;
1500
1501 if (TREE_CODE (ref) == ARRAY_REF)
1502 {
1503 tree step = TREE_OPERAND (ref, 3);
1504 tree lbound = TREE_OPERAND (ref, 2);
1505
1506 force_move_till_op (op: step, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop);
1507 force_move_till_op (op: lbound, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop);
1508 }
1509
1510 force_move_till_op (op: *index, orig_loop: fmt_data->orig_loop, loop: fmt_data->loop);
1511
1512 return true;
1513}
1514
/* A function to free the mem_ref object OBJ. */

static void
memref_free (class im_mem_ref *mem)
{
  /* Only the location vector owns separately allocated storage; the
     ref itself is obstack-allocated (see mem_ref_alloc).  */
  mem->accesses_in_loop.release ();
}
1522
1523/* Allocates and returns a memory reference description for MEM whose hash
1524 value is HASH and id is ID. */
1525
1526static im_mem_ref *
1527mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
1528{
1529 im_mem_ref *ref = XOBNEW (&mem_ref_obstack, class im_mem_ref);
1530 if (mem)
1531 ref->mem = *mem;
1532 else
1533 ao_ref_init (&ref->mem, error_mark_node);
1534 ref->id = id;
1535 ref->ref_canonical = false;
1536 ref->ref_decomposed = false;
1537 ref->hash = hash;
1538 ref->stored = NULL;
1539 ref->loaded = NULL;
1540 bitmap_initialize (head: &ref->dep_loop, obstack: &lim_bitmap_obstack);
1541 ref->accesses_in_loop.create (nelems: 1);
1542
1543 return ref;
1544}
1545
1546/* Records memory reference location *LOC in LOOP to the memory reference
1547 description REF. The reference occurs in statement STMT. */
1548
1549static void
1550record_mem_ref_loc (im_mem_ref *ref, gimple *stmt, tree *loc)
1551{
1552 mem_ref_loc aref;
1553 aref.stmt = stmt;
1554 aref.ref = loc;
1555 ref->accesses_in_loop.safe_push (obj: aref);
1556}
1557
1558/* Set the LOOP bit in REF stored bitmap and allocate that if
1559 necessary. Return whether a bit was changed. */
1560
1561static bool
1562set_ref_stored_in_loop (im_mem_ref *ref, class loop *loop)
1563{
1564 if (!ref->stored)
1565 ref->stored = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
1566 return bitmap_set_bit (ref->stored, loop->num);
1567}
1568
1569/* Marks reference REF as stored in LOOP. */
1570
1571static void
1572mark_ref_stored (im_mem_ref *ref, class loop *loop)
1573{
1574 while (loop != current_loops->tree_root
1575 && set_ref_stored_in_loop (ref, loop))
1576 loop = loop_outer (loop);
1577}
1578
1579/* Set the LOOP bit in REF loaded bitmap and allocate that if
1580 necessary. Return whether a bit was changed. */
1581
1582static bool
1583set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop)
1584{
1585 if (!ref->loaded)
1586 ref->loaded = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
1587 return bitmap_set_bit (ref->loaded, loop->num);
1588}
1589
1590/* Marks reference REF as loaded in LOOP. */
1591
1592static void
1593mark_ref_loaded (im_mem_ref *ref, class loop *loop)
1594{
1595 while (loop != current_loops->tree_root
1596 && set_ref_loaded_in_loop (ref, loop))
1597 loop = loop_outer (loop);
1598}
1599
/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the memory_accesses structure.  Marks
   the vops accessed through unrecognized statements there as
   well. */

static void
gather_mem_refs_stmt (class loop *loop, gimple *stmt)
{
  tree *mem = NULL;
  hashval_t hash;
  im_mem_ref **slot;
  im_mem_ref *ref;
  bool is_stored;
  unsigned id;

  /* Statements without virtual operands do not access memory.  */
  if (!gimple_vuse (g: stmt))
    return;

  mem = simple_mem_ref_in_stmt (stmt, is_store: &is_stored);
  if (!mem && is_gimple_assign (gs: stmt))
    {
      /* For aggregate copies record distinct references but use them
	 only for disambiguation purposes.  */
      id = memory_accesses.refs_list.length ();
      ref = mem_ref_alloc (NULL, hash: 0, id);
      memory_accesses.refs_list.safe_push (obj: ref);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Unhandled memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      record_mem_ref_loc (ref, stmt, loc: mem);
      is_stored = gimple_vdef (g: stmt);
    }
  else if (!mem)
    {
      /* We use the shared mem_ref for all unanalyzable refs. */
      id = UNANALYZABLE_MEM_ID;
      ref = memory_accesses.refs_list[id];
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (stream: dump_file, format: "Unanalyzed memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      is_stored = gimple_vdef (g: stmt);
    }
  else
    {
      /* We are looking for equal refs that might differ in structure
	 such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
	 make sure we can canonicalize the ref in the hashtable if
	 non-operand_equal_p refs are found.  For the lookup we mark
	 the case we want strict equality with aor.max_size == -1.  */
      ao_ref aor;
      ao_ref_init (&aor, *mem);
      ao_ref_base (&aor);
      ao_ref_alias_set (&aor);
      HOST_WIDE_INT offset, size, max_size;
      poly_int64 saved_maxsize = aor.max_size, mem_off;
      tree mem_base;
      bool ref_decomposed;
      /* When the ref has a constant base/offset/size, hash those pieces
	 so structurally different but equal refs collide.  */
      if (aor.max_size_known_p ()
	  && aor.offset.is_constant (const_value: &offset)
	  && aor.size.is_constant (const_value: &size)
	  && aor.max_size.is_constant (const_value: &max_size)
	  && size == max_size
	  && (size % BITS_PER_UNIT) == 0
	  /* We're canonicalizing to a MEM where TYPE_SIZE specifies the
	     size.  Make sure this is consistent with the extraction.  */
	  && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (*mem)))
	  && known_eq (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (*mem))),
		       aor.size)
	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
	{
	  ref_decomposed = true;
	  tree base = ao_ref_base (&aor);
	  poly_int64 moffset;
	  HOST_WIDE_INT mcoffset;
	  if (TREE_CODE (base) == MEM_REF
	      && (mem_ref_offset (base) * BITS_PER_UNIT + offset).to_shwi (r: &moffset)
	      && moffset.is_constant (const_value: &mcoffset))
	    {
	      hash = iterative_hash_expr (TREE_OPERAND (base, 0), seed: 0);
	      hash = iterative_hash_host_wide_int (val: mcoffset, val2: hash);
	    }
	  else
	    {
	      hash = iterative_hash_expr (tree: base, seed: 0);
	      hash = iterative_hash_host_wide_int (val: offset, val2: hash);
	    }
	  hash = iterative_hash_host_wide_int (val: size, val2: hash);
	}
      else
	{
	  /* Otherwise hash the whole tree and request strict equality
	     for the lookup by setting max_size to -1.  */
	  ref_decomposed = false;
	  hash = iterative_hash_expr (tree: aor.ref, seed: 0);
	  aor.max_size = -1;
	}
      slot = memory_accesses.refs->find_slot_with_hash (comparable: &aor, hash, insert: INSERT);
      /* Restore max_size, possibly adjusted for the lookup above.  */
      aor.max_size = saved_maxsize;
      if (*slot)
	{
	  if (!(*slot)->ref_canonical
	      && !operand_equal_p (*mem, (*slot)->mem.ref, flags: 0))
	    {
	      /* If we didn't yet canonicalize the hashtable ref (which
		 we'll end up using for code insertion) and hit a second
		 equal ref that is not structurally equivalent create
		 a canonical ref which is a bare MEM_REF.  */
	      if (TREE_CODE (*mem) == MEM_REF
		  || TREE_CODE (*mem) == TARGET_MEM_REF)
		{
		  (*slot)->mem.ref = *mem;
		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
		}
	      else
		{
		  tree ref_alias_type = reference_alias_ptr_type (*mem);
		  unsigned int ref_align = get_object_alignment (*mem);
		  tree ref_type = TREE_TYPE (*mem);
		  tree tmp = build1 (ADDR_EXPR, ptr_type_node,
				     unshare_expr (mem_base));
		  if (TYPE_ALIGN (ref_type) != ref_align)
		    ref_type = build_aligned_type (ref_type, ref_align);
		  tree new_ref
		    = fold_build2 (MEM_REF, ref_type, tmp,
				   build_int_cst (ref_alias_type, mem_off));
		  if ((*slot)->mem.volatile_p)
		    TREE_THIS_VOLATILE (new_ref) = 1;
		  (*slot)->mem.ref = new_ref;
		  /* Make sure the recorded base and offset are consistent
		     with the newly built ref.  */
		  if (TREE_CODE (TREE_OPERAND (new_ref, 0)) == ADDR_EXPR)
		    ;
		  else
		    {
		      (*slot)->mem.base = new_ref;
		      (*slot)->mem.offset = 0;
		    }
		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
				       && is_gimple_mem_ref_addr
					    (TREE_OPERAND ((*slot)->mem.ref,
							   0)));
		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
		}
	      (*slot)->ref_canonical = true;
	    }
	  ref = *slot;
	  id = ref->id;
	}
      else
	{
	  /* First time we see this reference; allocate a fresh id.  */
	  id = memory_accesses.refs_list.length ();
	  ref = mem_ref_alloc (mem: &aor, hash, id);
	  ref->ref_decomposed = ref_decomposed;
	  memory_accesses.refs_list.safe_push (obj: ref);
	  *slot = ref;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (stream: dump_file, format: "Memory reference %u: ", id);
	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
	      fprintf (stream: dump_file, format: "\n");
	    }
	}

      record_mem_ref_loc (ref, stmt, loc: mem);
    }
  if (is_stored)
    {
      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
      mark_ref_stored (ref, loop);
    }
  /* A not simple memory op is also a read when it is a write.  */
  if (!is_stored || id == UNANALYZABLE_MEM_ID
      || ref->mem.ref == error_mark_node)
    {
      bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
      mark_ref_loaded (ref, loop);
    }
  init_lim_data (stmt)->ref = ref->id;
  return;
}
1783
/* Postorder index of each loop, indexed by loop number.  Used by the
   sort/search callbacks below to order blocks and reference locations
   after their loop father's postorder.  */
static unsigned *bb_loop_postorder;
1785
1786/* qsort sort function to sort blocks after their loop fathers postorder. */
1787
1788static int
1789sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_,
1790 void *bb_loop_postorder_)
1791{
1792 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1793 basic_block bb1 = *(const basic_block *)bb1_;
1794 basic_block bb2 = *(const basic_block *)bb2_;
1795 class loop *loop1 = bb1->loop_father;
1796 class loop *loop2 = bb2->loop_father;
1797 if (loop1->num == loop2->num)
1798 return bb1->index - bb2->index;
1799 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1800}
1801
1802/* qsort sort function to sort ref locs after their loop fathers postorder. */
1803
1804static int
1805sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_,
1806 void *bb_loop_postorder_)
1807{
1808 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1809 const mem_ref_loc *loc1 = (const mem_ref_loc *)loc1_;
1810 const mem_ref_loc *loc2 = (const mem_ref_loc *)loc2_;
1811 class loop *loop1 = gimple_bb (g: loc1->stmt)->loop_father;
1812 class loop *loop2 = gimple_bb (g: loc2->stmt)->loop_father;
1813 if (loop1->num == loop2->num)
1814 return 0;
1815 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1816}
1817
/* Gathers memory references in loops.  When STORE_MOTION is true also
   propagates stored-reference bitmaps up the loop hierarchy for use by
   store motion.  */

static void
analyze_memory_references (bool store_motion)
{
  gimple_stmt_iterator bsi;
  basic_block bb, *bbs;
  class loop *outer;
  unsigned i, n;

  /* Collect all basic-blocks in loops and sort them after their
     loops postorder.  */
  i = 0;
  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
  FOR_EACH_BB_FN (bb, cfun)
    if (bb->loop_father != current_loops->tree_root)
      bbs[i++] = bb;
  n = i;
  gcc_sort_r (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp,
	      bb_loop_postorder);

  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
     That results in better locality for all the bitmaps.  It also
     automatically sorts the location list of gathered memory references
     after their loop postorder number allowing to binary-search it.  */
  for (i = 0; i < n; ++i)
    {
      basic_block bb = bbs[i];
      for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
	gather_mem_refs_stmt (loop: bb->loop_father, stmt: gsi_stmt (i: bsi));
    }

  /* Verify the list of gathered memory references is sorted after their
     loop postorder number.  */
  if (flag_checking)
    {
      im_mem_ref *ref;
      FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
	for (unsigned j = 1; j < ref->accesses_in_loop.length (); ++j)
	  gcc_assert (sort_locs_in_loop_postorder_cmp
			(&ref->accesses_in_loop[j-1], &ref->accesses_in_loop[j],
			 bb_loop_postorder) <= 0);
    }

  free (ptr: bbs);

  if (!store_motion)
    return;

  /* Propagate the information about accessed memory references up
     the loop hierarchy.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      /* Finalize the overall touched references (including subloops).  */
      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
		       &memory_accesses.refs_stored_in_loop[loop->num]);

      /* Propagate the information about accessed memory references up
	 the loop hierarchy.  */
      outer = loop_outer (loop);
      if (outer == current_loops->tree_root)
	continue;

      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
    }
}
1885
/* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
   tree_to_aff_combination_expand.  TBAA_P says whether type-based alias
   analysis may be used for the disambiguation.  */

static bool
mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
		      hash_map<tree, name_expansion *> **ttae_cache,
		      bool tbaa_p)
{
  /* The shared unanalyzable ref (error_mark_node) must not get here.  */
  gcc_checking_assert (mem1->mem.ref != error_mark_node
		       && mem2->mem.ref != error_mark_node);

  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
     object and their offset differ in such a way that the locations cannot
     overlap, then they cannot alias.  */
  poly_widest_int size1, size2;
  aff_tree off1, off2;

  /* Perform basic offset and type-based disambiguation.  */
  if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, tbaa_p))
    return false;

  /* The expansion of addresses may be a bit expensive, thus we only do
     the check at -O2 and higher optimization levels.  */
  if (optimize < 2)
    return true;

  /* Compute OFF2 - OFF1; disjointness of the two affine address ranges
     proves the refs cannot overlap.  */
  get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
  get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
  aff_combination_expand (&off1, ttae_cache);
  aff_combination_expand (&off2, ttae_cache);
  aff_combination_scale (&off1, -1);
  aff_combination_add (&off2, &off1);

  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
    return false;

  /* Conservatively assume aliasing.  */
  return true;
}
1924
1925/* Compare function for bsearch searching for reference locations
1926 in a loop. */
1927
1928static int
1929find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_,
1930 void *bb_loop_postorder_)
1931{
1932 unsigned *bb_loop_postorder = (unsigned *)bb_loop_postorder_;
1933 class loop *loop = (class loop *)const_cast<void *>(loop_);
1934 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1935 class loop *loc_loop = gimple_bb (g: loc->stmt)->loop_father;
1936 if (loop->num == loc_loop->num
1937 || flow_loop_nested_p (loop, loc_loop))
1938 return 0;
1939 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1940 ? -1 : 1);
1941}
1942
/* Iterates over all locations of REF in LOOP and its subloops calling
   fn.operator() with the location as argument.  When that operator
   returns true the iteration is stopped and true is returned.
   Otherwise false is returned.  */

template <typename FN>
static bool
for_all_locs_in_loop (class loop *loop, im_mem_ref *ref, FN fn)
{
  unsigned i;
  mem_ref_loc *loc;

  /* Search for the cluster of locs in the accesses_in_loop vector
     which is sorted after postorder index of the loop father.  */
  loc = ref->accesses_in_loop.bsearch (key: loop, cmp: find_ref_loc_in_loop_cmp,
				       data: bb_loop_postorder);
  if (!loc)
    return false;

  /* We have found one location inside loop or its sub-loops.  Iterate
     both forward and backward to cover the whole cluster.  */
  /* Backward from the found location.  */
  i = loc - ref->accesses_in_loop.address ();
  while (i > 0)
    {
      --i;
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      /* Leaving LOOP means we left the contiguous cluster.  */
      if (!flow_bb_inside_loop_p (loop, gimple_bb (g: l->stmt)))
	break;
      if (fn (l))
	return true;
    }
  /* Forward from the found location (inclusive).  */
  for (i = loc - ref->accesses_in_loop.address ();
       i < ref->accesses_in_loop.length (); ++i)
    {
      mem_ref_loc *l = &ref->accesses_in_loop[i];
      if (!flow_bb_inside_loop_p (loop, gimple_bb (g: l->stmt)))
	break;
      if (fn (l))
	return true;
    }

  return false;
}
1986
/* Functor for for_all_locs_in_loop: rewrites each location it is
   invoked on to the temporary TMP_VAR.  */

class rewrite_mem_ref_loc
{
public:
  rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
  /* Replace the memory reference at LOC by tmp_var.  Always returns
     false so the iteration visits every location.  */
  bool operator () (mem_ref_loc *loc);
  /* The variable that replaces the memory reference.  */
  tree tmp_var;
};
1996
1997bool
1998rewrite_mem_ref_loc::operator () (mem_ref_loc *loc)
1999{
2000 *loc->ref = tmp_var;
2001 update_stmt (s: loc->stmt);
2002 return false;
2003}
2004
2005/* Rewrites all references to REF in LOOP by variable TMP_VAR. */
2006
2007static void
2008rewrite_mem_refs (class loop *loop, im_mem_ref *ref, tree tmp_var)
2009{
2010 for_all_locs_in_loop (loop, ref, fn: rewrite_mem_ref_loc (tmp_var));
2011}
2012
/* Functor for for_all_locs_in_loop: stores the first reference
   location visited in LOCP and stops the iteration.  */

class first_mem_ref_loc_1
{
public:
  first_mem_ref_loc_1 (mem_ref_loc **locp_) : locp (locp_) {}
  bool operator () (mem_ref_loc *loc);
  /* Where to record the first location found.  */
  mem_ref_loc **locp;
};
2022
bool
first_mem_ref_loc_1::operator () (mem_ref_loc *loc)
{
  /* Record the location and return true to stop the iteration at the
     first hit.  */
  *locp = loc;
  return true;
}
2029
2030/* Returns the first reference location to REF in LOOP. */
2031
2032static mem_ref_loc *
2033first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
2034{
2035 mem_ref_loc *locp = NULL;
2036 for_all_locs_in_loop (loop, ref, fn: first_mem_ref_loc_1 (&locp));
2037 return locp;
2038}
2039
2040/* Helper function for execute_sm. Emit code to store TMP_VAR into
2041 MEM along edge EX.
2042
2043 The store is only done if MEM has changed. We do this so no
2044 changes to MEM occur on code paths that did not originally store
2045 into it.
2046
2047 The common case for execute_sm will transform:
2048
2049 for (...) {
2050 if (foo)
2051 stuff;
2052 else
2053 MEM = TMP_VAR;
2054 }
2055
2056 into:
2057
2058 lsm = MEM;
2059 for (...) {
2060 if (foo)
2061 stuff;
2062 else
2063 lsm = TMP_VAR;
2064 }
2065 MEM = lsm;
2066
2067 This function will generate:
2068
2069 lsm = MEM;
2070
2071 lsm_flag = false;
2072 ...
2073 for (...) {
2074 if (foo)
2075 stuff;
2076 else {
2077 lsm = TMP_VAR;
2078 lsm_flag = true;
2079 }
2080 }
2081 if (lsm_flag) <--
2082 MEM = lsm; <-- (X)
2083
2084 In case MEM and TMP_VAR are NULL the function will return the then
2085 block so the caller can insert (X) and other related stmts.
2086*/
2087
static basic_block
execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
		       edge preheader, hash_set <basic_block> *flag_bbs,
		       edge &append_cond_position, edge &last_cond_fallthru)
{
  basic_block new_bb, then_bb, old_dest;
  bool loop_has_only_one_exit;
  edge then_old_edge;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  /* Whether the exit edge is part of an irreducible region; all new
     blocks and edges must inherit that property.  */
  bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;

  profile_count count_sum = profile_count::zero ();
  int nbbs = 0, ncount = 0;
  profile_probability flag_probability = profile_probability::uninitialized ();

  /* Flag is set in FLAG_BBS. Determine probability that flag will be true
     at loop exit.

     This code may look fancy, but it cannot update profile very realistically
     because we do not know the probability that flag will be true at given
     loop exit.

     We look for two interesting extremes
       - when exit is dominated by block setting the flag, we know it will
	 always be true.  This is a common case.
       - when all blocks setting the flag have very low frequency we know
	 it will likely be false.
     In all other cases we default to 2/3 for flag being true. */

  for (hash_set<basic_block>::iterator it = flag_bbs->begin ();
       it != flag_bbs->end (); ++it)
    {
      if ((*it)->count.initialized_p ())
	count_sum += (*it)->count, ncount ++;
      if (dominated_by_p (CDI_DOMINATORS, ex->src, *it))
	flag_probability = profile_probability::always ();
      nbbs++;
    }

  /* The default and also the upper bound for the estimated probability
     of the flag being true at the exit.  */
  profile_probability cap
    = profile_probability::guessed_always ().apply_scale (num: 2, den: 3);

  if (flag_probability.initialized_p ())
    ;
  else if (ncount == nbbs
	   && preheader->count () >= count_sum && preheader->count ().nonzero_p ())
    {
      /* All flag-setting blocks have counts; estimate the probability
	 from their combined weight relative to the loop entry.  */
      flag_probability = count_sum.probability_in (overall: preheader->count ());
      if (flag_probability > cap)
	flag_probability = cap;
    }

  if (!flag_probability.initialized_p ())
    flag_probability = cap;

  /* ?? Insert store after previous store if applicable.  See note
     below.  */
  if (append_cond_position)
    ex = append_cond_position;

  loop_has_only_one_exit = single_pred_p (bb: ex->dest);

  if (loop_has_only_one_exit)
    ex = split_block_after_labels (ex->dest);
  else
    {
      for (gphi_iterator gpi = gsi_start_phis (ex->dest);
	   !gsi_end_p (i: gpi); gsi_next (i: &gpi))
	{
	  gphi *phi = gpi.phi ();
	  if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
	    continue;

	  /* When the destination has a non-virtual PHI node with multiple
	     predecessors make sure we preserve the PHI structure by
	     forcing a forwarder block so that hoisting of that PHI will
	     still work.  */
	  split_edge (ex);
	  break;
	}
    }

  /* Split the exit edge; NEW_BB will hold the flag test, THEN_BB the
     conditional store, both falling through to OLD_DEST.  */
  old_dest = ex->dest;
  new_bb = split_edge (ex);
  if (append_cond_position)
    new_bb->count += last_cond_fallthru->count ();
  then_bb = create_empty_bb (new_bb);
  then_bb->count = new_bb->count.apply_probability (prob: flag_probability);
  if (irr)
    then_bb->flags = BB_IRREDUCIBLE_LOOP;
  add_bb_to_loop (then_bb, new_bb->loop_father);

  /* Emit the test of the flag at the end of NEW_BB.  */
  gsi = gsi_start_bb (bb: new_bb);
  stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
			    NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  /* Insert actual store.  */
  if (mem)
    {
      gsi = gsi_start_bb (bb: then_bb);
      stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    }

  /* Wire up the conditional: the old fallthru edge becomes the false
     edge, a new true edge leads to THEN_BB which falls through to
     OLD_DEST.  */
  edge e1 = single_succ_edge (bb: new_bb);
  edge e2 = make_edge (new_bb, then_bb,
		       EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
  e2->probability = flag_probability;

  e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0);
  e1->flags &= ~EDGE_FALLTHRU;

  e1->probability = flag_probability.invert ();

  then_old_edge = make_single_succ_edge (then_bb, old_dest,
			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));

  set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);

  if (append_cond_position)
    {
      /* Chain this conditional store after the previously emitted one
	 and repair dominators invalidated by the redirection.  */
      basic_block prevbb = last_cond_fallthru->src;
      redirect_edge_succ (last_cond_fallthru, new_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
      set_immediate_dominator (CDI_DOMINATORS, old_dest,
			       recompute_dominator (CDI_DOMINATORS, old_dest));
    }

  /* ?? Because stores may alias, they must happen in the exact
     sequence they originally happened.  Save the position right after
     the (_lsm) store we just created so we can continue appending after
     it and maintain the original order.  */
  append_cond_position = then_old_edge;
  last_cond_fallthru = find_edge (new_bb, old_dest);

  /* THEN_BB is a new predecessor of OLD_DEST; give each PHI there the
     same argument on the new edge as on the edge from NEW_BB.  */
  if (!loop_has_only_one_exit)
    for (gphi_iterator gpi = gsi_start_phis (old_dest);
	 !gsi_end_p (i: gpi); gsi_next (i: &gpi))
      {
	gphi *phi = gpi.phi ();
	unsigned i;

	for (i = 0; i < gimple_phi_num_args (gs: phi); i++)
	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
	    {
	      tree arg = gimple_phi_arg_def (gs: phi, index: i);
	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
	      update_stmt (s: phi);
	    }
      }

  return then_bb;
}
2243
/* Functor for for_all_locs_in_loop: when REF is stored to at the
   location, set the flag indicating the store and record the block.  */

class sm_set_flag_if_changed
{
public:
  sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_)
	 : flag (flag_), bbs (bbs_) {}
  bool operator () (mem_ref_loc *loc);
  /* The flag variable to set to true after each store.  */
  tree flag;
  /* Collects the basic blocks in which a flag store was inserted.  */
  hash_set <basic_block> *bbs;
};
2255
2256bool
2257sm_set_flag_if_changed::operator () (mem_ref_loc *loc)
2258{
2259 /* Only set the flag for writes. */
2260 if (is_gimple_assign (gs: loc->stmt)
2261 && gimple_assign_lhs_ptr (gs: loc->stmt) == loc->ref)
2262 {
2263 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
2264 gimple *stmt = gimple_build_assign (flag, boolean_true_node);
2265 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2266 bbs->add (k: gimple_bb (g: stmt));
2267 }
2268 return false;
2269}
2270
2271/* Helper function for execute_sm. On every location where REF is
2272 set, set an appropriate flag indicating the store. */
2273
2274static tree
2275execute_sm_if_changed_flag_set (class loop *loop, im_mem_ref *ref,
2276 hash_set <basic_block> *bbs)
2277{
2278 tree flag;
2279 char *str = get_lsm_tmp_name (ref: ref->mem.ref, n: ~0, suffix: "_flag");
2280 flag = create_tmp_reg (boolean_type_node, str);
2281 for_all_locs_in_loop (loop, ref, fn: sm_set_flag_if_changed (flag, bbs));
2282 return flag;
2283}
2284
/* Per-reference auxiliary data produced by execute_sm and consumed
   when materializing stores on the loop exits.  */

struct sm_aux
{
  /* The temporary register the reference is replaced with in the loop.  */
  tree tmp_var;
  /* Flag tracking whether a store happened, or NULL_TREE when the
     stores on exits are emitted unconditionally.  */
  tree store_flag;
  /* The basic blocks in which store_flag is set to true.  */
  hash_set <basic_block> flag_bbs;
};
2291
2292/* Executes store motion of memory reference REF from LOOP.
2293 Exits from the LOOP are stored in EXITS. The initialization of the
2294 temporary variable is put to the preheader of the loop, and assignments
2295 to the reference from the temporary variable are emitted to exits. */
2296
static sm_aux *
execute_sm (class loop *loop, im_mem_ref *ref,
	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
	    bool use_other_flag_var)
{
  gassign *load;
  struct fmt_data fmt_data;
  struct lim_aux_data *lim_data;
  bool multi_threaded_model_p = false;
  gimple_stmt_iterator gsi;
  sm_aux *aux = new sm_aux;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (stream: dump_file, format: "Executing store motion of ");
      print_generic_expr (dump_file, ref->mem.ref);
      fprintf (stream: dump_file, format: " from loop %d\n", loop->num);
    }

  /* The temporary that stands in for the memory reference in the loop.  */
  aux->tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
				 get_lsm_tmp_name (ref: ref->mem.ref, n: ~0));

  /* Force invariant address computations of REF out of the loop.  */
  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);

  bool always_stored = ref_always_accessed_p (loop, ref, true);
  /* Use the multi-threaded model (flag-guarded conditional stores on
     exits) when inside a transaction or when an unconditional store
     could introduce a data race the original code did not have.  */
  if (maybe_mt
      && (bb_in_transaction (bb: loop_preheader_edge (loop)->src)
	  || (ref_can_have_store_data_races (ref->mem.ref) && ! always_stored)))
    multi_threaded_model_p = true;

  if (multi_threaded_model_p && !use_other_flag_var)
    aux->store_flag
      = execute_sm_if_changed_flag_set (loop, ref, bbs: &aux->flag_bbs);
  else
    aux->store_flag = NULL_TREE;

  /* Remember variable setup.  */
  aux_map.put (k: ref, v: aux);

  rewrite_mem_refs (loop, ref, tmp_var: aux->tmp_var);

  /* Emit the load code on a random exit edge or into the latch if
     the loop does not exit, so that we are sure it will be processed
     by move_computations after all dependencies.  */
  gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);

  /* Avoid doing a load if there was no load of the ref in the loop.
     Esp. when the ref is not always stored we cannot optimize it
     away later.  But when it is not always stored we must use a conditional
     store then.  */
  if ((!always_stored && !multi_threaded_model_p)
      || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)))
    load = gimple_build_assign (aux->tmp_var, unshare_expr (ref->mem.ref));
  else
    {
      /* If not emitting a load mark the uninitialized state on the
	 loop entry as not to be warned for.  */
      tree uninit = create_tmp_reg (TREE_TYPE (aux->tmp_var));
      suppress_warning (uninit, OPT_Wuninitialized);
      uninit = get_or_create_ssa_default_def (cfun, uninit);
      load = gimple_build_assign (aux->tmp_var, uninit);
    }
  /* Attach lim data so move_computations hoists the load to the
     preheader of LOOP.  */
  lim_data = init_lim_data (stmt: load);
  lim_data->max_loop = loop;
  lim_data->tgt_loop = loop;
  gsi_insert_before (&gsi, load, GSI_SAME_STMT);

  if (aux->store_flag)
    {
      /* Likewise emit (and hoist) the initialization of the store flag
	 to false on loop entry.  */
      load = gimple_build_assign (aux->store_flag, boolean_false_node);
      lim_data = init_lim_data (stmt: load);
      lim_data->max_loop = loop;
      lim_data->tgt_loop = loop;
      gsi_insert_before (&gsi, load, GSI_SAME_STMT);
    }

  return aux;
}
2377
/* sm_ord is used for ordinary stores we can retain order with respect
   to other stores
   sm_unord is used for conditionally executed stores which need to be
   able to execute in arbitrary order with respect to other stores
   sm_other is used for stores we do not try to apply store motion to.  */
enum sm_kind { sm_ord, sm_unord, sm_other };

/* One entry of a store sequence as computed by sm_seq_valid_bb.  */
struct seq_entry
{
  seq_entry () = default;
  seq_entry (unsigned f, sm_kind k, tree fr = NULL)
    : first (f), second (k), from (fr) {}
  /* Index of the memory reference in memory_accesses.refs_list.  */
  unsigned first;
  /* Kind of store motion applied to the reference.  */
  sm_kind second;
  /* For sm_other entries the stored value, if known, so the store can
     be re-materialized on the loop exits.  */
  tree from;
};
2393
/* Helper for hoist_memory_references.  Materialize the sequence SEQ of
   stores on exit edge EX of LOOP, walking the sequence from last to
   first.  AUX_MAP provides the temporaries and store flags created by
   execute_sm, KIND the kind of store motion performed.
   APPEND_COND_POSITION and LAST_COND_FALLTHRU track the insertion
   point for chained conditional stores.  Clobber refs whose dependent
   stores got re-issued are recorded in CLOBBERS_TO_PRUNE so the caller
   can remove the clobbers from the loop body.  */

static void
execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
		 hash_map<im_mem_ref *, sm_aux *> &aux_map, sm_kind kind,
		 edge &append_cond_position, edge &last_cond_fallthru,
		 bitmap clobbers_to_prune)
{
  /* Sink the stores to exit from the loop.  */
  for (unsigned i = seq.length (); i > 0; --i)
    {
      im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
      if (seq[i-1].second == sm_other)
	{
	  gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
	  gassign *store;
	  /* A ref with error_mark_node mem stands for a clobber; the
	     dependent store is re-issued to the LHS of the clobber's
	     single original statement.  */
	  if (ref->mem.ref == error_mark_node)
	    {
	      tree lhs = gimple_assign_lhs (gs: ref->accesses_in_loop[0].stmt);
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (stream: dump_file, format: "Re-issueing dependent ");
		  print_generic_expr (dump_file, unshare_expr (seq[i-1].from));
		  fprintf (stream: dump_file, format: " of ");
		  print_generic_expr (dump_file, lhs);
		  fprintf (stream: dump_file, format: " from loop %d on exit %d -> %d\n",
			   loop->num, ex->src->index, ex->dest->index);
		}
	      store = gimple_build_assign (unshare_expr (lhs),
					   unshare_expr (seq[i-1].from));
	      bitmap_set_bit (clobbers_to_prune, seq[i-1].first);
	    }
	  else
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (stream: dump_file, format: "Re-issueing dependent store of ");
		  print_generic_expr (dump_file, ref->mem.ref);
		  fprintf (stream: dump_file, format: " from loop %d on exit %d -> %d\n",
			   loop->num, ex->src->index, ex->dest->index);
		}
	      store = gimple_build_assign (unshare_expr (ref->mem.ref),
					   seq[i-1].from);
	    }
	  gsi_insert_on_edge (ex, store);
	}
      else
	{
	  sm_aux *aux = *aux_map.get (k: ref);
	  if (!aux->store_flag || kind == sm_ord)
	    {
	      /* Unconditional store of the temporary back to memory.  */
	      gassign *store;
	      store = gimple_build_assign (unshare_expr (ref->mem.ref),
					   aux->tmp_var);
	      gsi_insert_on_edge (ex, store);
	    }
	  else
	    /* Conditionally executed store, guarded by the store flag.  */
	    execute_sm_if_changed (ex, mem: ref->mem.ref, tmp_var: aux->tmp_var,
				   flag: aux->store_flag,
				   preheader: loop_preheader_edge (loop), flag_bbs: &aux->flag_bbs,
				   append_cond_position, last_cond_fallthru);
	}
    }
}
2456
/* Push the SM candidate at index PTR in the sequence SEQ down until
   we hit the next SM candidate.  Return true if that went OK and
   false if we could not disambiguate against another unrelated ref.
   Update *AT to the index where the candidate now resides.  */

static bool
sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
{
  *at = ptr;
  for (; ptr > 0; --ptr)
    {
      seq_entry &new_cand = seq[ptr];
      seq_entry &against = seq[ptr-1];
      if (against.second == sm_ord
	  || (against.second == sm_other && against.from != NULL_TREE))
	/* Found the tail of the sequence.  */
	break;
      /* We may not ignore self-dependences here.  */
      if (new_cand.first == against.first
	  /* ??? We could actually handle clobbers here, but not easily
	     with LIMs dependence analysis.  */
	  || (memory_accesses.refs_list[new_cand.first]->mem.ref
	      == error_mark_node)
	  || (memory_accesses.refs_list[against.first]->mem.ref
	      == error_mark_node)
	  || !refs_independent_p (memory_accesses.refs_list[new_cand.first],
				  memory_accesses.refs_list[against.first],
				  false))
	/* ??? Prune new_cand from the list of refs to apply SM to.  */
	return false;
      /* Dependence-free against the predecessor: swap and continue
	 pushing the candidate down.  */
      std::swap (a&: new_cand, b&: against);
      *at = ptr - 1;
    }
  return true;
}
2492
/* Computes the sequence of stores from candidates in REFS_NOT_IN_SEQ to SEQ
   walking backwards from VDEF (or the end of BB if VDEF is NULL).
   Returns 1 when the walk completed (every candidate was either placed
   in SEQ or marked in REFS_NOT_SUPPORTED), 0 when the virtual use-def
   chain ended without covering all candidates, and -1 on failure.
   FORKED is true once the walk has passed a PHI and follows one of its
   arguments; FULLY_VISITED marks PHI results that were already
   processed completely.  */

static int
sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
		 vec<seq_entry> &seq, bitmap refs_not_in_seq,
		 bitmap refs_not_supported, bool forked,
		 bitmap fully_visited)
{
  /* When no VDEF is given, find the live virtual definition at the end
     of BB: the last stmt with a vdef, else the virtual PHI, else walk
     to the single predecessor.  */
  if (!vdef)
    for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (i: gsi);
	 gsi_prev (i: &gsi))
      {
	vdef = gimple_vdef (g: gsi_stmt (i: gsi));
	if (vdef)
	  break;
      }
  if (!vdef)
    {
      gphi *vphi = get_virtual_phi (bb);
      if (vphi)
	vdef = gimple_phi_result (gs: vphi);
    }
  if (!vdef)
    {
      if (single_pred_p (bb))
	/* This handles the perfect nest case.  */
	return sm_seq_valid_bb (loop, bb: single_pred (bb), vdef,
				seq, refs_not_in_seq, refs_not_supported,
				forked, fully_visited);
      return 0;
    }
  do
    {
      gimple *def = SSA_NAME_DEF_STMT (vdef);
      if (gimple_bb (g: def) != bb)
	{
	  /* If we forked by processing a PHI do not allow our walk to
	     merge again until we handle that robustly.  */
	  if (forked)
	    {
	      /* Mark refs_not_in_seq as unsupported.  */
	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	      return 1;
	    }
	  /* Otherwise it doesn't really matter if we end up in different
	     BBs.  */
	  bb = gimple_bb (g: def);
	}
      if (gphi *phi = dyn_cast <gphi *> (p: def))
	{
	  /* Handle CFG merges.  Until we handle forks (gimple_bb (def) != bb)
	     this is still linear.
	     Eventually we want to cache intermediate results per BB
	     (but we can't easily cache for different exits?).  */
	  /* Stop at PHIs with possible backedges.  */
	  if (bb == bb->loop_father->header
	      || bb->flags & BB_IRREDUCIBLE_LOOP)
	    {
	      /* Mark refs_not_in_seq as unsupported.  */
	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	      return 1;
	    }
	  /* A degenerate PHI is just a linear step.  */
	  if (gimple_phi_num_args (gs: phi) == 1)
	    return sm_seq_valid_bb (loop, bb: gimple_phi_arg_edge (phi, i: 0)->src,
				    vdef: gimple_phi_arg_def (gs: phi, index: 0), seq,
				    refs_not_in_seq, refs_not_supported,
				    forked: false, fully_visited);
	  if (bitmap_bit_p (fully_visited,
			    SSA_NAME_VERSION (gimple_phi_result (phi))))
	    return 1;
	  /* Process the first incoming edge to seed the merged sequence.  */
	  auto_vec<seq_entry> first_edge_seq;
	  auto_bitmap tem_refs_not_in_seq (&lim_bitmap_obstack);
	  int eret;
	  bitmap_copy (tem_refs_not_in_seq, refs_not_in_seq);
	  eret = sm_seq_valid_bb (loop, bb: gimple_phi_arg_edge (phi, i: 0)->src,
				  vdef: gimple_phi_arg_def (gs: phi, index: 0),
				  seq&: first_edge_seq,
				  refs_not_in_seq: tem_refs_not_in_seq, refs_not_supported,
				  forked: true, fully_visited);
	  if (eret != 1)
	    return -1;
	  /* Simplify our lives by pruning the sequence of !sm_ord.  */
	  while (!first_edge_seq.is_empty ()
		 && first_edge_seq.last ().second != sm_ord)
	    first_edge_seq.pop ();
	  /* Merge the sequences of the remaining incoming edges into
	     first_edge_seq one at a time.  */
	  for (unsigned int i = 1; i < gimple_phi_num_args (gs: phi); ++i)
	    {
	      tree vuse = gimple_phi_arg_def (gs: phi, index: i);
	      edge e = gimple_phi_arg_edge (phi, i);
	      auto_vec<seq_entry> edge_seq;
	      bitmap_and_compl (tem_refs_not_in_seq,
				refs_not_in_seq, refs_not_supported);
	      /* If we've marked all refs we search for as unsupported
		 we can stop processing and use the sequence as before
		 the PHI.  */
	      if (bitmap_empty_p (map: tem_refs_not_in_seq))
		return 1;
	      eret = sm_seq_valid_bb (loop, bb: e->src, vdef: vuse, seq&: edge_seq,
				      refs_not_in_seq: tem_refs_not_in_seq, refs_not_supported,
				      forked: true, fully_visited);
	      if (eret != 1)
		return -1;
	      /* Simplify our lives by pruning the sequence of !sm_ord.  */
	      while (!edge_seq.is_empty ()
		     && edge_seq.last ().second != sm_ord)
		edge_seq.pop ();
	      unsigned min_len = MIN(first_edge_seq.length (),
				     edge_seq.length ());
	      /* Incrementally merge seqs into first_edge_seq.  */
	      int first_uneq = -1;
	      auto_vec<seq_entry, 2> extra_refs;
	      for (unsigned int i = 0; i < min_len; ++i)
		{
		  /* ??? We can more intelligently merge when we face different
		     order by additional sinking operations in one sequence.
		     For now we simply mark them as to be processed by the
		     not order-preserving SM code.  */
		  if (first_edge_seq[i].first != edge_seq[i].first)
		    {
		      if (first_edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported,
					first_edge_seq[i].first);
		      if (edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		      first_edge_seq[i].from = NULL_TREE;
		      /* Record the dropped refs for later processing.  */
		      if (first_uneq == -1)
			first_uneq = i;
		      extra_refs.safe_push (obj: seq_entry (edge_seq[i].first,
						    sm_other, NULL_TREE));
		    }
		  /* sm_other prevails.  */
		  else if (first_edge_seq[i].second != edge_seq[i].second)
		    {
		      /* Make sure the ref is marked as not supported.  */
		      bitmap_set_bit (refs_not_supported,
				      first_edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		      first_edge_seq[i].from = NULL_TREE;
		    }
		  else if (first_edge_seq[i].second == sm_other
			   && first_edge_seq[i].from != NULL_TREE
			   && (edge_seq[i].from == NULL_TREE
			       || !operand_equal_p (first_edge_seq[i].from,
						    edge_seq[i].from, flags: 0)))
		    /* The stored values differ between the edges, so the
		       value to re-materialize is unknown.  */
		    first_edge_seq[i].from = NULL_TREE;
		}
	      /* Any excess elements become sm_other since they are now
		 conditionally executed.  */
	      if (first_edge_seq.length () > edge_seq.length ())
		{
		  for (unsigned i = edge_seq.length ();
		       i < first_edge_seq.length (); ++i)
		    {
		      if (first_edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported,
					first_edge_seq[i].first);
		      first_edge_seq[i].second = sm_other;
		    }
		}
	      else if (edge_seq.length () > first_edge_seq.length ())
		{
		  if (first_uneq == -1)
		    first_uneq = first_edge_seq.length ();
		  for (unsigned i = first_edge_seq.length ();
		       i < edge_seq.length (); ++i)
		    {
		      if (edge_seq[i].second == sm_ord)
			bitmap_set_bit (refs_not_supported, edge_seq[i].first);
		      extra_refs.safe_push (obj: seq_entry (edge_seq[i].first,
						    sm_other, NULL_TREE));
		    }
		}
	      /* Put unmerged refs at first_uneq to force dependence checking
		 on them.  */
	      if (first_uneq != -1)
		{
		  /* Missing ordered_splice_at.  */
		  if ((unsigned)first_uneq == first_edge_seq.length ())
		    first_edge_seq.safe_splice (src: extra_refs);
		  else
		    {
		      unsigned fes_length = first_edge_seq.length ();
		      first_edge_seq.safe_grow (len: fes_length
						+ extra_refs.length ());
		      memmove (dest: &first_edge_seq[first_uneq + extra_refs.length ()],
			       src: &first_edge_seq[first_uneq],
			       n: (fes_length - first_uneq) * sizeof (seq_entry));
		      memcpy (dest: &first_edge_seq[first_uneq],
			      src: extra_refs.address (),
			      n: extra_refs.length () * sizeof (seq_entry));
		    }
		}
	    }
	  /* Use the sequence from the first edge and push SMs down.  */
	  for (unsigned i = 0; i < first_edge_seq.length (); ++i)
	    {
	      unsigned id = first_edge_seq[i].first;
	      seq.safe_push (obj: first_edge_seq[i]);
	      unsigned new_idx;
	      if ((first_edge_seq[i].second == sm_ord
		   || (first_edge_seq[i].second == sm_other
		       && first_edge_seq[i].from != NULL_TREE))
		  && !sm_seq_push_down (seq, ptr: seq.length () - 1, at: &new_idx))
		{
		  if (first_edge_seq[i].second == sm_ord)
		    bitmap_set_bit (refs_not_supported, id);
		  /* Mark it sm_other.  */
		  seq[new_idx].second = sm_other;
		  seq[new_idx].from = NULL_TREE;
		}
	    }
	  bitmap_set_bit (fully_visited,
			  SSA_NAME_VERSION (gimple_phi_result (phi)));
	  return 1;
	}
      /* Not a PHI: a store (or other vdef-producing stmt) analyzed by
	 LIM earlier.  */
      lim_aux_data *data = get_lim_data (stmt: def);
      im_mem_ref *ref = memory_accesses.refs_list[data->ref];
      if (data->ref == UNANALYZABLE_MEM_ID)
	return -1;
      /* Stop at memory references which we can't move.  */
      else if ((ref->mem.ref == error_mark_node
		/* We can move end-of-storage/object down.  */
		&& !gimple_clobber_p (s: ref->accesses_in_loop[0].stmt,
				      kind: CLOBBER_STORAGE_END)
		&& !gimple_clobber_p (s: ref->accesses_in_loop[0].stmt,
				      kind: CLOBBER_OBJECT_END))
	       || TREE_THIS_VOLATILE (ref->mem.ref))
	{
	  /* Mark refs_not_in_seq as unsupported.  */
	  bitmap_ior_into (refs_not_supported, refs_not_in_seq);
	  return 1;
	}
      /* One of the stores we want to apply SM to and we've not yet seen.  */
      else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
	{
	  seq.safe_push (obj: seq_entry (data->ref, sm_ord));

	  /* 1) push it down the queue until a SMed
	     and not ignored ref is reached, skipping all not SMed refs
	     and ignored refs via non-TBAA disambiguation.  */
	  unsigned new_idx;
	  if (!sm_seq_push_down (seq, ptr: seq.length () - 1, at: &new_idx)
	      /* If that fails but we did not fork yet continue, we'll see
		 to re-materialize all of the stores in the sequence then.
		 Further stores will only be pushed up to this one.  */
	      && forked)
	    {
	      bitmap_set_bit (refs_not_supported, data->ref);
	      /* Mark it sm_other.  */
	      seq[new_idx].second = sm_other;
	    }

	  /* 2) check whether we've seen all refs we want to SM and if so
	     declare success for the active exit  */
	  if (bitmap_empty_p (map: refs_not_in_seq))
	    return 1;
	}
      else
	/* Another store not part of the final sequence.  Simply push it.  */
	seq.safe_push (obj: seq_entry (data->ref, sm_other,
				   gimple_assign_rhs1 (gs: def)));

      /* Continue the walk on the virtual use of the definition.  */
      vdef = gimple_vuse (g: def);
    }
  while (1);
}
2762
2763/* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2764 edges of the LOOP. */
2765
2766static void
2767hoist_memory_references (class loop *loop, bitmap mem_refs,
2768 const vec<edge> &exits)
2769{
2770 im_mem_ref *ref;
2771 unsigned i;
2772 bitmap_iterator bi;
2773
  /* There's a special case we can use ordered re-materialization for
     conditionally executed stores which is when all stores in the loop
     happen in the same basic-block.  In that case we know we'll reach
     all stores and thus can simply process that BB and emit a single
     conditional block of ordered materializations.  See PR102436.  */
2779 basic_block single_store_bb = NULL;
2780 EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
2781 0, i, bi)
2782 {
2783 bool fail = false;
2784 ref = memory_accesses.refs_list[i];
2785 for (auto loc : ref->accesses_in_loop)
2786 if (!gimple_vdef (g: loc.stmt))
2787 ;
2788 else if (!single_store_bb)
2789 {
2790 single_store_bb = gimple_bb (g: loc.stmt);
2791 bool conditional = false;
2792 for (edge e : exits)
2793 if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
2794 {
2795 /* Conditional as seen from e. */
2796 conditional = true;
2797 break;
2798 }
2799 if (!conditional)
2800 {
2801 fail = true;
2802 break;
2803 }
2804 }
2805 else if (single_store_bb != gimple_bb (g: loc.stmt))
2806 {
2807 fail = true;
2808 break;
2809 }
2810 if (fail)
2811 {
2812 single_store_bb = NULL;
2813 break;
2814 }
2815 }
2816 if (single_store_bb)
2817 {
2818 /* Analyze the single block with stores. */
2819 auto_bitmap fully_visited;
2820 auto_bitmap refs_not_supported;
2821 auto_bitmap refs_not_in_seq;
2822 auto_vec<seq_entry> seq;
2823 bitmap_copy (refs_not_in_seq, mem_refs);
2824 int res = sm_seq_valid_bb (loop, bb: single_store_bb, NULL_TREE,
2825 seq, refs_not_in_seq, refs_not_supported,
2826 forked: false, fully_visited);
2827 if (res != 1)
2828 {
2829 /* Unhandled refs can still fail this. */
2830 bitmap_clear (mem_refs);
2831 return;
2832 }
2833
2834 /* We cannot handle sm_other since we neither remember the
2835 stored location nor the value at the point we execute them. */
2836 for (unsigned i = 0; i < seq.length (); ++i)
2837 {
2838 unsigned new_i;
2839 if (seq[i].second == sm_other
2840 && seq[i].from != NULL_TREE)
2841 seq[i].from = NULL_TREE;
2842 else if ((seq[i].second == sm_ord
2843 || (seq[i].second == sm_other
2844 && seq[i].from != NULL_TREE))
2845 && !sm_seq_push_down (seq, ptr: i, at: &new_i))
2846 {
2847 bitmap_set_bit (refs_not_supported, seq[new_i].first);
2848 seq[new_i].second = sm_other;
2849 seq[new_i].from = NULL_TREE;
2850 }
2851 }
2852 bitmap_and_compl_into (mem_refs, refs_not_supported);
2853 if (bitmap_empty_p (map: mem_refs))
2854 return;
2855
2856 /* Prune seq. */
2857 while (seq.last ().second == sm_other
2858 && seq.last ().from == NULL_TREE)
2859 seq.pop ();
2860
2861 hash_map<im_mem_ref *, sm_aux *> aux_map;
2862
2863 /* Execute SM but delay the store materialization for ordered
2864 sequences on exit. Remember a created flag var and make
2865 sure to re-use it. */
2866 sm_aux *flag_var_aux = nullptr;
2867 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2868 {
2869 ref = memory_accesses.refs_list[i];
2870 sm_aux *aux = execute_sm (loop, ref, aux_map, maybe_mt: true,
2871 use_other_flag_var: flag_var_aux != nullptr);
2872 if (aux->store_flag)
2873 flag_var_aux = aux;
2874 }
2875
2876 /* Materialize ordered store sequences on exits. */
2877 edge e;
2878 auto_bitmap clobbers_to_prune;
2879 FOR_EACH_VEC_ELT (exits, i, e)
2880 {
2881 edge append_cond_position = NULL;
2882 edge last_cond_fallthru = NULL;
2883 edge insert_e = e;
2884 /* Construct the single flag variable control flow and insert
2885 the ordered seq of stores in the then block. With
2886 -fstore-data-races we can do the stores unconditionally. */
2887 if (flag_var_aux)
2888 insert_e
2889 = single_pred_edge
2890 (bb: execute_sm_if_changed (ex: e, NULL_TREE, NULL_TREE,
2891 flag: flag_var_aux->store_flag,
2892 preheader: loop_preheader_edge (loop),
2893 flag_bbs: &flag_var_aux->flag_bbs,
2894 append_cond_position,
2895 last_cond_fallthru));
2896 execute_sm_exit (loop, ex: insert_e, seq, aux_map, kind: sm_ord,
2897 append_cond_position, last_cond_fallthru,
2898 clobbers_to_prune);
2899 gsi_commit_one_edge_insert (insert_e, NULL);
2900 }
2901
2902 /* Remove clobbers inside the loop we re-materialized on exits. */
2903 EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
2904 {
2905 gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
2906 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2907 unlink_stmt_vdef (stmt);
2908 release_defs (stmt);
2909 gimple_set_vdef (g: stmt, NULL_TREE);
2910 gsi_remove (&gsi, true);
2911 }
2912
2913 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
2914 iter != aux_map.end (); ++iter)
2915 delete (*iter).second;
2916
2917 return;
2918 }
2919
2920 /* To address PR57359 before actually applying store-motion check
2921 the candidates found for validity with regards to reordering
2922 relative to other stores which we until here disambiguated using
2923 TBAA which isn't valid.
2924 What matters is the order of the last stores to the mem_refs
2925 with respect to the other stores of the loop at the point of the
2926 loop exits. */
2927
2928 /* For each exit compute the store order, pruning from mem_refs
2929 on the fly. */
2930 /* The complexity of this is at least
2931 O(number of exits * number of SM refs) but more approaching
2932 O(number of exits * number of SM refs * number of stores). */
2933 /* ??? Somehow do this in a single sweep over the loop body. */
2934 auto_vec<std::pair<edge, vec<seq_entry> > > sms;
2935 auto_bitmap refs_not_supported (&lim_bitmap_obstack);
2936 edge e;
2937 FOR_EACH_VEC_ELT (exits, i, e)
2938 {
2939 vec<seq_entry> seq;
2940 seq.create (nelems: 4);
2941 auto_bitmap refs_not_in_seq (&lim_bitmap_obstack);
2942 bitmap_and_compl (refs_not_in_seq, mem_refs, refs_not_supported);
2943 if (bitmap_empty_p (map: refs_not_in_seq))
2944 {
2945 seq.release ();
2946 break;
2947 }
2948 auto_bitmap fully_visited;
2949 int res = sm_seq_valid_bb (loop, bb: e->src, NULL_TREE,
2950 seq, refs_not_in_seq,
2951 refs_not_supported, forked: false,
2952 fully_visited);
2953 if (res != 1)
2954 {
2955 bitmap_copy (refs_not_supported, mem_refs);
2956 seq.release ();
2957 break;
2958 }
2959 sms.safe_push (obj: std::make_pair (x&: e, y&: seq));
2960 }
2961
2962 /* Prune pruned mem_refs from earlier processed exits. */
2963 bool changed = !bitmap_empty_p (map: refs_not_supported);
2964 while (changed)
2965 {
2966 changed = false;
2967 std::pair<edge, vec<seq_entry> > *seq;
2968 FOR_EACH_VEC_ELT (sms, i, seq)
2969 {
2970 bool need_to_push = false;
2971 for (unsigned i = 0; i < seq->second.length (); ++i)
2972 {
2973 sm_kind kind = seq->second[i].second;
2974 if (kind == sm_other && seq->second[i].from == NULL_TREE)
2975 break;
2976 unsigned id = seq->second[i].first;
2977 unsigned new_idx;
2978 if (kind == sm_ord
2979 && bitmap_bit_p (refs_not_supported, id))
2980 {
2981 seq->second[i].second = sm_other;
2982 gcc_assert (seq->second[i].from == NULL_TREE);
2983 need_to_push = true;
2984 }
2985 else if (need_to_push
2986 && !sm_seq_push_down (seq&: seq->second, ptr: i, at: &new_idx))
2987 {
2988 /* We need to push down both sm_ord and sm_other
2989 but for the latter we need to disqualify all
2990 following refs. */
2991 if (kind == sm_ord)
2992 {
2993 if (bitmap_set_bit (refs_not_supported, id))
2994 changed = true;
2995 seq->second[new_idx].second = sm_other;
2996 }
2997 else
2998 {
2999 for (unsigned j = seq->second.length () - 1;
3000 j > new_idx; --j)
3001 if (seq->second[j].second == sm_ord
3002 && bitmap_set_bit (refs_not_supported,
3003 seq->second[j].first))
3004 changed = true;
3005 seq->second.truncate (size: new_idx);
3006 break;
3007 }
3008 }
3009 }
3010 }
3011 }
3012 std::pair<edge, vec<seq_entry> > *seq;
3013 FOR_EACH_VEC_ELT (sms, i, seq)
3014 {
3015 /* Prune sm_other from the end. */
3016 while (!seq->second.is_empty ()
3017 && seq->second.last ().second == sm_other)
3018 seq->second.pop ();
3019 /* Prune duplicates from the start. */
3020 auto_bitmap seen (&lim_bitmap_obstack);
3021 unsigned j, k;
3022 for (j = k = 0; j < seq->second.length (); ++j)
3023 if (bitmap_set_bit (seen, seq->second[j].first))
3024 {
3025 if (k != j)
3026 seq->second[k] = seq->second[j];
3027 ++k;
3028 }
3029 seq->second.truncate (size: k);
3030 /* And verify. */
3031 seq_entry *e;
3032 FOR_EACH_VEC_ELT (seq->second, j, e)
3033 gcc_assert (e->second == sm_ord
3034 || (e->second == sm_other && e->from != NULL_TREE));
3035 }
3036
3037 /* Verify dependence for refs we cannot handle with the order preserving
3038 code (refs_not_supported) or prune them from mem_refs. */
3039 auto_vec<seq_entry> unord_refs;
3040 EXECUTE_IF_SET_IN_BITMAP (refs_not_supported, 0, i, bi)
3041 {
3042 ref = memory_accesses.refs_list[i];
3043 if (!ref_indep_loop_p (loop, ref, sm_waw))
3044 bitmap_clear_bit (mem_refs, i);
3045 /* We've now verified store order for ref with respect to all other
3046 stores in the loop does not matter. */
3047 else
3048 unord_refs.safe_push (obj: seq_entry (i, sm_unord));
3049 }
3050
3051 hash_map<im_mem_ref *, sm_aux *> aux_map;
3052
3053 /* Execute SM but delay the store materialization for ordered
3054 sequences on exit. */
3055 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
3056 {
3057 ref = memory_accesses.refs_list[i];
3058 execute_sm (loop, ref, aux_map, maybe_mt: bitmap_bit_p (refs_not_supported, i),
3059 use_other_flag_var: false);
3060 }
3061
3062 /* Materialize ordered store sequences on exits. */
3063 auto_bitmap clobbers_to_prune;
3064 FOR_EACH_VEC_ELT (exits, i, e)
3065 {
3066 edge append_cond_position = NULL;
3067 edge last_cond_fallthru = NULL;
3068 if (i < sms.length ())
3069 {
3070 gcc_assert (sms[i].first == e);
3071 execute_sm_exit (loop, ex: e, seq&: sms[i].second, aux_map, kind: sm_ord,
3072 append_cond_position, last_cond_fallthru,
3073 clobbers_to_prune);
3074 sms[i].second.release ();
3075 }
3076 if (!unord_refs.is_empty ())
3077 execute_sm_exit (loop, ex: e, seq&: unord_refs, aux_map, kind: sm_unord,
3078 append_cond_position, last_cond_fallthru,
3079 clobbers_to_prune);
3080 /* Commit edge inserts here to preserve the order of stores
3081 when an exit exits multiple loops. */
3082 gsi_commit_one_edge_insert (e, NULL);
3083 }
3084
3085 /* Remove clobbers inside the loop we re-materialized on exits. */
3086 EXECUTE_IF_SET_IN_BITMAP (clobbers_to_prune, 0, i, bi)
3087 {
3088 gimple *stmt = memory_accesses.refs_list[i]->accesses_in_loop[0].stmt;
3089 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3090 unlink_stmt_vdef (stmt);
3091 release_defs (stmt);
3092 gimple_set_vdef (g: stmt, NULL_TREE);
3093 gsi_remove (&gsi, true);
3094 }
3095
3096 for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
3097 iter != aux_map.end (); ++iter)
3098 delete (*iter).second;
3099}
3100
/* Functor for for_all_locs_in_loop checking that each location of a
   mem-ref is always accessed (optionally: always stored to) when the
   containing loop executes; see ref_always_accessed_p.  */

class ref_always_accessed
{
public:
  ref_always_accessed (class loop *loop_, bool stored_p_)
      : loop (loop_), stored_p (stored_p_) {}
  bool operator () (mem_ref_loc *loc);
  /* The loop with respect to which always-executedness is checked.  */
  class loop *loop;
  /* If true, only locations that store to memory qualify.  */
  bool stored_p;
};
3110
3111bool
3112ref_always_accessed::operator () (mem_ref_loc *loc)
3113{
3114 class loop *must_exec;
3115
3116 struct lim_aux_data *lim_data = get_lim_data (stmt: loc->stmt);
3117 if (!lim_data)
3118 return false;
3119
3120 /* If we require an always executed store make sure the statement
3121 is a store. */
3122 if (stored_p)
3123 {
3124 tree lhs = gimple_get_lhs (loc->stmt);
3125 if (!lhs
3126 || !(DECL_P (lhs) || REFERENCE_CLASS_P (lhs)))
3127 return false;
3128 }
3129
3130 must_exec = lim_data->always_executed_in;
3131 if (!must_exec)
3132 return false;
3133
3134 if (must_exec == loop
3135 || flow_loop_nested_p (must_exec, loop))
3136 return true;
3137
3138 return false;
3139}
3140
3141/* Returns true if REF is always accessed in LOOP. If STORED_P is true
3142 make sure REF is always stored to in LOOP. */
3143
3144static bool
3145ref_always_accessed_p (class loop *loop, im_mem_ref *ref, bool stored_p)
3146{
3147 return for_all_locs_in_loop (loop, ref,
3148 fn: ref_always_accessed (loop, stored_p));
3149}
3150
3151/* Returns true if LOAD_REF and STORE_REF form a "self write" pattern
3152 where the stored value comes from the loaded value via SSA.
3153 Example: a[i] = a[0] is safe to hoist a[0] even when i==0. */
3154
3155static bool
3156is_self_write (im_mem_ref *load_ref, im_mem_ref *store_ref)
3157{
3158 /* Only handle the simple case with a single access per ref.
3159 Bail out on multiple accesses to be conservative. */
3160 if (load_ref->accesses_in_loop.length () != 1
3161 || store_ref->accesses_in_loop.length () != 1)
3162 return false;
3163
3164 gimple *load_stmt = load_ref->accesses_in_loop[0].stmt;
3165 gimple *store_stmt = store_ref->accesses_in_loop[0].stmt;
3166
3167 if (!is_gimple_assign (gs: load_stmt) || !is_gimple_assign (gs: store_stmt))
3168 return false;
3169
3170 tree loaded_val = gimple_assign_lhs (gs: load_stmt);
3171 tree stored_val = gimple_assign_rhs1 (gs: store_stmt);
3172
3173 if (TREE_CODE (loaded_val) != SSA_NAME || TREE_CODE (stored_val) != SSA_NAME)
3174 return false;
3175
3176 /* Self write: stored value is the loaded value. */
3177 if (stored_val != loaded_val)
3178 return false;
3179
3180
3181 /* TODO: Try to factor this out with mem_ref_hasher::equal
3182 into im_compare_access_position_and_size
3183 or a similar helper to centralize this delicate handling
3184 complete for MEM_REF offsets and base pointer equality.
3185
3186 TODO: Also ensure max_size_known_p agrees or resort to
3187 alignment considerations to rule out partial overlaps.
3188
3189 See:
3190 https://gcc.gnu.org/pipermail/gcc-patches/2025-December/704155.html
3191 For more context on TODOs above. */
3192
3193 /* They may alias. Verify exact same location. */
3194 return (operand_equal_p (load_ref->mem.base, store_ref->mem.base, flags: 0)
3195 && known_eq (load_ref->mem.size, store_ref->mem.size)
3196 && known_eq (load_ref->mem.offset, store_ref->mem.offset));
3197
3198}
3199
3200/* Returns true if REF1 and REF2 are independent. */
3201
3202static bool
3203refs_independent_p (im_mem_ref *ref1, im_mem_ref *ref2, bool tbaa_p)
3204{
3205 if (ref1 == ref2)
3206 return true;
3207
3208 if (dump_file && (dump_flags & TDF_DETAILS))
3209 fprintf (stream: dump_file, format: "Querying dependency of refs %u and %u: ",
3210 ref1->id, ref2->id);
3211
3212 if (mem_refs_may_alias_p (mem1: ref1, mem2: ref2, ttae_cache: &memory_accesses.ttae_cache, tbaa_p))
3213 {
3214 if (dump_file && (dump_flags & TDF_DETAILS))
3215 fprintf (stream: dump_file, format: "dependent.\n");
3216 return false;
3217 }
3218 else
3219 {
3220 if (dump_file && (dump_flags & TDF_DETAILS))
3221 fprintf (stream: dump_file, format: "independent.\n");
3222 return true;
3223 }
3224}
3225
/* Returns true if REF is independent on all other accessess in LOOP.
   KIND specifies the kind of dependence to consider.
     lim_raw assumes REF is not stored in LOOP and disambiguates RAW
     dependences so if true REF can be hoisted out of LOOP
     sm_war disambiguates a store REF against all other loads to see
     whether the store can be sunk across loads out of LOOP
     sm_waw disambiguates a store REF against all other stores to see
     whether the store can be sunk across stores out of LOOP.
   The result is recorded in (and first queried from) a per-loop cache.  */

static bool
ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
{
  bool indep_p = true;
  bitmap refs_to_check;

  /* Select the set to disambiguate against: loads for WAR, stores for
     RAW and WAW.  */
  if (kind == sm_war)
    refs_to_check = &memory_accesses.refs_loaded_in_loop[loop->num];
  else
    refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];

  /* Unanalyzable memory in LOOP or an aggregate-copy REF
     (mem.ref == error_mark_node) is always treated as dependent.  */
  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
      || ref->mem.ref == error_mark_node)
    indep_p = false;
  else
    {
      /* tri-state, { unknown, independent, dependent } */
      dep_state state = query_loop_dependence (loop, ref, kind);
      if (state != dep_unknown)
	return state == dep_independent ? true : false;

      /* REF also has to be independent in all inner loops.  */
      class loop *inner = loop->inner;
      while (inner)
	{
	  if (!ref_indep_loop_p (loop: inner, ref, kind))
	    {
	      indep_p = false;
	      break;
	    }
	  inner = inner->next;
	}

      if (indep_p)
	{
	  unsigned i;
	  bitmap_iterator bi;
	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
	    {
	      im_mem_ref *aref = memory_accesses.refs_list[i];
	      /* Aggregate copies have no usable mem expression; query
		 the alias oracle on the statement instead.  */
	      if (aref->mem.ref == error_mark_node)
		{
		  gimple *stmt = aref->accesses_in_loop[0].stmt;
		  if ((kind == sm_war
		       && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
						    kind != sm_waw))
		      || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
						   kind != sm_waw))
		    {
		      indep_p = false;
		      break;
		    }
		}
	      /* For hoisting loads (lim_raw), allow "self write": the store
		 writes back the loaded value.  Example: a[i] = a[0]
		 is safe even when i==0 causes aliasing.  */
	      else if (kind == lim_raw
		       && ref->loaded && aref->stored
		       && is_self_write (load_ref: ref, store_ref: aref))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (stream: dump_file,
			     format: "Dependency of refs %u and %u: "
			     "independent (self write).\n",
			     ref->id, aref->id);
		}

	      else if (!refs_independent_p (ref1: ref, ref2: aref, tbaa_p: kind != sm_waw))
		{
		  indep_p = false;
		  break;
		}
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (stream: dump_file, format: "Querying %s dependencies of ref %u in loop %d: %s\n",
	     kind == lim_raw ? "RAW" : (kind == sm_war ? "SM WAR" : "SM WAW"),
	     ref->id, loop->num, indep_p ? "independent" : "dependent");

  /* Record the computed result in the cache.  */
  record_loop_dependence (loop, ref, kind,
			  state: indep_p ? dep_independent : dep_dependent);

  return indep_p;
}
3321
/* Functor for for_all_locs_in_loop querying get_coldest_out_loop for
   each location of a mem-ref; used by can_sm_ref_p to check hotness.  */

class ref_in_loop_hot_body
{
public:
  ref_in_loop_hot_body (class loop *loop_) : l (loop_) {}
  bool operator () (mem_ref_loc *loc);
  /* The outermost loop store motion would move the ref out of.  */
  class loop *l;
};
3329
3330/* Check the coldest loop between loop L and innermost loop. If there is one
3331 cold loop between L and INNER_LOOP, store motion can be performed, otherwise
3332 no cold loop means no store motion. get_coldest_out_loop also handles cases
3333 when l is inner_loop. */
3334bool
3335ref_in_loop_hot_body::operator () (mem_ref_loc *loc)
3336{
3337 basic_block curr_bb = gimple_bb (g: loc->stmt);
3338 class loop *inner_loop = curr_bb->loop_father;
3339 return get_coldest_out_loop (outermost_loop: l, loop: inner_loop, curr_bb);
3340}
3341
3342
/* Returns true if we can perform store motion of REF from LOOP, i.e.
   the ref is analyzable and movable, cannot trap (or is always
   accessed), is independent of all loads in LOOP and resides in a
   sufficiently hot block.  */

static bool
can_sm_ref_p (class loop *loop, im_mem_ref *ref)
{
  tree base;

  /* Can't hoist unanalyzable refs.  */
  if (!MEM_ANALYZABLE (ref))
    return false;

  /* Can't hoist/sink aggregate copies.  */
  if (ref->mem.ref == error_mark_node)
    return false;

  /* It should be movable.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
      || TREE_THIS_VOLATILE (ref->mem.ref)
      || !for_each_index (&ref->mem.ref, may_move_till, loop))
    return false;

  /* If it can throw fail, we do not properly update EH info.  */
  if (tree_could_throw_p (ref->mem.ref))
    return false;

  /* If it can trap, it must be always executed in LOOP.
     Readonly memory locations may trap when storing to them, but
     tree_could_trap_p is a predicate for rvalues, so check that
     explicitly.  */
  base = get_base_address (t: ref->mem.ref);
  if ((tree_could_trap_p (ref->mem.ref)
       || (DECL_P (base) && TREE_READONLY (base))
       || TREE_CODE (base) == STRING_CST)
      /* ??? We can at least use false here, allowing loads?  We
	 are forcing conditional stores if the ref is not always
	 stored to later anyway.  So this would only guard
	 the load we need to emit.  Thus when the ref is not
	 loaded we can elide this completely?  */
      && !ref_always_accessed_p (loop, ref, stored_p: true))
    return false;

  /* Verify all loads of ref can be hoisted.  */
  if (ref->loaded
      && bitmap_bit_p (ref->loaded, loop->num)
      && !ref_indep_loop_p (loop, ref, kind: lim_raw))
    return false;

  /* Verify the candidate can be disambiguated against all loads,
     that is, we can elide all in-loop stores.  Disambiguation
     against stores is done later when we cannot guarantee preserving
     the order of stores.  */
  if (!ref_indep_loop_p (loop, ref, kind: sm_war))
    return false;

  /* Verify whether the candidate is hot for LOOP.  Only do store motion if the
    candidate's profile count is hot.  Statement in cold BB shouldn't be moved
    out of it's loop_father.  */
  if (!for_all_locs_in_loop (loop, ref, fn: ref_in_loop_hot_body (loop)))
    return false;

  return true;
}
3405
3406/* Marks the references in LOOP for that store motion should be performed
3407 in REFS_TO_SM. SM_EXECUTED is the set of references for that store
3408 motion was performed in one of the outer loops. */
3409
3410static void
3411find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm)
3412{
3413 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
3414 unsigned i;
3415 bitmap_iterator bi;
3416 im_mem_ref *ref;
3417
3418 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
3419 {
3420 ref = memory_accesses.refs_list[i];
3421 if (can_sm_ref_p (loop, ref) && dbg_cnt (index: lim))
3422 bitmap_set_bit (refs_to_sm, i);
3423 }
3424}
3425
3426/* Checks whether LOOP (with exits stored in EXITS array) is suitable
3427 for a store motion optimization (i.e. whether we can insert statement
3428 on its exits). */
3429
3430static bool
3431loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED,
3432 const vec<edge> &exits)
3433{
3434 unsigned i;
3435 edge ex;
3436
3437 FOR_EACH_VEC_ELT (exits, i, ex)
3438 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
3439 return false;
3440
3441 return true;
3442}
3443
3444/* Try to perform store motion for all memory references modified inside
3445 LOOP. SM_EXECUTED is the bitmap of the memory references for that
3446 store motion was executed in one of the outer loops. */
3447
3448static void
3449store_motion_loop (class loop *loop, bitmap sm_executed)
3450{
3451 auto_vec<edge> exits = get_loop_exit_edges (loop);
3452 class loop *subloop;
3453 bitmap sm_in_loop = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
3454
3455 if (loop_suitable_for_sm (loop, exits))
3456 {
3457 find_refs_for_sm (loop, sm_executed, refs_to_sm: sm_in_loop);
3458 if (!bitmap_empty_p (map: sm_in_loop))
3459 hoist_memory_references (loop, mem_refs: sm_in_loop, exits);
3460 }
3461
3462 bitmap_ior_into (sm_executed, sm_in_loop);
3463 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
3464 store_motion_loop (loop: subloop, sm_executed);
3465 bitmap_and_compl_into (sm_executed, sm_in_loop);
3466 BITMAP_FREE (sm_in_loop);
3467}
3468
3469/* Try to perform store motion for all memory references modified inside
3470 loops. */
3471
3472static void
3473do_store_motion (void)
3474{
3475 class loop *loop;
3476 bitmap sm_executed = BITMAP_ALLOC (obstack: &lim_bitmap_obstack);
3477
3478 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
3479 store_motion_loop (loop, sm_executed);
3480
3481 BITMAP_FREE (sm_executed);
3482}
3483
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for that execution
   of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
   blocks that contain a nonpure call.  */

static void
fill_always_executed_in_1 (class loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, last = NULL;
  edge e;
  class loop *inn_loop = loop;

  /* Process inner loops first; their ALWAYS_EXECUTED_IN may be
     extended to LOOP by the propagation loop at the end.  */
  for (class loop *inner = loop->inner; inner; inner = inner->next)
    fill_always_executed_in_1 (loop: inner, contains_call);

  /* Walk blocks of LOOP in dominance order starting at the header,
     tracking in LAST the last block found to dominate the latch (and
     thus to be always executed).  */
  auto_vec<basic_block, 64> worklist;
  worklist.reserve_exact (nelems: loop->num_nodes);
  worklist.quick_push (obj: loop->header);
  do
    {
      edge_iterator ei;
      bb = worklist.pop ();

      if (!flow_bb_inside_loop_p (inn_loop, bb))
	{
	  /* When we are leaving a possibly infinite inner loop
	     we have to stop processing.  */
	  if (!finite_loop_p (inn_loop))
	    break;
	  /* If the loop was finite we can continue with processing
	     the loop we exited to.  */
	  inn_loop = bb->loop_father;
	}

      /* BB dominates the latch, so it is executed on every iteration
	 that reaches the latch.  */
      if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	last = bb;

      /* A nonpure call may not return (exit, longjmp), so blocks after
	 it are not known to be always executed.  */
      if (bitmap_bit_p (map: contains_call, bitno: bb->index))
	break;

      /* If LOOP exits from this BB stop processing.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!flow_bb_inside_loop_p (loop, e->dest))
	  break;
      if (e)
	break;

      /* A loop might be infinite (TODO use simple loop analysis
	 to disprove this if possible).  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	break;

      if (bb->loop_father->header == bb)
	/* Record that we enter into a subloop since it might not
	   be finite.  */
	/* ???  Entering into a not always executed subloop makes
	   fill_always_executed_in quadratic in loop depth since
	   we walk those loops N times.  This is not a problem
	   in practice though, see PR102253 for a worst-case testcase.  */
	inn_loop = bb->loop_father;

      /* Walk the body of LOOP sorted by dominance relation.  Additionally,
	 if a basic block S dominates the latch, then only blocks dominated
	 by S are after it.
	 This is get_loop_body_in_dom_order using a worklist algorithm and
	 stopping once we are no longer interested in visiting further
	 blocks.  */
      unsigned old_len = worklist.length ();
      unsigned postpone = 0;
      for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
	   son;
	   son = next_dom_son (CDI_DOMINATORS, son))
	{
	  if (!flow_bb_inside_loop_p (loop, son))
	    continue;
	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
	    postpone = worklist.length ();
	  worklist.quick_push (obj: son);
	}
      if (postpone)
	/* Postponing the block that dominates the latch means
	   processing it last and thus putting it earliest in the
	   worklist.  */
	std::swap (a&: worklist[old_len], b&: worklist[postpone]);
    }
  while (!worklist.is_empty ());

  /* Mark LAST and all its dominators up to the header as always
     executed in LOOP (respectively extend an inner-loop annotation
     to LOOP where applicable).  */
  while (1)
    {
      if (last->loop_father == loop
	  || (ALWAYS_EXECUTED_IN (last)
	      && loop_outer (ALWAYS_EXECUTED_IN (last)) == loop))
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "BB %d is always executed in loop %d\n",
			 last->index, loop->num);
	  SET_ALWAYS_EXECUTED_IN (last, loop);
	}
      if (last == loop->header)
	break;
      last = get_immediate_dominator (CDI_DOMINATORS, last);
    }
}
3587
3588/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
3589 for each such basic block bb records the outermost loop for that execution
3590 of its header implies execution of bb. */
3591
3592static void
3593fill_always_executed_in (void)
3594{
3595 basic_block bb;
3596 class loop *loop;
3597
3598 auto_sbitmap contains_call (last_basic_block_for_fn (cfun));
3599 bitmap_clear (contains_call);
3600 FOR_EACH_BB_FN (bb, cfun)
3601 {
3602 if (loop_depth (loop: bb->loop_father) == 0)
3603 {
3604 /* Outside of loops we can skip scanning all stmts. */
3605 bitmap_set_bit (map: contains_call, bitno: bb->index);
3606 continue;
3607 }
3608 gimple_stmt_iterator gsi;
3609 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
3610 {
3611 if (nonpure_call_p (stmt: gsi_stmt (i: gsi)))
3612 break;
3613 }
3614
3615 if (!gsi_end_p (i: gsi))
3616 bitmap_set_bit (map: contains_call, bitno: bb->index);
3617 }
3618
3619 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
3620 fill_always_executed_in_1 (loop, contains_call);
3621}
3622
/* Find the coldest loop preheader for LOOP, also find the nearest hotter loop
   to LOOP.  Then recursively iterate each inner loop.  */

void
fill_coldest_and_hotter_out_loop (class loop *coldest_loop,
				  class loop *hotter_loop, class loop *loop)
{
  /* If LOOP's preheader is colder than the coldest seen so far on the
     path from the root, LOOP becomes the coldest candidate.  */
  if (bb_colder_than_loop_preheader (bb: loop_preheader_edge (loop)->src,
				     loop: coldest_loop))
    coldest_loop = loop;

  coldest_outermost_loop[loop->num] = coldest_loop;

  hotter_than_inner_loop[loop->num] = NULL;
  class loop *outer_loop = loop_outer (loop);
  /* First consider the hotter loop inherited from the caller ...  */
  if (hotter_loop
      && bb_colder_than_loop_preheader (bb: loop_preheader_edge (loop)->src,
					loop: hotter_loop))
    hotter_than_inner_loop[loop->num] = hotter_loop;

  /* ... then the immediate outer loop, which overrides it when its
     preheader is also hotter than LOOP's.  */
  if (outer_loop && outer_loop != current_loops->tree_root
      && bb_colder_than_loop_preheader (bb: loop_preheader_edge (loop)->src,
					loop: outer_loop))
    hotter_than_inner_loop[loop->num] = outer_loop;

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, "loop %d's coldest_outermost_loop is %d, ",
		   loop->num, coldest_loop->num);
      if (hotter_than_inner_loop[loop->num])
	dump_printf (MSG_NOTE, "hotter_than_inner_loop is %d\n",
		     hotter_than_inner_loop[loop->num]->num);
      else
	dump_printf (MSG_NOTE, "hotter_than_inner_loop is NULL\n");
    }

  /* Recurse into the loop tree.  */
  class loop *inner_loop;
  for (inner_loop = loop->inner; inner_loop; inner_loop = inner_loop->next)
    fill_coldest_and_hotter_out_loop (coldest_loop,
				      hotter_loop: hotter_than_inner_loop[loop->num],
				      loop: inner_loop);
}
3665
/* Compute the global information needed by the loop invariant motion pass.
   When STORE_MOTION is true additionally allocate the data store motion
   needs.  */

static void
tree_ssa_lim_initialize (bool store_motion)
{
  unsigned i;

  bitmap_obstack_initialize (&lim_bitmap_obstack);
  gcc_obstack_init (&mem_ref_obstack);
  lim_aux_data_map = new hash_map<gimple *, lim_aux_data *>;

  /* With transactional memory, compute the per-stmt transaction bits.  */
  if (flag_tm)
    compute_transaction_bits ();

  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
  memory_accesses.refs_list.create (nelems: 100);
  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
  memory_accesses.refs_list.quick_push
    (obj: mem_ref_alloc (NULL, hash: 0, UNANALYZABLE_MEM_ID));

  /* Per-loop bitmaps of refs loaded and stored; for store motion also
     the refs stored in a loop including its subloops.  */
  memory_accesses.refs_loaded_in_loop.create (nelems: number_of_loops (cfun));
  memory_accesses.refs_loaded_in_loop.quick_grow_cleared (len: number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.create (nelems: number_of_loops (cfun));
  memory_accesses.refs_stored_in_loop.quick_grow_cleared (len: number_of_loops (cfun));
  if (store_motion)
    {
      memory_accesses.all_refs_stored_in_loop.create (nelems: number_of_loops (cfun));
      memory_accesses.all_refs_stored_in_loop.quick_grow_cleared
	(len: number_of_loops (cfun));
    }

  for (i = 0; i < number_of_loops (cfun); i++)
    {
      bitmap_initialize (head: &memory_accesses.refs_loaded_in_loop[i],
			 obstack: &lim_bitmap_obstack);
      bitmap_initialize (head: &memory_accesses.refs_stored_in_loop[i],
			 obstack: &lim_bitmap_obstack);
      if (store_motion)
	bitmap_initialize (head: &memory_accesses.all_refs_stored_in_loop[i],
			   obstack: &lim_bitmap_obstack);
    }

  memory_accesses.ttae_cache = NULL;

  /* Initialize bb_loop_postorder with a mapping from loop->num to
     its postorder index.  */
  i = 0;
  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    bb_loop_postorder[loop->num] = i++;
}
3717
/* Cleans up after the invariant motion pass.  Releases all memory
   allocated by tree_ssa_lim_initialize and during the pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
  unsigned i;
  im_mem_ref *ref;

  /* Clear the per-BB annotations set by fill_always_executed_in.  */
  FOR_EACH_BB_FN (bb, cfun)
    SET_ALWAYS_EXECUTED_IN (bb, NULL);

  bitmap_obstack_release (&lim_bitmap_obstack);
  delete lim_aux_data_map;

  delete memory_accesses.refs;
  memory_accesses.refs = NULL;

  /* Release the mem-refs themselves and the obstack their data
     lives on.  */
  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
    memref_free (mem: ref);
  memory_accesses.refs_list.release ();
  obstack_free (&mem_ref_obstack, NULL);

  memory_accesses.refs_loaded_in_loop.release ();
  memory_accesses.refs_stored_in_loop.release ();
  memory_accesses.all_refs_stored_in_loop.release ();

  if (memory_accesses.ttae_cache)
    free_affine_expand_cache (&memory_accesses.ttae_cache);

  free (ptr: bb_loop_postorder);

  coldest_outermost_loop.release ();
  hotter_than_inner_loop.release ();
}
3753
/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be win regardless of the register pressure.
   Only perform store motion if STORE_MOTION is true.  Returns TODO_* flags
   to report to the pass manager.  */

unsigned int
loop_invariant_motion_in_fun (function *fun, bool store_motion)
{
  unsigned int todo = 0;

  tree_ssa_lim_initialize (store_motion);

  mark_ssa_maybe_undefs ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references (store_motion);

  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
  fill_always_executed_in ();

  /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
   */
  class loop *loop;
  coldest_outermost_loop.create (nelems: number_of_loops (cfun));
  coldest_outermost_loop.safe_grow_cleared (len: number_of_loops (cfun));
  hotter_than_inner_loop.create (nelems: number_of_loops (cfun));
  hotter_than_inner_loop.safe_grow_cleared (len: number_of_loops (cfun));
  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    fill_coldest_and_hotter_out_loop (coldest_loop: loop, NULL, loop);

  int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* For each statement determine the outermost loop in that it is
     invariant and cost for computing the invariant.  */
  for (int i = 0; i < n; ++i)
    compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  if (store_motion)
    do_store_motion ();

  /* Recompute the RPO order since store motion can insert control flow
     (see execute_sm_if_changed).  */
  free (ptr: rpo);
  rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
  n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* Move the expressions that are expensive enough.  */
  for (int i = 0; i < n; ++i)
    todo |= move_computations_worker (BASIC_BLOCK_FOR_FN (fun, rpo[i]));

  free (ptr: rpo);

  /* Commit the statements queued on edges and restore loop-closed SSA
     form if needed.  */
  gsi_commit_edge_inserts ();
  if (need_ssa_update_p (fun))
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  tree_ssa_lim_finalize ();

  return todo;
}
3814
3815/* Loop invariant motion pass. */
3816
3817namespace {
3818
/* Pass descriptor for the GIMPLE loop invariant motion pass.  */

const pass_data pass_data_lim =
{
  .type: GIMPLE_PASS, /* type */
  .name: "lim", /* name */
  .optinfo_flags: OPTGROUP_LOOP, /* optinfo_flags */
  .tv_id: TV_LIM, /* tv_id */
  PROP_cfg, /* properties_required */
  .properties_provided: 0, /* properties_provided */
  .properties_destroyed: 0, /* properties_destroyed */
  .todo_flags_start: 0, /* todo_flags_start */
  .todo_flags_finish: 0, /* todo_flags_finish */
};
3831
/* The loop invariant motion pass object.  Gated on flag_tree_loop_im
   (-ftree-loop-im) and cloneable so it can appear multiple times in
   the pass pipeline.  */

class pass_lim : public gimple_opt_pass
{
public:
  pass_lim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lim, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_lim (m_ctxt); }
  bool gate (function *) final override { return flag_tree_loop_im != 0; }
  unsigned int execute (function *) final override;

}; // class pass_lim
3845
3846unsigned int
3847pass_lim::execute (function *fun)
3848{
3849 in_loop_pipeline = scev_initialized_p ();
3850 if (!in_loop_pipeline)
3851 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
3852
3853 if (number_of_loops (fn: fun) <= 1)
3854 return 0;
3855 unsigned int todo = loop_invariant_motion_in_fun (fun, flag_move_loop_stores);
3856
3857 if (!in_loop_pipeline)
3858 loop_optimizer_finalize ();
3859 else
3860 scev_reset ();
3861 return todo;
3862}
3863
3864} // anon namespace
3865
/* Create an instance of the LIM pass for registration with the pass
   manager; the caller takes ownership of the returned pass.  */

gimple_opt_pass *
make_pass_lim (gcc::context *ctxt)
{
  return new pass_lim (ctxt);
}
3871
3872
3873

/* Source code of gcc/tree-ssa-loop-im.cc.  */