1/* Induction variable optimizations.
2 Copyright (C) 2003-2026 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20/* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
   Note that the interesting uses are categorized and handled in groups.
   Generally, address type uses are grouped together if their iv bases
   differ only in constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible, it might give a better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
68
69 For the targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed in RTL doloop optimization,
71 to try to make selected IV candidate set optimal. The process of doloop
72 support includes:
73
   1) Analyze whether the current loop will be transformed to doloop or not,
   find and mark its compare type IV use as doloop use (iv_group field
   doloop_p), and set flag doloop_use_p of ivopts_data to notify subsequent
   processing of doloop.  See analyze_and_mark_doloop_use and its callees
   for the details.
78 The target hook predict_doloop_p can be used for target specific checks.
79
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82 like biv. For cost determination between doloop IV cand and IV use, the
83 target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 provided to add on extra costs for generic type and address type IV use.
85 Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 use, and bound zero is set for IV elimination.
87
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will process as usual, pick up doloop dedicated IV if
90 profitable. */
91
92#include "config.h"
93#include "system.h"
94#include "coretypes.h"
95#include "backend.h"
96#include "rtl.h"
97#include "tree.h"
98#include "gimple.h"
99#include "cfghooks.h"
100#include "tree-pass.h"
101#include "memmodel.h"
102#include "tm_p.h"
103#include "ssa.h"
104#include "expmed.h"
105#include "insn-config.h"
106#include "emit-rtl.h"
107#include "recog.h"
108#include "cgraph.h"
109#include "gimple-pretty-print.h"
110#include "alias.h"
111#include "fold-const.h"
112#include "stor-layout.h"
113#include "tree-eh.h"
114#include "gimplify.h"
115#include "gimple-iterator.h"
116#include "gimplify-me.h"
117#include "tree-cfg.h"
118#include "tree-ssa-loop-ivopts.h"
119#include "tree-ssa-loop-manip.h"
120#include "tree-ssa-loop-niter.h"
121#include "tree-ssa-loop.h"
122#include "explow.h"
123#include "expr.h"
124#include "tree-dfa.h"
125#include "tree-ssa.h"
126#include "cfgloop.h"
127#include "tree-scalar-evolution.h"
128#include "tree-affine.h"
129#include "tree-ssa-propagate.h"
130#include "tree-ssa-address.h"
131#include "builtins.h"
132#include "tree-vectorizer.h"
133#include "dbgcnt.h"
134#include "cfganal.h"
135#include "gimple-fold.h"
136
137/* For lang_hooks.types.type_for_mode. */
138#include "langhooks.h"
139
140/* FIXME: Expressions are expanded to RTL in this pass to determine the
141 cost of different addressing modes. This should be moved to a TBD
142 interface between the GIMPLE and RTL worlds. */
143
/* Value that stands for an infinite cost.  */
#define INFTY 1000000000
146
147/* Returns the expected number of loop iterations for LOOP.
148 The average trip count is computed from profile data if it
149 exists. */
150
151static inline unsigned HOST_WIDE_INT
152avg_loop_niter (class loop *loop)
153{
154 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
155 if (niter == -1)
156 {
157 niter = likely_max_stmt_executions_int (loop);
158
159 if (niter == -1 || niter > param_avg_loop_niter)
160 return param_avg_loop_niter;
161 }
162
163 return niter;
164}
165
166struct iv_use;
167
168/* Representation of the induction variable. */
169struct iv
170{
171 tree base; /* Initial value of the iv. */
172 tree base_object; /* A memory object to that the induction variable points. */
173 tree step; /* Step of the iv (constant only). */
174 tree ssa_name; /* The ssa name with the value. */
175 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
176 bool biv_p; /* Is it a biv? */
177 bool no_overflow; /* True if the iv doesn't overflow. */
178 bool have_address_use;/* For biv, indicate if it's used in any address
179 type use. */
180};
181
182/* Per-ssa version information (induction variable descriptions, etc.). */
183struct version_info
184{
185 tree name; /* The ssa name. */
186 struct iv *iv; /* Induction variable description. */
187 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
188 an expression that is not an induction variable. */
189 bool preserve_biv; /* For the original biv, whether to preserve it. */
190 unsigned inv_id; /* Id of an invariant. */
191};
192
/* The kinds of induction variable uses.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,
  /* Use is an address for an explicit memory reference.  */
  USE_REF_ADDRESS,
  /* Use is a pointer argument to a function in cases where the expansion
     of the function will turn the argument into a normal address.  */
  USE_PTR_ADDRESS,
  /* Use is a compare.  */
  USE_COMPARE
};
204
205/* Cost of a computation. */
206class comp_cost
207{
208public:
209 comp_cost (): cost (0), complexity (0), scratch (0)
210 {}
211
212 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
213 : cost (cost), complexity (complexity), scratch (scratch)
214 {}
215
216 /* Returns true if COST is infinite. */
217 bool infinite_cost_p ();
218
219 /* Adds costs COST1 and COST2. */
220 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221
222 /* Adds COST to the comp_cost. */
223 comp_cost operator+= (comp_cost cost);
224
225 /* Adds constant C to this comp_cost. */
226 comp_cost operator+= (HOST_WIDE_INT c);
227
228 /* Subtracts constant C to this comp_cost. */
229 comp_cost operator-= (HOST_WIDE_INT c);
230
231 /* Divide the comp_cost by constant C. */
232 comp_cost operator/= (HOST_WIDE_INT c);
233
234 /* Multiply the comp_cost by constant C. */
235 comp_cost operator*= (HOST_WIDE_INT c);
236
237 /* Subtracts costs COST1 and COST2. */
238 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239
240 /* Subtracts COST from this comp_cost. */
241 comp_cost operator-= (comp_cost cost);
242
243 /* Returns true if COST1 is smaller than COST2. */
244 friend bool operator< (comp_cost cost1, comp_cost cost2);
245
246 /* Returns true if COST1 and COST2 are equal. */
247 friend bool operator== (comp_cost cost1, comp_cost cost2);
248
249 /* Returns true if COST1 is smaller or equal than COST2. */
250 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251
252 int64_t cost; /* The runtime cost. */
253 unsigned complexity; /* The estimate of the complexity of the code for
254 the computation (in no concrete units --
255 complexity field should be larger for more
256 complex expressions and addressing modes). */
257 int64_t scratch; /* Scratch used during cost computation. */
258};
259
260static const comp_cost no_cost;
261static const comp_cost infinite_cost (INFTY, 0, INFTY);
262
263bool
264comp_cost::infinite_cost_p ()
265{
266 return cost == INFTY;
267}
268
269comp_cost
270operator+ (comp_cost cost1, comp_cost cost2)
271{
272 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
273 return infinite_cost;
274
275 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
276 cost1.cost += cost2.cost;
277 cost1.complexity += cost2.complexity;
278
279 return cost1;
280}
281
282comp_cost
283operator- (comp_cost cost1, comp_cost cost2)
284{
285 if (cost1.infinite_cost_p ())
286 return infinite_cost;
287
288 gcc_assert (!cost2.infinite_cost_p ());
289 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290
291 cost1.cost -= cost2.cost;
292 cost1.complexity -= cost2.complexity;
293
294 return cost1;
295}
296
297comp_cost
298comp_cost::operator+= (comp_cost cost)
299{
300 *this = *this + cost;
301 return *this;
302}
303
304comp_cost
305comp_cost::operator+= (HOST_WIDE_INT c)
306{
307 if (c >= INFTY)
308 this->cost = INFTY;
309
310 if (infinite_cost_p ())
311 return *this;
312
313 gcc_assert (this->cost + c < infinite_cost.cost);
314 this->cost += c;
315
316 return *this;
317}
318
319comp_cost
320comp_cost::operator-= (HOST_WIDE_INT c)
321{
322 if (infinite_cost_p ())
323 return *this;
324
325 gcc_assert (this->cost - c < infinite_cost.cost);
326 this->cost -= c;
327
328 return *this;
329}
330
331comp_cost
332comp_cost::operator/= (HOST_WIDE_INT c)
333{
334 gcc_assert (c != 0);
335 if (infinite_cost_p ())
336 return *this;
337
338 this->cost /= c;
339
340 return *this;
341}
342
343comp_cost
344comp_cost::operator*= (HOST_WIDE_INT c)
345{
346 if (infinite_cost_p ())
347 return *this;
348
349 gcc_assert (this->cost * c < infinite_cost.cost);
350 this->cost *= c;
351
352 return *this;
353}
354
355comp_cost
356comp_cost::operator-= (comp_cost cost)
357{
358 *this = *this - cost;
359 return *this;
360}
361
362bool
363operator< (comp_cost cost1, comp_cost cost2)
364{
365 if (cost1.cost == cost2.cost)
366 return cost1.complexity < cost2.complexity;
367
368 return cost1.cost < cost2.cost;
369}
370
371bool
372operator== (comp_cost cost1, comp_cost cost2)
373{
374 return cost1.cost == cost2.cost
375 && cost1.complexity == cost2.complexity;
376}
377
378bool
379operator<= (comp_cost cost1, comp_cost cost2)
380{
381 return cost1 < cost2 || cost1 == cost2;
382}
383
384struct iv_inv_expr_ent;
385
386/* The candidate - cost pair. */
387class cost_pair
388{
389public:
390 struct iv_cand *cand; /* The candidate. */
391 comp_cost cost; /* The cost. */
392 enum tree_code comp; /* For iv elimination, the comparison. */
393 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
394 preserved when representing iv_use with iv_cand. */
395 bitmap inv_exprs; /* The list of newly created invariant expressions
396 when representing iv_use with iv_cand. */
397 tree value; /* For final value elimination, the expression for
398 the final value of the iv. For iv elimination,
399 the new bound to compare with. */
400};
401
402/* Use. */
403struct iv_use
404{
405 unsigned id; /* The id of the use. */
406 unsigned group_id; /* The group id the use belongs to. */
407 enum use_type type; /* Type of the use. */
408 tree mem_type; /* The memory type to use when testing whether an
409 address is legitimate, and what the address's
410 cost is. */
411 struct iv *iv; /* The induction variable it is based on. */
412 gimple *stmt; /* Statement in that it occurs. */
413 tree *op_p; /* The place where it occurs. */
414
415 tree addr_base; /* Base address with const offset stripped. */
416 poly_uint64 addr_offset;
417 /* Const offset stripped from base address. */
418};
419
420/* Group of uses. */
421struct iv_group
422{
423 /* The id of the group. */
424 unsigned id;
425 /* Uses of the group are of the same type. */
426 enum use_type type;
427 /* The set of "related" IV candidates, plus the important ones. */
428 bitmap related_cands;
429 /* Number of IV candidates in the cost_map. */
430 unsigned n_map_members;
431 /* The costs wrto the iv candidates. */
432 class cost_pair *cost_map;
433 /* The selected candidate for the group. */
434 struct iv_cand *selected;
435 /* To indicate this is a doloop use group. */
436 bool doloop_p;
437 /* Uses in the group. */
438 vec<struct iv_use *> vuses;
439};
440
/* Where in the loop an iv is computed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,
  /* At the end of the latch block.  */
  IP_END,
  /* Immediately before a specific use.  */
  IP_BEFORE_USE,
  /* Immediately after a specific use.  */
  IP_AFTER_USE,
  /* The original biv.  */
  IP_ORIGINAL
};
450
451/* The induction variable candidate. */
452struct iv_cand
453{
454 unsigned id; /* The number of the candidate. */
455 bool important; /* Whether this is an "important" candidate, i.e. such
456 that it should be considered by all uses. */
457 bool involves_undefs; /* Whether the IV involves undefined values. */
458 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
459 gimple *incremented_at;/* For original biv, the statement where it is
460 incremented. */
461 tree var_before; /* The variable used for it before increment. */
462 tree var_after; /* The variable used for it after increment. */
463 struct iv *iv; /* The value of the candidate. NULL for
464 "pseudocandidate" used to indicate the possibility
465 to replace the final value of an iv by direct
466 computation of the value. */
467 unsigned cost; /* Cost of the candidate. */
468 unsigned cost_step; /* Cost of the candidate's increment operation. */
469 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
470 where it is incremented. */
471 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
472 iv_cand. */
473 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
474 handle it as a new invariant expression which will
475 be hoisted out of loop. */
476 struct iv *orig_iv; /* The original iv if this cand is added from biv with
477 smaller type. */
478 bool doloop_p; /* Whether this is a doloop candidate. */
479};
480
481/* Hashtable entry for common candidate derived from iv uses. */
482class iv_common_cand
483{
484public:
485 tree base;
486 tree step;
487 /* IV uses from which this common candidate is derived. */
488 auto_vec<struct iv_use *> uses;
489 hashval_t hash;
490};
491
492/* Hashtable helpers. */
493
494struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495{
496 static inline hashval_t hash (const iv_common_cand *);
497 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
498};
499
500/* Hash function for possible common candidates. */
501
502inline hashval_t
503iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504{
505 return ccand->hash;
506}
507
508/* Hash table equality function for common candidates. */
509
510inline bool
511iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
512 const iv_common_cand *ccand2)
513{
514 return (ccand1->hash == ccand2->hash
515 && operand_equal_p (ccand1->base, ccand2->base, flags: 0)
516 && operand_equal_p (ccand1->step, ccand2->step, flags: 0)
517 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
518 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
519}
520
521/* Loop invariant expression hashtable entry. */
522
523struct iv_inv_expr_ent
524{
525 /* Tree expression of the entry. */
526 tree expr;
527 /* Unique indentifier. */
528 int id;
529 /* Hash value. */
530 hashval_t hash;
531};
532
533/* Sort iv_inv_expr_ent pair A and B by id field. */
534
535static int
536sort_iv_inv_expr_ent (const void *a, const void *b)
537{
538 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
539 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540
541 unsigned id1 = (*e1)->id;
542 unsigned id2 = (*e2)->id;
543
544 if (id1 < id2)
545 return -1;
546 else if (id1 > id2)
547 return 1;
548 else
549 return 0;
550}
551
552/* Hashtable helpers. */
553
554struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555{
556 static inline hashval_t hash (const iv_inv_expr_ent *);
557 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
558};
559
560/* Return true if uses of type TYPE represent some form of address. */
561
562inline bool
563address_p (use_type type)
564{
565 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
566}
567
568/* Hash function for loop invariant expressions. */
569
570inline hashval_t
571iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572{
573 return expr->hash;
574}
575
576/* Hash table equality function for expressions. */
577
578inline bool
579iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
580 const iv_inv_expr_ent *expr2)
581{
582 return expr1->hash == expr2->hash
583 && operand_equal_p (expr1->expr, expr2->expr, flags: 0);
584}
585
586struct ivopts_data
587{
588 /* The currently optimized loop. */
589 class loop *current_loop;
590 location_t loop_loc;
591
592 /* Numbers of iterations for all exits of the current loop. */
593 hash_map<edge, tree_niter_desc *> *niters;
594
595 /* Number of registers used in it. */
596 unsigned regs_used;
597
598 /* The size of version_info array allocated. */
599 unsigned version_info_size;
600
601 /* The array of information for the ssa names. */
602 struct version_info *version_info;
603
604 /* The hashtable of loop invariant expressions created
605 by ivopt. */
606 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607
608 /* The bitmap of indices in version_info whose value was changed. */
609 bitmap relevant;
610
611 /* The uses of induction variables. */
612 vec<iv_group *> vgroups;
613
614 /* The candidates. */
615 vec<iv_cand *> vcands;
616
617 /* A bitmap of important candidates. */
618 bitmap important_candidates;
619
620 /* Cache used by tree_to_aff_combination_expand. */
621 hash_map<tree, name_expansion *> *name_expansion_cache;
622
623 /* The hashtable of common candidates derived from iv uses. */
624 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625
626 /* The common candidates. */
627 vec<iv_common_cand *> iv_common_cands;
628
629 /* Hash map recording base object information of tree exp. */
630 hash_map<tree, tree> *base_object_map;
631
632 /* The maximum invariant variable id. */
633 unsigned max_inv_var_id;
634
635 /* The maximum invariant expression id. */
636 unsigned max_inv_expr_id;
637
638 /* Number of no_overflow BIVs which are not used in memory address. */
639 unsigned bivs_not_used_in_addr;
640
641 /* Obstack for iv structure. */
642 struct obstack iv_obstack;
643
644 /* Whether to consider just related and important candidates when replacing a
645 use. */
646 bool consider_all_candidates;
647
648 /* Are we optimizing for speed? */
649 bool speed;
650
651 /* Whether the loop body includes any function calls. */
652 bool body_includes_call;
653
654 /* Whether the loop body can only be exited via single exit. */
655 bool loop_single_exit_p;
656
657 /* Whether the loop has doloop comparison use. */
658 bool doloop_use_p;
659};
660
661/* An assignment of iv candidates to uses. */
662
663class iv_ca
664{
665public:
666 /* The number of uses covered by the assignment. */
667 unsigned upto;
668
669 /* Number of uses that cannot be expressed by the candidates in the set. */
670 unsigned bad_groups;
671
672 /* Candidate assigned to a use, together with the related costs. */
673 class cost_pair **cand_for_group;
674
675 /* Number of times each candidate is used. */
676 unsigned *n_cand_uses;
677
678 /* The candidates used. */
679 bitmap cands;
680
681 /* The number of candidates in the set. */
682 unsigned n_cands;
683
684 /* The number of invariants needed, including both invariant variants and
685 invariant expressions. */
686 unsigned n_invs;
687
688 /* Total cost of expressing uses. */
689 comp_cost cand_use_cost;
690
691 /* Total cost of candidates. */
692 int64_t cand_cost;
693
694 /* Number of times each invariant variable is used. */
695 unsigned *n_inv_var_uses;
696
697 /* Number of times each invariant expression is used. */
698 unsigned *n_inv_expr_uses;
699
700 /* Total cost of the assignment. */
701 comp_cost cost;
702};
703
/* One element of a linked list describing the difference between two iv
   candidate assignments.  */

struct iv_ca_delta
{
  struct iv_group *group;	/* Changed group.  */
  class cost_pair *old_cp;	/* An old assignment (for rollback
				   purposes).  */
  class cost_pair *new_cp;	/* A new assignment.  */
  struct iv_ca_delta *next;	/* Next change in the list.  */
};
720
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences than this, we just give up (it is quite
   unlikely that optimizing such a loop would help, and it would take
   ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)
737
738/* The list of trees for that the decl_rtl field must be reset is stored
739 here. */
740
741static vec<tree> decl_rtl_to_reset;
742
743static comp_cost force_expr_to_var_cost (tree, bool);
744
745/* The single loop exit if it dominates the latch, NULL otherwise. */
746
747edge
748single_dom_exit (class loop *loop)
749{
750 edge exit = single_exit (loop);
751
752 if (!exit)
753 return NULL;
754
755 if (!just_once_each_iteration_p (loop, exit->src))
756 return NULL;
757
758 return exit;
759}
760
761/* Dumps information about the induction variable IV to FILE. Don't dump
762 variable's name if DUMP_NAME is FALSE. The information is dumped with
763 preceding spaces indicated by INDENT_LEVEL. */
764
765void
766dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767{
768 const char *p;
769 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770
771 if (indent_level > 4)
772 indent_level = 4;
773 p = spaces + 8 - (indent_level << 1);
774
775 fprintf (stream: file, format: "%sIV struct:\n", p);
776 if (iv->ssa_name && dump_name)
777 {
778 fprintf (stream: file, format: "%s SSA_NAME:\t", p);
779 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
780 fprintf (stream: file, format: "\n");
781 }
782
783 fprintf (stream: file, format: "%s Type:\t", p);
784 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
785 fprintf (stream: file, format: "\n");
786
787 fprintf (stream: file, format: "%s Base:\t", p);
788 print_generic_expr (file, iv->base, TDF_SLIM);
789 fprintf (stream: file, format: "\n");
790
791 fprintf (stream: file, format: "%s Step:\t", p);
792 print_generic_expr (file, iv->step, TDF_SLIM);
793 fprintf (stream: file, format: "\n");
794
795 if (iv->base_object)
796 {
797 fprintf (stream: file, format: "%s Object:\t", p);
798 print_generic_expr (file, iv->base_object, TDF_SLIM);
799 fprintf (stream: file, format: "\n");
800 }
801
802 fprintf (stream: file, format: "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803
804 fprintf (stream: file, format: "%s Overflowness wrto loop niter:\t%s\n",
805 p, iv->no_overflow ? "No-overflow" : "Overflow");
806}
807
808/* Dumps information about the USE to FILE. */
809
810void
811dump_use (FILE *file, struct iv_use *use)
812{
813 fprintf (stream: file, format: " Use %d.%d:\n", use->group_id, use->id);
814 fprintf (stream: file, format: " At stmt:\t");
815 print_gimple_stmt (file, use->stmt, 0);
816 fprintf (stream: file, format: " At pos:\t");
817 if (use->op_p)
818 print_generic_expr (file, *use->op_p, TDF_SLIM);
819 fprintf (stream: file, format: "\n");
820 dump_iv (file, iv: use->iv, dump_name: false, indent_level: 2);
821}
822
823/* Dumps information about the uses to FILE. */
824
825void
826dump_groups (FILE *file, struct ivopts_data *data)
827{
828 unsigned i, j;
829 struct iv_group *group;
830
831 for (i = 0; i < data->vgroups.length (); i++)
832 {
833 group = data->vgroups[i];
834 fprintf (stream: file, format: "Group %d:\n", group->id);
835 if (group->type == USE_NONLINEAR_EXPR)
836 fprintf (stream: file, format: " Type:\tGENERIC\n");
837 else if (group->type == USE_REF_ADDRESS)
838 fprintf (stream: file, format: " Type:\tREFERENCE ADDRESS\n");
839 else if (group->type == USE_PTR_ADDRESS)
840 fprintf (stream: file, format: " Type:\tPOINTER ARGUMENT ADDRESS\n");
841 else
842 {
843 gcc_assert (group->type == USE_COMPARE);
844 fprintf (stream: file, format: " Type:\tCOMPARE\n");
845 }
846 for (j = 0; j < group->vuses.length (); j++)
847 dump_use (file, use: group->vuses[j]);
848 }
849}
850
851/* Dumps information about induction variable candidate CAND to FILE. */
852
853void
854dump_cand (FILE *file, struct iv_cand *cand)
855{
856 struct iv *iv = cand->iv;
857
858 fprintf (stream: file, format: "Candidate %d:\n", cand->id);
859 if (cand->inv_vars)
860 {
861 fprintf (stream: file, format: " Depend on inv.vars: ");
862 dump_bitmap (file, map: cand->inv_vars);
863 }
864 if (cand->inv_exprs)
865 {
866 fprintf (stream: file, format: " Depend on inv.exprs: ");
867 dump_bitmap (file, map: cand->inv_exprs);
868 }
869
870 if (cand->var_before)
871 {
872 fprintf (stream: file, format: " Var befor: ");
873 print_generic_expr (file, cand->var_before, TDF_SLIM);
874 fprintf (stream: file, format: "\n");
875 }
876 if (cand->var_after)
877 {
878 fprintf (stream: file, format: " Var after: ");
879 print_generic_expr (file, cand->var_after, TDF_SLIM);
880 fprintf (stream: file, format: "\n");
881 }
882
883 switch (cand->pos)
884 {
885 case IP_NORMAL:
886 fprintf (stream: file, format: " Incr POS: before exit test\n");
887 break;
888
889 case IP_BEFORE_USE:
890 fprintf (stream: file, format: " Incr POS: before use %d\n", cand->ainc_use->id);
891 break;
892
893 case IP_AFTER_USE:
894 fprintf (stream: file, format: " Incr POS: after use %d\n", cand->ainc_use->id);
895 break;
896
897 case IP_END:
898 fprintf (stream: file, format: " Incr POS: at end\n");
899 break;
900
901 case IP_ORIGINAL:
902 fprintf (stream: file, format: " Incr POS: orig biv\n");
903 break;
904 }
905
906 dump_iv (file, iv, dump_name: false, indent_level: 1);
907}
908
909/* Returns the info for ssa version VER. */
910
911static inline struct version_info *
912ver_info (struct ivopts_data *data, unsigned ver)
913{
914 return data->version_info + ver;
915}
916
917/* Returns the info for ssa name NAME. */
918
919static inline struct version_info *
920name_info (struct ivopts_data *data, tree name)
921{
922 return ver_info (data, SSA_NAME_VERSION (name));
923}
924
925/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
926 emitted in LOOP. */
927
928static bool
929stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930{
931 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (g: stmt);
932
933 gcc_assert (bb);
934
935 if (sbb == loop->latch)
936 return true;
937
938 if (sbb != bb)
939 return false;
940
941 return stmt == last_nondebug_stmt (bb);
942}
943
944/* Returns true if STMT if after the place where the original induction
945 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
946 if the positions are identical. */
947
948static bool
949stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950{
951 basic_block cand_bb = gimple_bb (g: cand->incremented_at);
952 basic_block stmt_bb = gimple_bb (g: stmt);
953
954 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
955 return false;
956
957 if (stmt_bb != cand_bb)
958 return true;
959
960 if (true_if_equal
961 && gimple_uid (g: stmt) == gimple_uid (g: cand->incremented_at))
962 return true;
963 return gimple_uid (g: stmt) > gimple_uid (g: cand->incremented_at);
964}
965
966/* Returns true if STMT if after the place where the induction variable
967 CAND is incremented in LOOP. */
968
969static bool
970stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971{
972 switch (cand->pos)
973 {
974 case IP_END:
975 return false;
976
977 case IP_NORMAL:
978 return stmt_after_ip_normal_pos (loop, stmt);
979
980 case IP_ORIGINAL:
981 case IP_AFTER_USE:
982 return stmt_after_inc_pos (cand, stmt, true_if_equal: false);
983
984 case IP_BEFORE_USE:
985 return stmt_after_inc_pos (cand, stmt, true_if_equal: true);
986
987 default:
988 gcc_unreachable ();
989 }
990}
991
992/* walk_tree callback for contains_abnormal_ssa_name_p. */
993
994static tree
995contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996{
997 if (TREE_CODE (*tp) == SSA_NAME
998 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
999 return *tp;
1000
1001 if (!EXPR_P (*tp))
1002 *walk_subtrees = 0;
1003
1004 return NULL_TREE;
1005}
1006
1007/* Returns true if EXPR contains a ssa name that occurs in an
1008 abnormal phi node. */
1009
1010bool
1011contains_abnormal_ssa_name_p (tree expr)
1012{
1013 return walk_tree_without_duplicates
1014 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1015}
1016
1017/* Returns the structure describing number of iterations determined from
1018 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019
1020static class tree_niter_desc *
1021niter_for_exit (struct ivopts_data *data, edge exit)
1022{
1023 class tree_niter_desc *desc;
1024 tree_niter_desc **slot;
1025
1026 if (!data->niters)
1027 {
1028 data->niters = new hash_map<edge, tree_niter_desc *>;
1029 slot = NULL;
1030 }
1031 else
1032 slot = data->niters->get (k: exit);
1033
1034 if (!slot)
1035 {
1036 /* Try to determine number of iterations. We cannot safely work with ssa
1037 names that appear in phi nodes on abnormal edges, so that we do not
1038 create overlapping life ranges for them (PR 27283). */
1039 desc = XNEW (class tree_niter_desc);
1040 ::new (static_cast<void*> (desc)) tree_niter_desc ();
1041 if (!number_of_iterations_exit (data->current_loop,
1042 exit, niter: desc, true)
1043 || contains_abnormal_ssa_name_p (expr: desc->niter))
1044 {
1045 desc->~tree_niter_desc ();
1046 XDELETE (desc);
1047 desc = NULL;
1048 }
1049 data->niters->put (k: exit, v: desc);
1050 }
1051 else
1052 desc = *slot;
1053
1054 return desc;
1055}
1056
1057/* Returns the structure describing number of iterations determined from
1058 single dominating exit of DATA->current_loop, or NULL if something
1059 goes wrong. */
1060
1061static class tree_niter_desc *
1062niter_for_single_dom_exit (struct ivopts_data *data)
1063{
1064 edge exit = single_dom_exit (loop: data->current_loop);
1065
1066 if (!exit)
1067 return NULL;
1068
1069 return niter_for_exit (data, exit);
1070}
1071
1072/* Initializes data structures used by the iv optimization pass, stored
1073 in DATA. */
1074
1075static void
1076tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077{
1078 data->version_info_size = 2 * num_ssa_names;
1079 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1080 data->relevant = BITMAP_ALLOC (NULL);
1081 data->important_candidates = BITMAP_ALLOC (NULL);
1082 data->max_inv_var_id = 0;
1083 data->max_inv_expr_id = 0;
1084 data->niters = NULL;
1085 data->vgroups.create (nelems: 20);
1086 data->vcands.create (nelems: 20);
1087 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1088 data->name_expansion_cache = NULL;
1089 data->base_object_map = NULL;
1090 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1091 data->iv_common_cands.create (nelems: 20);
1092 decl_rtl_to_reset.create (nelems: 20);
1093 gcc_obstack_init (&data->iv_obstack);
1094}
1095
1096/* walk_tree callback for determine_base_object. */
1097
1098static tree
1099determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1100{
1101 tree_code code = TREE_CODE (*tp);
1102 tree obj = NULL_TREE;
1103 if (code == ADDR_EXPR)
1104 {
1105 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1106 if (!base)
1107 obj = *tp;
1108 else if (TREE_CODE (base) != MEM_REF)
1109 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110 }
1111 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1112 obj = fold_convert (ptr_type_node, *tp);
1113
1114 if (!obj)
1115 {
1116 if (!EXPR_P (*tp))
1117 *walk_subtrees = 0;
1118
1119 return NULL_TREE;
1120 }
1121 /* Record special node for multiple base objects and stop. */
1122 if (*static_cast<tree *> (wdata))
1123 {
1124 *static_cast<tree *> (wdata) = integer_zero_node;
1125 return integer_zero_node;
1126 }
1127 /* Record the base object and continue looking. */
1128 *static_cast<tree *> (wdata) = obj;
1129 return NULL_TREE;
1130}
1131
1132/* Returns a memory object to that EXPR points with caching. Return NULL if we
1133 are able to determine that it does not point to any such object; specially
1134 return integer_zero_node if EXPR contains multiple base objects. */
1135
1136static tree
1137determine_base_object (struct ivopts_data *data, tree expr)
1138{
1139 tree *slot, obj = NULL_TREE;
1140 if (data->base_object_map)
1141 {
1142 if ((slot = data->base_object_map->get(k: expr)) != NULL)
1143 return *slot;
1144 }
1145 else
1146 data->base_object_map = new hash_map<tree, tree>;
1147
1148 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1149 data->base_object_map->put (k: expr, v: obj);
1150 return obj;
1151}
1152
1153/* Allocates an induction variable with given initial value BASE and step STEP
1154 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1155
1156static struct iv *
1157alloc_iv (struct ivopts_data *data, tree base, tree step,
1158 bool no_overflow = false)
1159{
1160 tree expr = base;
1161 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1162 sizeof (struct iv));
1163 gcc_assert (step != NULL_TREE);
1164
1165 /* Canonicalize the address expression in base if it were an unsigned
1166 computation. That leads to more equalities being detected and results in:
1167
1168 1) More accurate cost can be computed for address expressions;
1169 2) Duplicate candidates won't be created for bases in different
1170 forms, like &a[0] and &a.
1171 3) Duplicate candidates won't be created for IV expressions that differ
1172 only in their sign. */
1173 aff_tree comb;
1174 STRIP_NOPS (expr);
1175 expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
1176 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1177 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178
1179 iv->base = base;
1180 iv->base_object = determine_base_object (data, expr: base);
1181 iv->step = step;
1182 iv->biv_p = false;
1183 iv->nonlin_use = NULL;
1184 iv->ssa_name = NULL_TREE;
1185 if (!no_overflow
1186 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1187 base, step))
1188 no_overflow = true;
1189 iv->no_overflow = no_overflow;
1190 iv->have_address_use = false;
1191
1192 return iv;
1193}
1194
1195/* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1196 doesn't overflow. */
1197
1198static void
1199set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1200 bool no_overflow)
1201{
1202 struct version_info *info = name_info (data, name: iv);
1203
1204 gcc_assert (!info->iv);
1205
1206 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1207 info->iv = alloc_iv (data, base, step, no_overflow);
1208 info->iv->ssa_name = iv;
1209}
1210
1211/* Finds induction variable declaration for VAR. */
1212
1213static struct iv *
1214get_iv (struct ivopts_data *data, tree var)
1215{
1216 basic_block bb;
1217 tree type = TREE_TYPE (var);
1218
1219 if (!POINTER_TYPE_P (type)
1220 && !INTEGRAL_TYPE_P (type))
1221 return NULL;
1222
1223 if (!name_info (data, name: var)->iv)
1224 {
1225 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1226
1227 if (!bb
1228 || !flow_bb_inside_loop_p (data->current_loop, bb))
1229 {
1230 if (POINTER_TYPE_P (type))
1231 type = sizetype;
1232 set_iv (data, iv: var, base: var, step: build_int_cst (type, 0), no_overflow: true);
1233 }
1234 }
1235
1236 return name_info (data, name: var)->iv;
1237}
1238
1239/* Return the first non-invariant ssa var found in EXPR. */
1240
1241static tree
1242extract_single_var_from_expr (tree expr)
1243{
1244 int i, n;
1245 tree tmp;
1246 enum tree_code code;
1247
1248 if (!expr || is_gimple_min_invariant (expr))
1249 return NULL;
1250
1251 code = TREE_CODE (expr);
1252 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1253 {
1254 n = TREE_OPERAND_LENGTH (expr);
1255 for (i = 0; i < n; i++)
1256 {
1257 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1258
1259 if (tmp)
1260 return tmp;
1261 }
1262 }
1263 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1264}
1265
1266/* Finds basic ivs. */
1267
1268static bool
1269find_bivs (struct ivopts_data *data)
1270{
1271 gphi *phi;
1272 affine_iv iv;
1273 tree step, type, base, stop;
1274 bool found = false;
1275 class loop *loop = data->current_loop;
1276 gphi_iterator psi;
1277
1278 for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1279 {
1280 phi = psi.phi ();
1281
1282 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1283 continue;
1284
1285 if (virtual_operand_p (PHI_RESULT (phi)))
1286 continue;
1287
1288 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1289 continue;
1290
1291 if (integer_zerop (iv.step))
1292 continue;
1293
1294 step = iv.step;
1295 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1296 /* Stop expanding iv base at the first ssa var referred by iv step.
1297 Ideally we should stop at any ssa var, because that's expensive
1298 and unusual to happen, we just do it on the first one.
1299
1300 See PR64705 for the rationale. */
1301 stop = extract_single_var_from_expr (expr: step);
1302 base = expand_simple_operations (base, stop);
1303 if (contains_abnormal_ssa_name_p (expr: base)
1304 || contains_abnormal_ssa_name_p (expr: step))
1305 continue;
1306
1307 type = TREE_TYPE (PHI_RESULT (phi));
1308 base = fold_convert (type, base);
1309 if (step)
1310 {
1311 if (POINTER_TYPE_P (type))
1312 step = convert_to_ptrofftype (step);
1313 else
1314 step = fold_convert (type, step);
1315 }
1316
1317 set_iv (data, PHI_RESULT (phi), base, step, no_overflow: iv.no_overflow);
1318 found = true;
1319 }
1320
1321 return found;
1322}
1323
1324/* Marks basic ivs. */
1325
1326static void
1327mark_bivs (struct ivopts_data *data)
1328{
1329 gphi *phi;
1330 gimple *def;
1331 tree var;
1332 struct iv *iv, *incr_iv;
1333 class loop *loop = data->current_loop;
1334 basic_block incr_bb;
1335 gphi_iterator psi;
1336
1337 data->bivs_not_used_in_addr = 0;
1338 for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1339 {
1340 phi = psi.phi ();
1341
1342 iv = get_iv (data, PHI_RESULT (phi));
1343 if (!iv)
1344 continue;
1345
1346 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1347 def = SSA_NAME_DEF_STMT (var);
1348 /* Don't mark iv peeled from other one as biv. */
1349 if (def
1350 && gimple_code (g: def) == GIMPLE_PHI
1351 && gimple_bb (g: def) == loop->header)
1352 continue;
1353
1354 incr_iv = get_iv (data, var);
1355 if (!incr_iv)
1356 continue;
1357
1358 /* If the increment is in the subloop, ignore it. */
1359 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1360 if (incr_bb->loop_father != data->current_loop
1361 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1362 continue;
1363
1364 iv->biv_p = true;
1365 incr_iv->biv_p = true;
1366 if (iv->no_overflow)
1367 data->bivs_not_used_in_addr++;
1368 if (incr_iv->no_overflow)
1369 data->bivs_not_used_in_addr++;
1370 }
1371}
1372
1373/* Checks whether STMT defines a linear induction variable and stores its
1374 parameters to IV. */
1375
1376static bool
1377find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1378{
1379 tree lhs, stop;
1380 class loop *loop = data->current_loop;
1381
1382 iv->base = NULL_TREE;
1383 iv->step = NULL_TREE;
1384
1385 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1386 return false;
1387
1388 lhs = gimple_assign_lhs (gs: stmt);
1389 if (TREE_CODE (lhs) != SSA_NAME)
1390 return false;
1391
1392 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1393 return false;
1394
1395 /* Stop expanding iv base at the first ssa var referred by iv step.
1396 Ideally we should stop at any ssa var, because that's expensive
1397 and unusual to happen, we just do it on the first one.
1398
1399 See PR64705 for the rationale. */
1400 stop = extract_single_var_from_expr (expr: iv->step);
1401 iv->base = expand_simple_operations (iv->base, stop);
1402 if (contains_abnormal_ssa_name_p (expr: iv->base)
1403 || contains_abnormal_ssa_name_p (expr: iv->step))
1404 return false;
1405
1406 /* If STMT could throw, then do not consider STMT as defining a GIV.
1407 While this will suppress optimizations, we cannot safely delete this
1408 GIV and associated statements, even if it appears it is not used. */
1409 if (stmt_could_throw_p (cfun, stmt))
1410 return false;
1411
1412 return true;
1413}
1414
1415/* Finds general ivs in statement STMT. */
1416
1417static void
1418find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1419{
1420 affine_iv iv;
1421
1422 if (!find_givs_in_stmt_scev (data, stmt, iv: &iv))
1423 return;
1424
1425 set_iv (data, iv: gimple_assign_lhs (gs: stmt), base: iv.base, step: iv.step, no_overflow: iv.no_overflow);
1426}
1427
1428/* Finds general ivs in basic block BB. */
1429
1430static void
1431find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1432{
1433 gimple_stmt_iterator bsi;
1434
1435 for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1436 if (!is_gimple_debug (gs: gsi_stmt (i: bsi)))
1437 find_givs_in_stmt (data, stmt: gsi_stmt (i: bsi));
1438}
1439
1440/* Finds general ivs. */
1441
1442static void
1443find_givs (struct ivopts_data *data, basic_block *body)
1444{
1445 class loop *loop = data->current_loop;
1446 unsigned i;
1447
1448 for (i = 0; i < loop->num_nodes; i++)
1449 find_givs_in_bb (data, bb: body[i]);
1450}
1451
1452/* For each ssa name defined in LOOP determines whether it is an induction
1453 variable and if so, its initial value and step. */
1454
1455static bool
1456find_induction_variables (struct ivopts_data *data, basic_block *body)
1457{
1458 unsigned i;
1459 bitmap_iterator bi;
1460
1461 if (!find_bivs (data))
1462 return false;
1463
1464 find_givs (data, body);
1465 mark_bivs (data);
1466
1467 if (dump_file && (dump_flags & TDF_DETAILS))
1468 {
1469 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1470
1471 if (niter)
1472 {
1473 fprintf (stream: dump_file, format: " number of iterations ");
1474 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1475 if (!integer_zerop (niter->may_be_zero))
1476 {
1477 fprintf (stream: dump_file, format: "; zero if ");
1478 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1479 }
1480 fprintf (stream: dump_file, format: "\n");
1481 };
1482
1483 fprintf (stream: dump_file, format: "\n<Induction Vars>:\n");
1484 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1485 {
1486 struct version_info *info = ver_info (data, ver: i);
1487 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1488 dump_iv (file: dump_file, iv: ver_info (data, ver: i)->iv, dump_name: true, indent_level: 0);
1489 }
1490 }
1491
1492 return true;
1493}
1494
1495/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1496 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1497 is the const offset stripped from IV base and MEM_TYPE is the type
1498 of the memory being addressed. For uses of other types, ADDR_BASE
1499 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1500
1501static struct iv_use *
1502record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1503 gimple *stmt, enum use_type type, tree mem_type,
1504 tree addr_base, poly_uint64 addr_offset)
1505{
1506 struct iv_use *use = XCNEW (struct iv_use);
1507
1508 use->id = group->vuses.length ();
1509 use->group_id = group->id;
1510 use->type = type;
1511 use->mem_type = mem_type;
1512 use->iv = iv;
1513 use->stmt = stmt;
1514 use->op_p = use_p;
1515 use->addr_base = addr_base;
1516 use->addr_offset = addr_offset;
1517
1518 group->vuses.safe_push (obj: use);
1519 return use;
1520}
1521
1522/* Checks whether OP is a loop-level invariant and if so, records it.
1523 NONLINEAR_USE is true if the invariant is used in a way we do not
1524 handle specially. */
1525
1526static void
1527record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1528{
1529 basic_block bb;
1530 struct version_info *info;
1531
1532 if (TREE_CODE (op) != SSA_NAME
1533 || virtual_operand_p (op))
1534 return;
1535
1536 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1537 if (bb
1538 && flow_bb_inside_loop_p (data->current_loop, bb))
1539 return;
1540
1541 info = name_info (data, name: op);
1542 info->name = op;
1543 info->has_nonlin_use |= nonlinear_use;
1544 if (!info->inv_id)
1545 info->inv_id = ++data->max_inv_var_id;
1546 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1547}
1548
1549/* Record a group of TYPE. */
1550
1551static struct iv_group *
1552record_group (struct ivopts_data *data, enum use_type type)
1553{
1554 struct iv_group *group = XCNEW (struct iv_group);
1555
1556 group->id = data->vgroups.length ();
1557 group->type = type;
1558 group->related_cands = BITMAP_ALLOC (NULL);
1559 group->vuses.create (nelems: 1);
1560 group->doloop_p = false;
1561
1562 data->vgroups.safe_push (obj: group);
1563 return group;
1564}
1565
1566/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1567 New group will be created if there is no existing group for the use.
1568 MEM_TYPE is the type of memory being addressed, or NULL if this
1569 isn't an address reference. */
1570
1571static struct iv_use *
1572record_group_use (struct ivopts_data *data, tree *use_p,
1573 struct iv *iv, gimple *stmt, enum use_type type,
1574 tree mem_type)
1575{
1576 tree addr_base = NULL;
1577 struct iv_group *group = NULL;
1578 poly_uint64 addr_offset = 0;
1579
1580 /* Record non address type use in a new group. */
1581 if (address_p (type))
1582 {
1583 unsigned int i;
1584
1585 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1586 tree addr_toffset;
1587 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1588 addr_offset = int_cst_value (addr_toffset);
1589 for (i = 0; i < data->vgroups.length (); i++)
1590 {
1591 struct iv_use *use;
1592
1593 group = data->vgroups[i];
1594 use = group->vuses[0];
1595 if (!address_p (type: use->type))
1596 continue;
1597
1598 /* Check if it has the same stripped base and step. */
1599 if (operand_equal_p (iv->base_object, use->iv->base_object, flags: 0)
1600 && operand_equal_p (iv->step, use->iv->step, flags: OEP_ASSUME_WRAPV)
1601 && operand_equal_p (addr_base, use->addr_base, flags: OEP_ASSUME_WRAPV))
1602 break;
1603 }
1604 if (i == data->vgroups.length ())
1605 group = NULL;
1606 }
1607
1608 if (!group)
1609 group = record_group (data, type);
1610
1611 return record_use (group, use_p, iv, stmt, type, mem_type,
1612 addr_base, addr_offset);
1613}
1614
1615/* Checks whether the use OP is interesting and if so, records it. */
1616
1617static struct iv_use *
1618find_interesting_uses_op (struct ivopts_data *data, tree op)
1619{
1620 struct iv *iv;
1621 gimple *stmt;
1622 struct iv_use *use;
1623
1624 if (TREE_CODE (op) != SSA_NAME)
1625 return NULL;
1626
1627 iv = get_iv (data, var: op);
1628 if (!iv)
1629 return NULL;
1630
1631 if (iv->nonlin_use)
1632 {
1633 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1634 return iv->nonlin_use;
1635 }
1636
1637 if (integer_zerop (iv->step))
1638 {
1639 record_invariant (data, op, nonlinear_use: true);
1640 return NULL;
1641 }
1642
1643 stmt = SSA_NAME_DEF_STMT (op);
1644 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1645
1646 use = record_group_use (data, NULL, iv, stmt, type: USE_NONLINEAR_EXPR, NULL_TREE);
1647 iv->nonlin_use = use;
1648 return use;
1649}
1650
/* Classification, computed by extract_cond_operands, of how a compare type
   iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* Neither operand of the comparison is an iv with nonzero step.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.
     Chosen when the other operand has no iv description.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  Chosen when both operands are ivs
     with nonzero step.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  Chosen when the other
     operand has an iv description with zero step.  */
  COMP_IV_ELIM
};
1664
1665/* Given a condition in statement STMT, checks whether it is a compare
1666 of an induction variable and an invariant. If this is the case,
1667 CONTROL_VAR is set to location of the iv, BOUND to the location of
1668 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1669 induction variable descriptions, and true is returned. If this is not
1670 the case, CONTROL_VAR and BOUND are set to the arguments of the
1671 condition and false is returned. */
1672
1673static enum comp_iv_rewrite
1674extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1675 tree **control_var, tree **bound,
1676 struct iv **iv_var, struct iv **iv_bound)
1677{
1678 /* The objects returned when COND has constant operands. */
1679 static struct iv const_iv;
1680 static tree zero;
1681 tree *op0 = &zero, *op1 = &zero;
1682 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1683 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1684
1685 if (gimple_code (g: stmt) == GIMPLE_COND)
1686 {
1687 gcond *cond_stmt = as_a <gcond *> (p: stmt);
1688 op0 = gimple_cond_lhs_ptr (gs: cond_stmt);
1689 op1 = gimple_cond_rhs_ptr (gs: cond_stmt);
1690 }
1691 else
1692 {
1693 op0 = gimple_assign_rhs1_ptr (gs: stmt);
1694 op1 = gimple_assign_rhs2_ptr (gs: stmt);
1695 }
1696
1697 zero = integer_zero_node;
1698 const_iv.step = integer_zero_node;
1699
1700 if (TREE_CODE (*op0) == SSA_NAME)
1701 iv0 = get_iv (data, var: *op0);
1702 if (TREE_CODE (*op1) == SSA_NAME)
1703 iv1 = get_iv (data, var: *op1);
1704
1705 /* If both sides of comparison are IVs. We can express ivs on both end. */
1706 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1707 {
1708 rewrite_type = COMP_IV_EXPR_2;
1709 goto end;
1710 }
1711
1712 /* If none side of comparison is IV. */
1713 if ((!iv0 || integer_zerop (iv0->step))
1714 && (!iv1 || integer_zerop (iv1->step)))
1715 goto end;
1716
1717 /* Control variable may be on the other side. */
1718 if (!iv0 || integer_zerop (iv0->step))
1719 {
1720 std::swap (a&: op0, b&: op1);
1721 std::swap (a&: iv0, b&: iv1);
1722 }
1723 /* If one side is IV and the other side isn't loop invariant. */
1724 if (!iv1)
1725 rewrite_type = COMP_IV_EXPR;
1726 /* If one side is IV and the other side is loop invariant. */
1727 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1728 rewrite_type = COMP_IV_ELIM;
1729
1730end:
1731 if (control_var)
1732 *control_var = op0;
1733 if (iv_var)
1734 *iv_var = iv0;
1735 if (bound)
1736 *bound = op1;
1737 if (iv_bound)
1738 *iv_bound = iv1;
1739
1740 return rewrite_type;
1741}
1742
1743/* Checks whether the condition in STMT is interesting and if so,
1744 records it. */
1745
1746static void
1747find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1748{
1749 tree *var_p, *bound_p;
1750 struct iv *var_iv, *bound_iv;
1751 enum comp_iv_rewrite ret;
1752
1753 ret = extract_cond_operands (data, stmt,
1754 control_var: &var_p, bound: &bound_p, iv_var: &var_iv, iv_bound: &bound_iv);
1755 if (ret == COMP_IV_NA)
1756 {
1757 find_interesting_uses_op (data, op: *var_p);
1758 find_interesting_uses_op (data, op: *bound_p);
1759 return;
1760 }
1761
1762 record_group_use (data, use_p: var_p, iv: var_iv, stmt, type: USE_COMPARE, NULL_TREE);
1763 /* Record compare type iv_use for iv on the other side of comparison. */
1764 if (ret == COMP_IV_EXPR_2)
1765 record_group_use (data, use_p: bound_p, iv: bound_iv, stmt, type: USE_COMPARE, NULL_TREE);
1766}
1767
1768/* Returns the outermost loop EXPR is obviously invariant in
1769 relative to the loop LOOP, i.e. if all its operands are defined
1770 outside of the returned loop. Returns NULL if EXPR is not
1771 even obviously invariant in LOOP. */
1772
1773class loop *
1774outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1775{
1776 basic_block def_bb;
1777 unsigned i, len;
1778
1779 if (is_gimple_min_invariant (expr))
1780 return current_loops->tree_root;
1781
1782 if (TREE_CODE (expr) == SSA_NAME)
1783 {
1784 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1785 if (def_bb)
1786 {
1787 if (flow_bb_inside_loop_p (loop, def_bb))
1788 return NULL;
1789 return superloop_at_depth (loop,
1790 loop_depth (loop: def_bb->loop_father) + 1);
1791 }
1792
1793 return current_loops->tree_root;
1794 }
1795
1796 if (!EXPR_P (expr))
1797 return NULL;
1798
1799 unsigned maxdepth = 0;
1800 len = TREE_OPERAND_LENGTH (expr);
1801 for (i = 0; i < len; i++)
1802 {
1803 class loop *ivloop;
1804 if (!TREE_OPERAND (expr, i))
1805 continue;
1806
1807 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1808 if (!ivloop)
1809 return NULL;
1810 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1811 }
1812
1813 return superloop_at_depth (loop, maxdepth);
1814}
1815
1816/* Returns true if expression EXPR is obviously invariant in LOOP,
1817 i.e. if all its operands are defined outside of the LOOP. LOOP
1818 should not be the function body. */
1819
1820bool
1821expr_invariant_in_loop_p (class loop *loop, tree expr)
1822{
1823 basic_block def_bb;
1824 unsigned i, len;
1825
1826 gcc_assert (loop_depth (loop) > 0);
1827
1828 if (is_gimple_min_invariant (expr))
1829 return true;
1830
1831 if (TREE_CODE (expr) == SSA_NAME)
1832 {
1833 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1834 if (def_bb
1835 && flow_bb_inside_loop_p (loop, def_bb))
1836 return false;
1837
1838 return true;
1839 }
1840
1841 if (!EXPR_P (expr))
1842 return false;
1843
1844 len = TREE_OPERAND_LENGTH (expr);
1845 for (i = 0; i < len; i++)
1846 if (TREE_OPERAND (expr, i)
1847 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1848 return false;
1849
1850 return true;
1851}
1852
1853/* Given expression EXPR which computes inductive values with respect
1854 to loop recorded in DATA, this function returns biv from which EXPR
1855 is derived by tracing definition chains of ssa variables in EXPR. */
1856
1857static struct iv*
1858find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1859{
1860 struct iv *iv;
1861 unsigned i, n;
1862 tree e2, e1;
1863 enum tree_code code;
1864 gimple *stmt;
1865
1866 if (expr == NULL_TREE)
1867 return NULL;
1868
1869 if (is_gimple_min_invariant (expr))
1870 return NULL;
1871
1872 code = TREE_CODE (expr);
1873 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1874 {
1875 n = TREE_OPERAND_LENGTH (expr);
1876 for (i = 0; i < n; i++)
1877 {
1878 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1879 if (iv)
1880 return iv;
1881 }
1882 }
1883
1884 /* Stop if it's not ssa name. */
1885 if (code != SSA_NAME)
1886 return NULL;
1887
1888 iv = get_iv (data, var: expr);
1889 if (!iv || integer_zerop (iv->step))
1890 return NULL;
1891 else if (iv->biv_p)
1892 return iv;
1893
1894 stmt = SSA_NAME_DEF_STMT (expr);
1895 if (gphi *phi = dyn_cast <gphi *> (p: stmt))
1896 {
1897 ssa_op_iter iter;
1898 use_operand_p use_p;
1899 basic_block phi_bb = gimple_bb (g: phi);
1900
1901 /* Skip loop header PHI that doesn't define biv. */
1902 if (phi_bb->loop_father == data->current_loop)
1903 return NULL;
1904
1905 if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
1906 return NULL;
1907
1908 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1909 {
1910 tree use = USE_FROM_PTR (use_p);
1911 iv = find_deriving_biv_for_expr (data, expr: use);
1912 if (iv)
1913 return iv;
1914 }
1915 return NULL;
1916 }
1917 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1918 return NULL;
1919
1920 e1 = gimple_assign_rhs1 (gs: stmt);
1921 code = gimple_assign_rhs_code (gs: stmt);
1922 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1923 return find_deriving_biv_for_expr (data, expr: e1);
1924
1925 switch (code)
1926 {
1927 case MULT_EXPR:
1928 case PLUS_EXPR:
1929 case MINUS_EXPR:
1930 case POINTER_PLUS_EXPR:
1931 /* Increments, decrements and multiplications by a constant
1932 are simple. */
1933 e2 = gimple_assign_rhs2 (gs: stmt);
1934 iv = find_deriving_biv_for_expr (data, expr: e2);
1935 if (iv)
1936 return iv;
1937 gcc_fallthrough ();
1938
1939 CASE_CONVERT:
1940 /* Casts are simple. */
1941 return find_deriving_biv_for_expr (data, expr: e1);
1942
1943 default:
1944 break;
1945 }
1946
1947 return NULL;
1948}
1949
1950/* Record BIV, its predecessor and successor that they are used in
1951 address type uses. */
1952
1953static void
1954record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1955{
1956 unsigned i;
1957 tree type, base_1, base_2;
1958 bitmap_iterator bi;
1959
1960 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1961 || biv->have_address_use || !biv->no_overflow)
1962 return;
1963
1964 type = TREE_TYPE (biv->base);
1965 if (!INTEGRAL_TYPE_P (type))
1966 return;
1967
1968 biv->have_address_use = true;
1969 data->bivs_not_used_in_addr--;
1970 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1971 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1972 {
1973 struct iv *iv = ver_info (data, ver: i)->iv;
1974
1975 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1976 || iv->have_address_use || !iv->no_overflow)
1977 continue;
1978
1979 if (type != TREE_TYPE (iv->base)
1980 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1981 continue;
1982
1983 if (!operand_equal_p (biv->step, iv->step, flags: 0))
1984 continue;
1985
1986 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1987 if (operand_equal_p (base_1, iv->base, flags: 0)
1988 || operand_equal_p (base_2, biv->base, flags: 0))
1989 {
1990 iv->have_address_use = true;
1991 data->bivs_not_used_in_addr--;
1992 }
1993 }
1994}
1995
1996/* Cumulates the steps of indices into DATA and replaces their values with the
1997 initial ones. Returns false when the value of the index cannot be determined.
1998 Callback for for_each_index. */
1999
2000struct ifs_ivopts_data
2001{
2002 struct ivopts_data *ivopts_data;
2003 gimple *stmt;
2004 tree step;
2005};
2006
2007static bool
2008idx_find_step (tree base, tree *idx, void *data)
2009{
2010 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2011 struct iv *iv;
2012 bool use_overflow_semantics = false;
2013 tree step, iv_base, iv_step, lbound, off;
2014 class loop *loop = dta->ivopts_data->current_loop;
2015
2016 /* If base is a component ref, require that the offset of the reference
2017 be invariant. */
2018 if (TREE_CODE (base) == COMPONENT_REF)
2019 {
2020 off = component_ref_field_offset (base);
2021 return expr_invariant_in_loop_p (loop, expr: off);
2022 }
2023
2024 /* If base is array, first check whether we will be able to move the
2025 reference out of the loop (in order to take its address in strength
2026 reduction). In order for this to work we need both lower bound
2027 and step to be loop invariants. */
2028 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2029 {
2030 /* Moreover, for a range, the size needs to be invariant as well. */
2031 if (TREE_CODE (base) == ARRAY_RANGE_REF
2032 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2033 return false;
2034
2035 step = array_ref_element_size (base);
2036 lbound = array_ref_low_bound (base);
2037
2038 if (!expr_invariant_in_loop_p (loop, expr: step)
2039 || !expr_invariant_in_loop_p (loop, expr: lbound))
2040 return false;
2041 }
2042
2043 if (TREE_CODE (*idx) != SSA_NAME)
2044 return true;
2045
2046 iv = get_iv (data: dta->ivopts_data, var: *idx);
2047 if (!iv)
2048 return false;
2049
2050 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2051 *&x[0], which is not folded and does not trigger the
2052 ARRAY_REF path below. */
2053 *idx = iv->base;
2054
2055 if (integer_zerop (iv->step))
2056 return true;
2057
2058 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2059 {
2060 step = array_ref_element_size (base);
2061
2062 /* We only handle addresses whose step is an integer constant. */
2063 if (TREE_CODE (step) != INTEGER_CST)
2064 return false;
2065 }
2066 else
2067 /* The step for pointer arithmetics already is 1 byte. */
2068 step = size_one_node;
2069
2070 iv_base = iv->base;
2071 iv_step = iv->step;
2072 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2073 use_overflow_semantics = true;
2074
2075 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2076 sizetype, &iv_base, &iv_step, dta->stmt,
2077 use_overflow_semantics))
2078 {
2079 /* The index might wrap. */
2080 return false;
2081 }
2082
2083 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2084 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2085
2086 if (dta->ivopts_data->bivs_not_used_in_addr)
2087 {
2088 if (!iv->biv_p)
2089 iv = find_deriving_biv_for_expr (data: dta->ivopts_data, expr: iv->ssa_name);
2090
2091 record_biv_for_address_use (data: dta->ivopts_data, biv: iv);
2092 }
2093 return true;
2094}
2095
2096/* Records use in index IDX. Callback for for_each_index. Ivopts data
2097 object is passed to it in DATA. */
2098
2099static bool
2100idx_record_use (tree base, tree *idx,
2101 void *vdata)
2102{
2103 struct ivopts_data *data = (struct ivopts_data *) vdata;
2104 find_interesting_uses_op (data, op: *idx);
2105 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2106 {
2107 if (TREE_OPERAND (base, 2))
2108 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2109 if (TREE_OPERAND (base, 3))
2110 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2111 }
2112 return true;
2113}
2114
2115/* If we can prove that TOP = cst * BOT for some constant cst,
2116 store cst to MUL and return true. Otherwise return false.
2117 The returned value is always sign-extended, regardless of the
2118 signedness of TOP and BOT. */
2119
2120static bool
2121constant_multiple_of (tree top, tree bot, widest_int *mul,
2122 struct ivopts_data *data)
2123{
2124 aff_tree aff_top, aff_bot;
2125 tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top,
2126 &data->name_expansion_cache);
2127 tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot,
2128 &data->name_expansion_cache);
2129
2130 poly_widest_int poly_mul;
2131 if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2132 && poly_mul.is_constant (const_value: mul))
2133 return true;
2134
2135 return false;
2136}
2137
2138/* Return true if memory reference REF with step STEP may be unaligned. */
2139
2140static bool
2141may_be_unaligned_p (tree ref, tree step)
2142{
2143 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2144 thus they are not misaligned. */
2145 if (TREE_CODE (ref) == TARGET_MEM_REF)
2146 return false;
2147
2148 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2149 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2150 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2151
2152 unsigned HOST_WIDE_INT bitpos;
2153 unsigned int ref_align;
2154 get_object_alignment_1 (ref, &ref_align, &bitpos);
2155 if (ref_align < align
2156 || (bitpos % align) != 0
2157 || (bitpos % BITS_PER_UNIT) != 0)
2158 return true;
2159
2160 unsigned int trailing_zeros = tree_ctz (step);
2161 if (trailing_zeros < HOST_BITS_PER_INT
2162 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2163 return true;
2164
2165 return false;
2166}
2167
2168/* Return true if EXPR may be non-addressable. */
2169
2170bool
2171may_be_nonaddressable_p (tree expr)
2172{
2173 switch (TREE_CODE (expr))
2174 {
2175 case VAR_DECL:
2176 /* Check if it's a register variable. */
2177 return DECL_HARD_REGISTER (expr);
2178
2179 case TARGET_MEM_REF:
2180 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2181 target, thus they are always addressable. */
2182 return false;
2183
2184 case MEM_REF:
2185 /* Likewise for MEM_REFs, modulo the storage order. */
2186 return REF_REVERSE_STORAGE_ORDER (expr);
2187
2188 case BIT_FIELD_REF:
2189 if (REF_REVERSE_STORAGE_ORDER (expr))
2190 return true;
2191 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2192
2193 case COMPONENT_REF:
2194 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2195 return true;
2196 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2197 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2198
2199 case ARRAY_REF:
2200 case ARRAY_RANGE_REF:
2201 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2202 return true;
2203 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2204
2205 case VIEW_CONVERT_EXPR:
2206 /* This kind of view-conversions may wrap non-addressable objects
2207 and make them look addressable. After some processing the
2208 non-addressability may be uncovered again, causing ADDR_EXPRs
2209 of inappropriate objects to be built. */
2210 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2211 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2212 return true;
2213 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2214
2215 CASE_CONVERT:
2216 return true;
2217
2218 default:
2219 break;
2220 }
2221
2222 return false;
2223}
2224
2225/* Finds addresses in *OP_P inside STMT. */
2226
2227static void
2228find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2229 tree *op_p)
2230{
2231 tree base = *op_p, step = size_zero_node;
2232 struct iv *civ;
2233 struct ifs_ivopts_data ifs_ivopts_data;
2234
2235 /* Do not play with volatile memory references. A bit too conservative,
2236 perhaps, but safe. */
2237 if (gimple_has_volatile_ops (stmt))
2238 goto fail;
2239
2240 /* Ignore bitfields for now. Not really something terribly complicated
2241 to handle. TODO. */
2242 if (TREE_CODE (base) == BIT_FIELD_REF)
2243 goto fail;
2244
2245 base = unshare_expr (base);
2246
2247 if (TREE_CODE (base) == TARGET_MEM_REF)
2248 {
2249 tree type = build_pointer_type (TREE_TYPE (base));
2250 tree astep;
2251
2252 if (TMR_BASE (base)
2253 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2254 {
2255 civ = get_iv (data, TMR_BASE (base));
2256 if (!civ)
2257 goto fail;
2258
2259 TMR_BASE (base) = civ->base;
2260 step = civ->step;
2261 }
2262 if (TMR_INDEX2 (base)
2263 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2264 {
2265 civ = get_iv (data, TMR_INDEX2 (base));
2266 if (!civ)
2267 goto fail;
2268
2269 TMR_INDEX2 (base) = civ->base;
2270 step = civ->step;
2271 }
2272 if (TMR_INDEX (base)
2273 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2274 {
2275 civ = get_iv (data, TMR_INDEX (base));
2276 if (!civ)
2277 goto fail;
2278
2279 TMR_INDEX (base) = civ->base;
2280 astep = civ->step;
2281
2282 if (astep)
2283 {
2284 if (TMR_STEP (base))
2285 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2286
2287 step = fold_build2 (PLUS_EXPR, type, step, astep);
2288 }
2289 }
2290
2291 if (integer_zerop (step))
2292 goto fail;
2293 base = tree_mem_ref_addr (type, base);
2294 }
2295 else
2296 {
2297 ifs_ivopts_data.ivopts_data = data;
2298 ifs_ivopts_data.stmt = stmt;
2299 ifs_ivopts_data.step = size_zero_node;
2300 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2301 || integer_zerop (ifs_ivopts_data.step))
2302 goto fail;
2303 step = ifs_ivopts_data.step;
2304
2305 /* Check that the base expression is addressable. This needs
2306 to be done after substituting bases of IVs into it. */
2307 if (may_be_nonaddressable_p (expr: base))
2308 goto fail;
2309
2310 /* Moreover, on strict alignment platforms, check that it is
2311 sufficiently aligned. */
2312 if (STRICT_ALIGNMENT && may_be_unaligned_p (ref: base, step))
2313 goto fail;
2314
2315 base = build_fold_addr_expr (base);
2316
2317 /* Substituting bases of IVs into the base expression might
2318 have caused folding opportunities. */
2319 if (TREE_CODE (base) == ADDR_EXPR)
2320 {
2321 tree *ref = &TREE_OPERAND (base, 0);
2322 while (handled_component_p (t: *ref))
2323 ref = &TREE_OPERAND (*ref, 0);
2324 if (TREE_CODE (*ref) == MEM_REF)
2325 {
2326 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2327 TREE_OPERAND (*ref, 0),
2328 TREE_OPERAND (*ref, 1));
2329 if (tem)
2330 *ref = tem;
2331 }
2332 }
2333 }
2334
2335 civ = alloc_iv (data, base, step);
2336 /* Fail if base object of this memory reference is unknown. */
2337 if (civ->base_object == NULL_TREE)
2338 goto fail;
2339
2340 record_group_use (data, use_p: op_p, iv: civ, stmt, type: USE_REF_ADDRESS, TREE_TYPE (*op_p));
2341 return;
2342
2343fail:
2344 for_each_index (op_p, idx_record_use, data);
2345}
2346
2347/* Finds and records invariants used in STMT. */
2348
2349static void
2350find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2351{
2352 ssa_op_iter iter;
2353 use_operand_p use_p;
2354 tree op;
2355
2356 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2357 {
2358 op = USE_FROM_PTR (use_p);
2359 record_invariant (data, op, nonlinear_use: false);
2360 }
2361}
2362
2363/* CALL calls an internal function. If operand *OP_P will become an
2364 address when the call is expanded, return the type of the memory
2365 being addressed, otherwise return null. */
2366
2367static tree
2368get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2369{
2370 switch (gimple_call_internal_fn (gs: call))
2371 {
2372 case IFN_MASK_LOAD:
2373 case IFN_MASK_LOAD_LANES:
2374 case IFN_MASK_LEN_LOAD_LANES:
2375 case IFN_LEN_LOAD:
2376 case IFN_MASK_LEN_LOAD:
2377 if (op_p == gimple_call_arg_ptr (gs: call, index: 0))
2378 return TREE_TYPE (gimple_call_lhs (call));
2379 return NULL_TREE;
2380
2381 case IFN_MASK_STORE:
2382 case IFN_MASK_STORE_LANES:
2383 case IFN_MASK_LEN_STORE_LANES:
2384 case IFN_LEN_STORE:
2385 case IFN_MASK_LEN_STORE:
2386 {
2387 if (op_p == gimple_call_arg_ptr (gs: call, index: 0))
2388 {
2389 internal_fn ifn = gimple_call_internal_fn (gs: call);
2390 int index = internal_fn_stored_value_index (ifn);
2391 return TREE_TYPE (gimple_call_arg (call, index));
2392 }
2393 return NULL_TREE;
2394 }
2395
2396 default:
2397 return NULL_TREE;
2398 }
2399}
2400
2401/* IV is a (non-address) iv that describes operand *OP_P of STMT.
2402 Return true if the operand will become an address when STMT
2403 is expanded and record the associated address use if so. */
2404
2405static bool
2406find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2407 struct iv *iv)
2408{
2409 /* Fail if base object of this memory reference is unknown. */
2410 if (iv->base_object == NULL_TREE)
2411 return false;
2412
2413 tree mem_type = NULL_TREE;
2414 if (gcall *call = dyn_cast <gcall *> (p: stmt))
2415 if (gimple_call_internal_p (gs: call))
2416 mem_type = get_mem_type_for_internal_fn (call, op_p);
2417 if (mem_type)
2418 {
2419 iv = alloc_iv (data, base: iv->base, step: iv->step);
2420 record_group_use (data, use_p: op_p, iv, stmt, type: USE_PTR_ADDRESS, mem_type);
2421 return true;
2422 }
2423 return false;
2424}
2425
2426/* Finds interesting uses of induction variables in the statement STMT. */
2427
2428static void
2429find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2430{
2431 struct iv *iv;
2432 tree op, *lhs, *rhs;
2433 ssa_op_iter iter;
2434 use_operand_p use_p;
2435 enum tree_code code;
2436
2437 find_invariants_stmt (data, stmt);
2438
2439 if (gimple_code (g: stmt) == GIMPLE_COND)
2440 {
2441 find_interesting_uses_cond (data, stmt);
2442 return;
2443 }
2444
2445 if (is_gimple_assign (gs: stmt))
2446 {
2447 lhs = gimple_assign_lhs_ptr (gs: stmt);
2448 rhs = gimple_assign_rhs1_ptr (gs: stmt);
2449
2450 if (TREE_CODE (*lhs) == SSA_NAME)
2451 {
2452 /* If the statement defines an induction variable, the uses are not
2453 interesting by themselves. */
2454
2455 iv = get_iv (data, var: *lhs);
2456
2457 if (iv && !integer_zerop (iv->step))
2458 return;
2459 }
2460
2461 code = gimple_assign_rhs_code (gs: stmt);
2462 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2463 && (REFERENCE_CLASS_P (*rhs)
2464 || is_gimple_val (*rhs)))
2465 {
2466 if (REFERENCE_CLASS_P (*rhs))
2467 find_interesting_uses_address (data, stmt, op_p: rhs);
2468 else
2469 find_interesting_uses_op (data, op: *rhs);
2470
2471 if (REFERENCE_CLASS_P (*lhs))
2472 find_interesting_uses_address (data, stmt, op_p: lhs);
2473 return;
2474 }
2475 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2476 {
2477 find_interesting_uses_cond (data, stmt);
2478 return;
2479 }
2480
2481 /* TODO -- we should also handle address uses of type
2482
2483 memory = call (whatever);
2484
2485 and
2486
2487 call (memory). */
2488 }
2489
2490 if (gimple_code (g: stmt) == GIMPLE_PHI
2491 && gimple_bb (g: stmt) == data->current_loop->header)
2492 {
2493 iv = get_iv (data, PHI_RESULT (stmt));
2494
2495 if (iv && !integer_zerop (iv->step))
2496 return;
2497 }
2498
2499 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2500 {
2501 op = USE_FROM_PTR (use_p);
2502
2503 if (TREE_CODE (op) != SSA_NAME)
2504 continue;
2505
2506 iv = get_iv (data, var: op);
2507 if (!iv)
2508 continue;
2509
2510 if (!find_address_like_use (data, stmt, op_p: use_p->use, iv))
2511 find_interesting_uses_op (data, op);
2512 }
2513}
2514
2515/* Finds interesting uses of induction variables outside of loops
2516 on loop exit edge EXIT. */
2517
2518static void
2519find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2520{
2521 gphi *phi;
2522 gphi_iterator psi;
2523 tree def;
2524
2525 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (i: psi); gsi_next (i: &psi))
2526 {
2527 phi = psi.phi ();
2528 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2529 if (!virtual_operand_p (op: def))
2530 find_interesting_uses_op (data, op: def);
2531 }
2532}
2533
2534/* Return TRUE if OFFSET is within the range of [base + offset] addressing
2535 mode for memory reference represented by USE. */
2536
2537static GTY (()) vec<rtx, va_gc> *addr_list;
2538
2539static bool
2540addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2541{
2542 rtx reg, addr;
2543 unsigned list_index;
2544 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2545 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2546
2547 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2548 if (list_index >= vec_safe_length (v: addr_list))
2549 vec_safe_grow_cleared (v&: addr_list, len: list_index + MAX_MACHINE_MODE, exact: true);
2550
2551 addr = (*addr_list)[list_index];
2552 if (!addr)
2553 {
2554 addr_mode = targetm.addr_space.address_mode (as);
2555 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2556 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2557 (*addr_list)[list_index] = addr;
2558 }
2559 else
2560 addr_mode = GET_MODE (addr);
2561
2562 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2563 return (memory_address_addr_space_p (mem_mode, addr, as));
2564}
2565
2566/* Comparison function to sort group in ascending order of addr_offset. */
2567
2568static int
2569group_compare_offset (const void *a, const void *b)
2570{
2571 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2572 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2573
2574 return compare_sizes_for_sort (a: (*u1)->addr_offset, b: (*u2)->addr_offset);
2575}
2576
2577/* Check if small groups should be split. Return true if no group
2578 contains more than two uses with distinct addr_offsets. Return
2579 false otherwise. We want to split such groups because:
2580
2581 1) Small groups don't have much benefit and may interfer with
2582 general candidate selection.
2583 2) Size for problem with only small groups is usually small and
2584 general algorithm can handle it well.
2585
2586 TODO -- Above claim may not hold when we want to merge memory
2587 accesses with conseuctive addresses. */
2588
2589static bool
2590split_small_address_groups_p (struct ivopts_data *data)
2591{
2592 unsigned int i, j, distinct = 1;
2593 struct iv_use *pre;
2594 struct iv_group *group;
2595
2596 for (i = 0; i < data->vgroups.length (); i++)
2597 {
2598 group = data->vgroups[i];
2599 if (group->vuses.length () == 1)
2600 continue;
2601
2602 gcc_assert (address_p (group->type));
2603 if (group->vuses.length () == 2)
2604 {
2605 if (compare_sizes_for_sort (a: group->vuses[0]->addr_offset,
2606 b: group->vuses[1]->addr_offset) > 0)
2607 std::swap (a&: group->vuses[0], b&: group->vuses[1]);
2608 }
2609 else
2610 group->vuses.qsort (group_compare_offset);
2611
2612 if (distinct > 2)
2613 continue;
2614
2615 distinct = 1;
2616 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2617 {
2618 if (maybe_ne (a: group->vuses[j]->addr_offset, b: pre->addr_offset))
2619 {
2620 pre = group->vuses[j];
2621 distinct++;
2622 }
2623
2624 if (distinct > 2)
2625 break;
2626 }
2627 }
2628
2629 return (distinct <= 2);
2630}
2631
2632/* For each group of address type uses, this function further groups
2633 these uses according to the maximum offset supported by target's
2634 [base + offset] addressing mode. */
2635
2636static void
2637split_address_groups (struct ivopts_data *data)
2638{
2639 unsigned int i, j;
2640 /* Always split group. */
2641 bool split_p = split_small_address_groups_p (data);
2642
2643 for (i = 0; i < data->vgroups.length (); i++)
2644 {
2645 struct iv_group *new_group = NULL;
2646 struct iv_group *group = data->vgroups[i];
2647 struct iv_use *use = group->vuses[0];
2648
2649 use->id = 0;
2650 use->group_id = group->id;
2651 if (group->vuses.length () == 1)
2652 continue;
2653
2654 gcc_assert (address_p (use->type));
2655
2656 for (j = 1; j < group->vuses.length ();)
2657 {
2658 struct iv_use *next = group->vuses[j];
2659 poly_int64 offset = next->addr_offset - use->addr_offset;
2660
2661 /* Split group if aksed to, or the offset against the first
2662 use can't fit in offset part of addressing mode. IV uses
2663 having the same offset are still kept in one group. */
2664 if (maybe_ne (a: offset, b: 0)
2665 && (split_p || !addr_offset_valid_p (use, offset)))
2666 {
2667 if (!new_group)
2668 new_group = record_group (data, type: group->type);
2669 group->vuses.ordered_remove (ix: j);
2670 new_group->vuses.safe_push (obj: next);
2671 continue;
2672 }
2673
2674 next->id = j;
2675 next->group_id = group->id;
2676 j++;
2677 }
2678 }
2679}
2680
2681/* Finds uses of the induction variables that are interesting. */
2682
2683static void
2684find_interesting_uses (struct ivopts_data *data, basic_block *body)
2685{
2686 basic_block bb;
2687 gimple_stmt_iterator bsi;
2688 unsigned i;
2689 edge e;
2690
2691 for (i = 0; i < data->current_loop->num_nodes; i++)
2692 {
2693 edge_iterator ei;
2694 bb = body[i];
2695
2696 FOR_EACH_EDGE (e, ei, bb->succs)
2697 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2698 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699 find_interesting_uses_outside (data, exit: e);
2700
2701 for (bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2702 find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2703 for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2704 if (!is_gimple_debug (gs: gsi_stmt (i: bsi)))
2705 find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2706 }
2707
2708 split_address_groups (data);
2709
2710 if (dump_file && (dump_flags & TDF_DETAILS))
2711 {
2712 fprintf (stream: dump_file, format: "\n<IV Groups>:\n");
2713 dump_groups (file: dump_file, data);
2714 fprintf (stream: dump_file, format: "\n");
2715 }
2716}
2717
2718/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2719 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2720 we are at the top-level of the processed address. */
2721
2722static tree
2723strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2724 poly_int64 *offset)
2725{
2726 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2727 enum tree_code code;
2728 tree type, orig_type = TREE_TYPE (expr);
2729 poly_int64 off0, off1;
2730 HOST_WIDE_INT st;
2731 tree orig_expr = expr;
2732
2733 STRIP_NOPS (expr);
2734
2735 type = TREE_TYPE (expr);
2736 code = TREE_CODE (expr);
2737 *offset = 0;
2738
2739 switch (code)
2740 {
2741 case POINTER_PLUS_EXPR:
2742 case PLUS_EXPR:
2743 case MINUS_EXPR:
2744 op0 = TREE_OPERAND (expr, 0);
2745 op1 = TREE_OPERAND (expr, 1);
2746
2747 op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2748 op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2749
2750 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2751 if (op0 == TREE_OPERAND (expr, 0)
2752 && op1 == TREE_OPERAND (expr, 1))
2753 return orig_expr;
2754
2755 if (integer_zerop (op1))
2756 expr = op0;
2757 else if (integer_zerop (op0))
2758 {
2759 if (code == MINUS_EXPR)
2760 {
2761 if (TYPE_OVERFLOW_UNDEFINED (type))
2762 {
2763 type = unsigned_type_for (type);
2764 op1 = fold_convert (type, op1);
2765 }
2766 expr = fold_build1 (NEGATE_EXPR, type, op1);
2767 }
2768 else
2769 expr = op1;
2770 }
2771 else
2772 {
2773 if (TYPE_OVERFLOW_UNDEFINED (type))
2774 {
2775 type = unsigned_type_for (type);
2776 if (code == POINTER_PLUS_EXPR)
2777 code = PLUS_EXPR;
2778 op0 = fold_convert (type, op0);
2779 op1 = fold_convert (type, op1);
2780 }
2781 expr = fold_build2 (code, type, op0, op1);
2782 }
2783
2784 return fold_convert (orig_type, expr);
2785
2786 case MULT_EXPR:
2787 op1 = TREE_OPERAND (expr, 1);
2788 if (!cst_and_fits_in_hwi (op1))
2789 return orig_expr;
2790
2791 op0 = TREE_OPERAND (expr, 0);
2792 op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2793 if (op0 == TREE_OPERAND (expr, 0))
2794 return orig_expr;
2795
2796 *offset = off0 * int_cst_value (op1);
2797 if (integer_zerop (op0))
2798 expr = op0;
2799 else
2800 {
2801 if (TYPE_OVERFLOW_UNDEFINED (type))
2802 {
2803 type = unsigned_type_for (type);
2804 op0 = fold_convert (type, op0);
2805 op1 = fold_convert (type, op1);
2806 }
2807 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2808 }
2809
2810 return fold_convert (orig_type, expr);
2811
2812 case ARRAY_REF:
2813 case ARRAY_RANGE_REF:
2814 if (!inside_addr)
2815 return orig_expr;
2816
2817 step = array_ref_element_size (expr);
2818 if (!cst_and_fits_in_hwi (step))
2819 break;
2820
2821 st = int_cst_value (step);
2822 op1 = TREE_OPERAND (expr, 1);
2823 op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2824 *offset = off1 * st;
2825
2826 if (top_compref
2827 && integer_zerop (op1))
2828 {
2829 /* Strip the component reference completely. */
2830 op0 = TREE_OPERAND (expr, 0);
2831 op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2832 *offset += off0;
2833 return op0;
2834 }
2835 break;
2836
2837 case COMPONENT_REF:
2838 {
2839 tree field;
2840
2841 if (!inside_addr)
2842 return orig_expr;
2843
2844 tmp = component_ref_field_offset (expr);
2845 field = TREE_OPERAND (expr, 1);
2846 if (top_compref
2847 && cst_and_fits_in_hwi (tmp)
2848 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2849 {
2850 HOST_WIDE_INT boffset, abs_off;
2851
2852 /* Strip the component reference completely. */
2853 op0 = TREE_OPERAND (expr, 0);
2854 op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2855 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2856 abs_off = abs_hwi (x: boffset) / BITS_PER_UNIT;
2857 if (boffset < 0)
2858 abs_off = -abs_off;
2859
2860 *offset = off0 + int_cst_value (tmp) + abs_off;
2861 return op0;
2862 }
2863 }
2864 break;
2865
2866 case ADDR_EXPR:
2867 op0 = TREE_OPERAND (expr, 0);
2868 op0 = strip_offset_1 (expr: op0, inside_addr: true, top_compref: true, offset: &off0);
2869 *offset += off0;
2870
2871 if (op0 == TREE_OPERAND (expr, 0))
2872 return orig_expr;
2873
2874 expr = build_fold_addr_expr (op0);
2875 return fold_convert (orig_type, expr);
2876
2877 case MEM_REF:
2878 /* ??? Offset operand? */
2879 inside_addr = false;
2880 break;
2881
2882 default:
2883 if (ptrdiff_tree_p (expr, offset) && maybe_ne (a: *offset, b: 0))
2884 return build_int_cst (orig_type, 0);
2885 return orig_expr;
2886 }
2887
2888 /* Default handling of expressions for that we want to recurse into
2889 the first operand. */
2890 op0 = TREE_OPERAND (expr, 0);
2891 op0 = strip_offset_1 (expr: op0, inside_addr, top_compref: false, offset: &off0);
2892 *offset += off0;
2893
2894 if (op0 == TREE_OPERAND (expr, 0)
2895 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2896 return orig_expr;
2897
2898 expr = copy_node (expr);
2899 TREE_OPERAND (expr, 0) = op0;
2900 if (op1)
2901 TREE_OPERAND (expr, 1) = op1;
2902
2903 /* Inside address, we might strip the top level component references,
2904 thus changing type of the expression. Handling of ADDR_EXPR
2905 will fix that. */
2906 expr = fold_convert (orig_type, expr);
2907
2908 return expr;
2909}
2910
2911/* Strips constant offsets from EXPR and stores them to OFFSET. */
2912
2913static tree
2914strip_offset (tree expr, poly_uint64 *offset)
2915{
2916 poly_int64 off;
2917 tree core = strip_offset_1 (expr, inside_addr: false, top_compref: false, offset: &off);
2918 *offset = off;
2919 return core;
2920}
2921
2922/* Returns variant of TYPE that can be used as base for different uses.
2923 We return unsigned type with the same precision, which avoids problems
2924 with overflows. */
2925
2926static tree
2927generic_type_for (tree type)
2928{
2929 if (POINTER_TYPE_P (type))
2930 return unsigned_type_for (type);
2931
2932 if (TYPE_UNSIGNED (type))
2933 return type;
2934
2935 return unsigned_type_for (type);
2936}
2937
2938/* Private data for walk_tree. */
2939
2940struct walk_tree_data
2941{
2942 bitmap *inv_vars;
2943 struct ivopts_data *idata;
2944};
2945
2946/* Callback function for walk_tree, it records invariants and symbol
2947 reference in *EXPR_P. DATA is the structure storing result info. */
2948
2949static tree
2950find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2951{
2952 tree op = *expr_p;
2953 struct version_info *info;
2954 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2955
2956 if (TREE_CODE (op) != SSA_NAME)
2957 return NULL_TREE;
2958
2959 info = name_info (data: wdata->idata, name: op);
2960 /* Because we expand simple operations when finding IVs, loop invariant
2961 variable that isn't referred by the original loop could be used now.
2962 Record such invariant variables here. */
2963 if (!info->iv)
2964 {
2965 struct ivopts_data *idata = wdata->idata;
2966 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2967
2968 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2969 {
2970 tree steptype = TREE_TYPE (op);
2971 if (POINTER_TYPE_P (steptype))
2972 steptype = sizetype;
2973 set_iv (data: idata, iv: op, base: op, step: build_int_cst (steptype, 0), no_overflow: true);
2974 record_invariant (data: idata, op, nonlinear_use: false);
2975 }
2976 }
2977 if (!info->inv_id || info->has_nonlin_use)
2978 return NULL_TREE;
2979
2980 if (!*wdata->inv_vars)
2981 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2982 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2983
2984 return NULL_TREE;
2985}
2986
2987/* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
2988 store it. */
2989
2990static inline void
2991find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2992{
2993 struct walk_tree_data wdata;
2994
2995 if (!inv_vars)
2996 return;
2997
2998 wdata.idata = data;
2999 wdata.inv_vars = inv_vars;
3000 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3001}
3002
3003/* Get entry from invariant expr hash table for INV_EXPR. New entry
3004 will be recorded if it doesn't exist yet. Given below two exprs:
3005 inv_expr + cst1, inv_expr + cst2
3006 It's hard to make decision whether constant part should be stripped
3007 or not. We choose to not strip based on below facts:
3008 1) We need to count ADD cost for constant part if it's stripped,
3009 which isn't always trivial where this functions is called.
3010 2) Stripping constant away may be conflict with following loop
3011 invariant hoisting pass.
3012 3) Not stripping constant away results in more invariant exprs,
3013 which usually leads to decision preferring lower reg pressure. */
3014
3015static iv_inv_expr_ent *
3016get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3017{
3018 STRIP_NOPS (inv_expr);
3019
3020 if (poly_int_tree_p (t: inv_expr)
3021 || TREE_CODE (inv_expr) == SSA_NAME)
3022 return NULL;
3023
3024 /* Don't strip constant part away as we used to. */
3025
3026 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3027 struct iv_inv_expr_ent ent;
3028 ent.expr = inv_expr;
3029 ent.hash = iterative_hash_expr (tree: inv_expr, seed: 0);
3030 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (value: &ent, insert: INSERT);
3031
3032 if (!*slot)
3033 {
3034 *slot = XNEW (struct iv_inv_expr_ent);
3035 (*slot)->expr = inv_expr;
3036 (*slot)->hash = ent.hash;
3037 (*slot)->id = ++data->max_inv_expr_id;
3038 }
3039
3040 return *slot;
3041}
3042
3043
3044/* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3045 unsuitable as ivopts candidates for potentially involving undefined
3046 behavior. */
3047
3048static tree
3049find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3050{
3051 basic_block bb = (basic_block) bb_;
3052 if (TREE_CODE (*tp) == SSA_NAME
3053 && ssa_name_maybe_undef_p (var: *tp)
3054 && !ssa_name_any_use_dominates_bb_p (var: *tp, bb))
3055 return *tp;
3056 if (!EXPR_P (*tp))
3057 *walk_subtrees = 0;
3058 return NULL;
3059}
3060
3061/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3062 position to POS. If USE is not NULL, the candidate is set as related to
3063 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3064 replacement of the final value of the iv by a direct computation. */
3065
3066static struct iv_cand *
3067add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3068 enum iv_position pos, struct iv_use *use,
3069 gimple *incremented_at, struct iv *orig_iv = NULL,
3070 bool doloop = false)
3071{
3072 unsigned i;
3073 struct iv_cand *cand = NULL;
3074 tree type, orig_type;
3075
3076 gcc_assert (base && step);
3077
3078 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3079 live, but the ivopts code may replace a real pointer with one
3080 pointing before or after the memory block that is then adjusted
3081 into the memory block during the loop. FIXME: It would likely be
3082 better to actually force the pointer live and still use ivopts;
3083 for example, it would be enough to write the pointer into memory
3084 and keep it there until after the loop. */
3085 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3086 return NULL;
3087
3088 /* If BASE contains undefined SSA names make sure we only record
3089 the original IV. */
3090 bool involves_undefs = false;
3091 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3092 {
3093 if (pos != IP_ORIGINAL)
3094 return NULL;
3095 important = false;
3096 involves_undefs = true;
3097 }
3098
3099 /* For non-original variables, make sure their values are computed in a type
3100 that does not invoke undefined behavior on overflows (since in general,
3101 we cannot prove that these induction variables are non-wrapping). */
3102 if (pos != IP_ORIGINAL)
3103 {
3104 orig_type = TREE_TYPE (base);
3105 type = generic_type_for (type: orig_type);
3106 if (type != orig_type)
3107 {
3108 base = fold_convert (type, base);
3109 step = fold_convert (type, step);
3110 }
3111 }
3112
3113 for (i = 0; i < data->vcands.length (); i++)
3114 {
3115 cand = data->vcands[i];
3116
3117 if (cand->pos != pos)
3118 continue;
3119
3120 if (cand->incremented_at != incremented_at
3121 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3122 && cand->ainc_use != use))
3123 continue;
3124
3125 if (operand_equal_p (base, cand->iv->base, flags: 0)
3126 && operand_equal_p (step, cand->iv->step, flags: 0)
3127 && (TYPE_PRECISION (TREE_TYPE (base))
3128 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3129 break;
3130 }
3131
3132 if (i == data->vcands.length ())
3133 {
3134 cand = XCNEW (struct iv_cand);
3135 cand->id = i;
3136 cand->iv = alloc_iv (data, base, step);
3137 cand->pos = pos;
3138 if (pos != IP_ORIGINAL)
3139 {
3140 if (doloop)
3141 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3142 else
3143 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3144 cand->var_after = cand->var_before;
3145 }
3146 cand->important = important;
3147 cand->involves_undefs = involves_undefs;
3148 cand->incremented_at = incremented_at;
3149 cand->doloop_p = doloop;
3150 data->vcands.safe_push (obj: cand);
3151
3152 if (!poly_int_tree_p (t: step))
3153 {
3154 find_inv_vars (data, expr_p: &step, inv_vars: &cand->inv_vars);
3155
3156 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, inv_expr: step);
3157 /* Share bitmap between inv_vars and inv_exprs for cand. */
3158 if (inv_expr != NULL)
3159 {
3160 cand->inv_exprs = cand->inv_vars;
3161 cand->inv_vars = NULL;
3162 if (cand->inv_exprs)
3163 bitmap_clear (cand->inv_exprs);
3164 else
3165 cand->inv_exprs = BITMAP_ALLOC (NULL);
3166
3167 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 }
3169 }
3170
3171 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3172 cand->ainc_use = use;
3173 else
3174 cand->ainc_use = NULL;
3175
3176 cand->orig_iv = orig_iv;
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3178 dump_cand (file: dump_file, cand);
3179 }
3180
3181 cand->important |= important;
3182 cand->doloop_p |= doloop;
3183
3184 /* Relate candidate to the group for which it is added. */
3185 if (use)
3186 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187
3188 return cand;
3189}
3190
3191/* Returns true if incrementing the induction variable at the end of the LOOP
3192 is allowed.
3193
3194 The purpose is to avoid splitting latch edge with a biv increment, thus
3195 creating a jump, possibly confusing other optimization passes and leaving
3196 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3197 available (so we do not have a better alternative), or if the latch edge
3198 is already nonempty. */
3199
3200static bool
3201allow_ip_end_pos_p (class loop *loop)
3202{
3203 /* Do not allow IP_END when creating the IV would need to split the
3204 latch edge as that makes all IP_NORMAL invalid. */
3205 auto pos = gsi_last_bb (bb: ip_end_pos (loop));
3206 if (!gsi_end_p (i: pos) && stmt_ends_bb_p (*pos))
3207 return false;
3208
3209 if (!ip_normal_pos (loop))
3210 return true;
3211
3212 if (!empty_block_p (ip_end_pos (loop)))
3213 return true;
3214
3215 return false;
3216}
3217
3218/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3219 Important field is set to IMPORTANT. */
3220
3221static void
3222add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3223 bool important, struct iv_use *use)
3224{
3225 basic_block use_bb = gimple_bb (g: use->stmt);
3226 machine_mode mem_mode;
3227 unsigned HOST_WIDE_INT cstepi;
3228
3229 /* If we insert the increment in any position other than the standard
3230 ones, we must ensure that it is incremented once per iteration.
3231 It must not be in an inner nested loop, or one side of an if
3232 statement. */
3233 if (use_bb->loop_father != data->current_loop
3234 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3235 || stmt_can_throw_internal (cfun, use->stmt)
3236 || !cst_and_fits_in_hwi (step))
3237 return;
3238
3239 cstepi = int_cst_value (step);
3240
3241 mem_mode = TYPE_MODE (use->mem_type);
3242 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3243 || USE_STORE_PRE_INCREMENT (mem_mode))
3244 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3245 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3246 || USE_STORE_PRE_DECREMENT (mem_mode))
3247 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3248 {
3249 enum tree_code code = MINUS_EXPR;
3250 tree new_base;
3251 tree new_step = step;
3252
3253 if (POINTER_TYPE_P (TREE_TYPE (base)))
3254 {
3255 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3256 code = POINTER_PLUS_EXPR;
3257 }
3258 else
3259 new_step = fold_convert (TREE_TYPE (base), new_step);
3260 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3261 add_candidate_1 (data, base: new_base, step, important, pos: IP_BEFORE_USE, use,
3262 incremented_at: use->stmt);
3263 }
3264 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3265 || USE_STORE_POST_INCREMENT (mem_mode))
3266 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3267 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3268 || USE_STORE_POST_DECREMENT (mem_mode))
3269 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3270 {
3271 add_candidate_1 (data, base, step, important, pos: IP_AFTER_USE, use,
3272 incremented_at: use->stmt);
3273 }
3274}
3275
3276/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3277 position to POS. If USE is not NULL, the candidate is set as related to
3278 it. The candidate computation is scheduled before exit condition and at
3279 the end of loop. */
3280
3281static void
3282add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3283 struct iv_use *use, struct iv *orig_iv = NULL,
3284 bool doloop = false)
3285{
3286 if (ip_normal_pos (data->current_loop))
3287 add_candidate_1 (data, base, step, important, pos: IP_NORMAL, use, NULL, orig_iv,
3288 doloop);
3289 /* Exclude doloop candidate here since it requires decrement then comparison
3290 and jump, the IP_END position doesn't match. */
3291 if (!doloop && ip_end_pos (data->current_loop)
3292 && allow_ip_end_pos_p (loop: data->current_loop))
3293 add_candidate_1 (data, base, step, important, pos: IP_END, use, NULL, orig_iv);
3294}
3295
3296/* Adds standard iv candidates. */
3297
3298static void
3299add_standard_iv_candidates (struct ivopts_data *data)
3300{
3301 add_candidate (data, integer_zero_node, integer_one_node, important: true, NULL);
3302
3303 /* The same for a double-integer type if it is still fast enough. */
3304 if (TYPE_PRECISION
3305 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3306 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3307 add_candidate (data, base: build_int_cst (long_integer_type_node, 0),
3308 step: build_int_cst (long_integer_type_node, 1), important: true, NULL);
3309
3310 /* The same for a double-integer type if it is still fast enough. */
3311 if (TYPE_PRECISION
3312 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3313 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3314 add_candidate (data, base: build_int_cst (long_long_integer_type_node, 0),
3315 step: build_int_cst (long_long_integer_type_node, 1), important: true, NULL);
3316}
3317
3318
3319/* Adds candidates bases on the old induction variable IV. */
3320
3321static void
3322add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3323{
3324 gimple *phi;
3325 tree def;
3326 struct iv_cand *cand;
3327
3328 /* Check if this biv is used in address type use. */
3329 if (iv->no_overflow && iv->have_address_use
3330 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3331 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3332 {
3333 tree base = fold_convert (sizetype, iv->base);
3334 tree step = fold_convert (sizetype, iv->step);
3335
3336 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3337 add_candidate (data, base, step, important: true, NULL, orig_iv: iv);
3338 /* Add iv cand of the original type only if it has nonlinear use. */
3339 if (iv->nonlin_use)
3340 add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3341 }
3342 else
3343 add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3344
3345 /* The same, but with initial value zero. */
3346 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3347 add_candidate (data, size_int (0), step: iv->step, important: true, NULL);
3348 else
3349 add_candidate (data, base: build_int_cst (TREE_TYPE (iv->base), 0),
3350 step: iv->step, important: true, NULL);
3351
3352 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3353 if (gimple_code (g: phi) == GIMPLE_PHI)
3354 {
3355 /* Additionally record the possibility of leaving the original iv
3356 untouched. */
3357 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3358 /* Don't add candidate if it's from another PHI node because
3359 it's an affine iv appearing in the form of PEELED_CHREC. */
3360 phi = SSA_NAME_DEF_STMT (def);
3361 if (gimple_code (g: phi) != GIMPLE_PHI)
3362 {
3363 cand = add_candidate_1 (data,
3364 base: iv->base, step: iv->step, important: true, pos: IP_ORIGINAL, NULL,
3365 SSA_NAME_DEF_STMT (def));
3366 if (cand)
3367 {
3368 cand->var_before = iv->ssa_name;
3369 cand->var_after = def;
3370 }
3371 }
3372 else
3373 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3374 }
3375}
3376
3377/* Adds candidates based on the old induction variables. */
3378
3379static void
3380add_iv_candidate_for_bivs (struct ivopts_data *data)
3381{
3382 unsigned i;
3383 struct iv *iv;
3384 bitmap_iterator bi;
3385
3386 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3387 {
3388 iv = ver_info (data, ver: i)->iv;
3389 if (iv && iv->biv_p && !integer_zerop (iv->step))
3390 add_iv_candidate_for_biv (data, iv);
3391 }
3392}
3393
3394/* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3395
3396static void
3397record_common_cand (struct ivopts_data *data, tree base,
3398 tree step, struct iv_use *use)
3399{
3400 class iv_common_cand ent;
3401 class iv_common_cand **slot;
3402
3403 ent.base = base;
3404 ent.step = step;
3405 ent.hash = iterative_hash_expr (tree: base, seed: 0);
3406 ent.hash = iterative_hash_expr (tree: step, seed: ent.hash);
3407
3408 slot = data->iv_common_cand_tab->find_slot (value: &ent, insert: INSERT);
3409 if (*slot == NULL)
3410 {
3411 *slot = new iv_common_cand ();
3412 (*slot)->base = base;
3413 (*slot)->step = step;
3414 (*slot)->uses.create (nelems: 8);
3415 (*slot)->hash = ent.hash;
3416 data->iv_common_cands.safe_push (obj: (*slot));
3417 }
3418
3419 gcc_assert (use != NULL);
3420 (*slot)->uses.safe_push (obj: use);
3421 return;
3422}
3423
3424/* Comparison function used to sort common candidates. */
3425
3426static int
3427common_cand_cmp (const void *p1, const void *p2)
3428{
3429 unsigned n1, n2;
3430 const class iv_common_cand *const *const ccand1
3431 = (const class iv_common_cand *const *)p1;
3432 const class iv_common_cand *const *const ccand2
3433 = (const class iv_common_cand *const *)p2;
3434
3435 n1 = (*ccand1)->uses.length ();
3436 n2 = (*ccand2)->uses.length ();
3437 return n2 - n1;
3438}
3439
3440/* Adds IV candidates based on common candidated recorded. */
3441
3442static void
3443add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3444{
3445 unsigned i, j;
3446 struct iv_cand *cand_1, *cand_2;
3447
3448 data->iv_common_cands.qsort (common_cand_cmp);
3449 for (i = 0; i < data->iv_common_cands.length (); i++)
3450 {
3451 class iv_common_cand *ptr = data->iv_common_cands[i];
3452
3453 /* Only add IV candidate if it's derived from multiple uses. */
3454 if (ptr->uses.length () <= 1)
3455 break;
3456
3457 cand_1 = NULL;
3458 cand_2 = NULL;
3459 if (ip_normal_pos (data->current_loop))
3460 cand_1 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3461 important: false, pos: IP_NORMAL, NULL, NULL);
3462
3463 if (ip_end_pos (data->current_loop)
3464 && allow_ip_end_pos_p (loop: data->current_loop))
3465 cand_2 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3466 important: false, pos: IP_END, NULL, NULL);
3467
3468 /* Bind deriving uses and the new candidates. */
3469 for (j = 0; j < ptr->uses.length (); j++)
3470 {
3471 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3472 if (cand_1)
3473 bitmap_set_bit (group->related_cands, cand_1->id);
3474 if (cand_2)
3475 bitmap_set_bit (group->related_cands, cand_2->id);
3476 }
3477 }
3478
3479 /* Release data since it is useless from this point. */
3480 data->iv_common_cand_tab->empty ();
3481 data->iv_common_cands.truncate (size: 0);
3482}
3483
3484/* Adds candidates based on the value of USE's iv. */
3485
3486static void
3487add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3488{
3489 poly_uint64 offset;
3490 tree base;
3491 struct iv *iv = use->iv;
3492 tree basetype = TREE_TYPE (iv->base);
3493
3494 /* Don't add candidate for iv_use with non integer, pointer or non-mode
3495 precision types, instead, add candidate for the corresponding scev in
3496 unsigned type with the same precision. See PR93674 for more info. */
3497 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3498 || !type_has_mode_precision_p (t: basetype))
3499 {
3500 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3501 TYPE_UNSIGNED (basetype));
3502 add_candidate (data, fold_convert (basetype, iv->base),
3503 fold_convert (basetype, iv->step), important: false, NULL);
3504 return;
3505 }
3506
3507 add_candidate (data, base: iv->base, step: iv->step, important: false, use);
3508
3509 /* Record common candidate for use in case it can be shared by others. */
3510 record_common_cand (data, base: iv->base, step: iv->step, use);
3511
3512 /* Record common candidate with initial value zero. */
3513 basetype = TREE_TYPE (iv->base);
3514 if (POINTER_TYPE_P (basetype))
3515 basetype = sizetype;
3516 record_common_cand (data, base: build_int_cst (basetype, 0), step: iv->step, use);
3517
3518 /* Compare the cost of an address with an unscaled index with the cost of
3519 an address with a scaled index and add candidate if useful. */
3520 poly_int64 step;
3521 if (use != NULL
3522 && poly_int_tree_p (t: iv->step, value: &step)
3523 && address_p (type: use->type))
3524 {
3525 poly_int64 new_step;
3526 unsigned int fact = preferred_mem_scale_factor
3527 (base: use->iv->base,
3528 TYPE_MODE (use->mem_type),
3529 speed: optimize_loop_for_speed_p (data->current_loop));
3530
3531 if (fact != 1
3532 && multiple_p (a: step, b: fact, multiple: &new_step))
3533 add_candidate (data, size_int (0),
3534 step: wide_int_to_tree (sizetype, cst: new_step),
3535 important: true, NULL);
3536 }
3537
3538 /* Record common candidate with constant offset stripped in base.
3539 Like the use itself, we also add candidate directly for it. */
3540 base = strip_offset (expr: iv->base, offset: &offset);
3541 if (maybe_ne (a: offset, b: 0U) || base != iv->base)
3542 {
3543 record_common_cand (data, base, step: iv->step, use);
3544 add_candidate (data, base, step: iv->step, important: false, use);
3545 }
3546
3547 /* Record common candidate with base_object removed in base. */
3548 base = iv->base;
3549 STRIP_NOPS (base);
3550 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3551 {
3552 tree step = iv->step;
3553
3554 STRIP_NOPS (step);
3555 base = TREE_OPERAND (base, 1);
3556 step = fold_convert (sizetype, step);
3557 record_common_cand (data, base, step, use);
3558 /* Also record common candidate with offset stripped. */
3559 tree alt_base, alt_offset;
3560 split_constant_offset (base, &alt_base, &alt_offset);
3561 if (!integer_zerop (alt_offset))
3562 record_common_cand (data, base: alt_base, step, use);
3563 }
3564
3565 /* At last, add auto-incremental candidates. Make such variables
3566 important since other iv uses with same base object may be based
3567 on it. */
3568 if (use != NULL && address_p (type: use->type))
3569 add_autoinc_candidates (data, base: iv->base, step: iv->step, important: true, use);
3570}
3571
3572/* Adds candidates based on the uses. */
3573
3574static void
3575add_iv_candidate_for_groups (struct ivopts_data *data)
3576{
3577 unsigned i;
3578
3579 /* Only add candidate for the first use in group. */
3580 for (i = 0; i < data->vgroups.length (); i++)
3581 {
3582 struct iv_group *group = data->vgroups[i];
3583
3584 gcc_assert (group->vuses[0] != NULL);
3585 add_iv_candidate_for_use (data, use: group->vuses[0]);
3586 }
3587 add_iv_candidate_derived_from_uses (data);
3588}
3589
3590/* Record important candidates and add them to related_cands bitmaps. */
3591
3592static void
3593record_important_candidates (struct ivopts_data *data)
3594{
3595 unsigned i;
3596 struct iv_group *group;
3597
3598 for (i = 0; i < data->vcands.length (); i++)
3599 {
3600 struct iv_cand *cand = data->vcands[i];
3601
3602 if (cand->important)
3603 bitmap_set_bit (data->important_candidates, i);
3604 }
3605
3606 data->consider_all_candidates = (data->vcands.length ()
3607 <= CONSIDER_ALL_CANDIDATES_BOUND);
3608
3609 /* Add important candidates to groups' related_cands bitmaps. */
3610 for (i = 0; i < data->vgroups.length (); i++)
3611 {
3612 group = data->vgroups[i];
3613 bitmap_ior_into (group->related_cands, data->important_candidates);
3614 }
3615}
3616
3617/* Allocates the data structure mapping the (use, candidate) pairs to costs.
3618 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3619 we allocate a simple list to every use. */
3620
3621static void
3622alloc_use_cost_map (struct ivopts_data *data)
3623{
3624 unsigned i, size, s;
3625
3626 for (i = 0; i < data->vgroups.length (); i++)
3627 {
3628 struct iv_group *group = data->vgroups[i];
3629
3630 if (data->consider_all_candidates)
3631 size = data->vcands.length ();
3632 else
3633 {
3634 s = bitmap_count_bits (group->related_cands);
3635
3636 /* Round up to the power of two, so that moduling by it is fast. */
3637 size = s ? (1 << ceil_log2 (x: s)) : 1;
3638 }
3639
3640 group->n_map_members = size;
3641 group->cost_map = XCNEWVEC (class cost_pair, size);
3642 }
3643}
3644
3645/* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3646 on invariants INV_VARS and that the value used in expressing it is
3647 VALUE, and in case of iv elimination the comparison operator is COMP. */
3648
3649static void
3650set_group_iv_cost (struct ivopts_data *data,
3651 struct iv_group *group, struct iv_cand *cand,
3652 comp_cost cost, bitmap inv_vars, tree value,
3653 enum tree_code comp, bitmap inv_exprs)
3654{
3655 unsigned i, s;
3656
3657 if (cost.infinite_cost_p ())
3658 {
3659 BITMAP_FREE (inv_vars);
3660 BITMAP_FREE (inv_exprs);
3661 return;
3662 }
3663
3664 if (data->consider_all_candidates)
3665 {
3666 group->cost_map[cand->id].cand = cand;
3667 group->cost_map[cand->id].cost = cost;
3668 group->cost_map[cand->id].inv_vars = inv_vars;
3669 group->cost_map[cand->id].inv_exprs = inv_exprs;
3670 group->cost_map[cand->id].value = value;
3671 group->cost_map[cand->id].comp = comp;
3672 return;
3673 }
3674
3675 /* n_map_members is a power of two, so this computes modulo. */
3676 s = cand->id & (group->n_map_members - 1);
3677 for (i = s; i < group->n_map_members; i++)
3678 if (!group->cost_map[i].cand)
3679 goto found;
3680 for (i = 0; i < s; i++)
3681 if (!group->cost_map[i].cand)
3682 goto found;
3683
3684 gcc_unreachable ();
3685
3686found:
3687 group->cost_map[i].cand = cand;
3688 group->cost_map[i].cost = cost;
3689 group->cost_map[i].inv_vars = inv_vars;
3690 group->cost_map[i].inv_exprs = inv_exprs;
3691 group->cost_map[i].value = value;
3692 group->cost_map[i].comp = comp;
3693}
3694
3695/* Gets cost of (GROUP, CAND) pair. */
3696
3697static class cost_pair *
3698get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3699 struct iv_cand *cand)
3700{
3701 unsigned i, s;
3702 class cost_pair *ret;
3703
3704 if (!cand)
3705 return NULL;
3706
3707 if (data->consider_all_candidates)
3708 {
3709 ret = group->cost_map + cand->id;
3710 if (!ret->cand)
3711 return NULL;
3712
3713 return ret;
3714 }
3715
3716 /* n_map_members is a power of two, so this computes modulo. */
3717 s = cand->id & (group->n_map_members - 1);
3718 for (i = s; i < group->n_map_members; i++)
3719 if (group->cost_map[i].cand == cand)
3720 return group->cost_map + i;
3721 else if (group->cost_map[i].cand == NULL)
3722 return NULL;
3723 for (i = 0; i < s; i++)
3724 if (group->cost_map[i].cand == cand)
3725 return group->cost_map + i;
3726 else if (group->cost_map[i].cand == NULL)
3727 return NULL;
3728
3729 return NULL;
3730}
3731
3732/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3733static rtx
3734produce_memory_decl_rtl (tree obj, int *regno)
3735{
3736 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3737 machine_mode address_mode = targetm.addr_space.address_mode (as);
3738 rtx x;
3739
3740 gcc_assert (obj);
3741 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3742 {
3743 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3744 x = gen_rtx_SYMBOL_REF (address_mode, name);
3745 SET_SYMBOL_REF_DECL (x, obj);
3746 x = gen_rtx_MEM (DECL_MODE (obj), x);
3747 set_mem_addr_space (x, as);
3748 targetm.encode_section_info (obj, x, true);
3749 }
3750 else
3751 {
3752 x = gen_raw_REG (address_mode, (*regno)++);
3753 x = gen_rtx_MEM (DECL_MODE (obj), x);
3754 set_mem_addr_space (x, as);
3755 }
3756
3757 return x;
3758}
3759
3760/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3761 walk_tree. DATA contains the actual fake register number. */
3762
3763static tree
3764prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3765{
3766 tree obj = NULL_TREE;
3767 rtx x = NULL_RTX;
3768 int *regno = (int *) data;
3769
3770 switch (TREE_CODE (*expr_p))
3771 {
3772 case ADDR_EXPR:
3773 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3774 handled_component_p (t: *expr_p);
3775 expr_p = &TREE_OPERAND (*expr_p, 0))
3776 continue;
3777 obj = *expr_p;
3778 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3779 x = produce_memory_decl_rtl (obj, regno);
3780 break;
3781
3782 case SSA_NAME:
3783 *ws = 0;
3784 obj = SSA_NAME_VAR (*expr_p);
3785 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3786 if (!obj)
3787 return NULL_TREE;
3788 if (!DECL_RTL_SET_P (obj))
3789 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3790 break;
3791
3792 case VAR_DECL:
3793 case PARM_DECL:
3794 case RESULT_DECL:
3795 *ws = 0;
3796 obj = *expr_p;
3797
3798 if (DECL_RTL_SET_P (obj))
3799 break;
3800
3801 if (DECL_MODE (obj) == BLKmode)
3802 x = produce_memory_decl_rtl (obj, regno);
3803 else
3804 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3805
3806 break;
3807
3808 default:
3809 break;
3810 }
3811
3812 if (x)
3813 {
3814 decl_rtl_to_reset.safe_push (obj);
3815 SET_DECL_RTL (obj, x);
3816 }
3817
3818 return NULL_TREE;
3819}
3820
3821/* Predict whether the given loop will be transformed in the RTL
3822 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3823 This is only for target independent checks, see targetm.predict_doloop_p
3824 for the target dependent ones.
3825
3826 Note that according to some initial investigation, some checks like costly
3827 niter check and invalid stmt scanning don't have much gains among general
3828 cases, so keep this as simple as possible first.
3829
3830 Some RTL specific checks seems unable to be checked in gimple, if any new
3831 checks or easy checks _are_ missing here, please add them. */
3832
3833static bool
3834generic_predict_doloop_p (struct ivopts_data *data)
3835{
3836 class loop *loop = data->current_loop;
3837
3838 /* Call target hook for target dependent checks. */
3839 if (!targetm.predict_doloop_p (loop))
3840 {
3841 if (dump_file && (dump_flags & TDF_DETAILS))
3842 fprintf (stream: dump_file, format: "Predict doloop failure due to"
3843 " target specific checks.\n");
3844 return false;
3845 }
3846
3847 /* Similar to doloop_optimize, check iteration description to know it's
3848 suitable or not. Keep it as simple as possible, feel free to extend it
3849 if you find any multiple exits cases matter. */
3850 edge exit = single_dom_exit (loop);
3851 class tree_niter_desc *niter_desc;
3852 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3853 {
3854 if (dump_file && (dump_flags & TDF_DETAILS))
3855 fprintf (stream: dump_file, format: "Predict doloop failure due to"
3856 " unexpected niters.\n");
3857 return false;
3858 }
3859
3860 /* Similar to doloop_optimize, check whether iteration count too small
3861 and not profitable. */
3862 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3863 if (est_niter == -1)
3864 est_niter = get_likely_max_loop_iterations_int (loop);
3865 if (est_niter >= 0 && est_niter < 3)
3866 {
3867 if (dump_file && (dump_flags & TDF_DETAILS))
3868 fprintf (stream: dump_file,
3869 format: "Predict doloop failure due to"
3870 " too few iterations (%u).\n",
3871 (unsigned int) est_niter);
3872 return false;
3873 }
3874
3875 return true;
3876}
3877
3878/* Determines cost of the computation of EXPR. */
3879
3880static unsigned
3881computation_cost (tree expr, bool speed)
3882{
3883 rtx_insn *seq;
3884 rtx rslt;
3885 tree type = TREE_TYPE (expr);
3886 unsigned cost;
3887 /* Avoid using hard regs in ways which may be unsupported. */
3888 int regno = LAST_VIRTUAL_REGISTER + 1;
3889 struct cgraph_node *node = cgraph_node::get (decl: current_function_decl);
3890 enum node_frequency real_frequency = node->frequency;
3891
3892 node->frequency = NODE_FREQUENCY_NORMAL;
3893 crtl->maybe_hot_insn_p = speed;
3894 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3895 start_sequence ();
3896 rslt = expand_expr (exp: expr, NULL_RTX, TYPE_MODE (type), modifier: EXPAND_NORMAL);
3897 seq = end_sequence ();
3898 default_rtl_profile ();
3899 node->frequency = real_frequency;
3900
3901 cost = seq_cost (seq, speed);
3902 if (MEM_P (rslt))
3903 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3904 TYPE_ADDR_SPACE (type), speed);
3905 else if (!REG_P (rslt))
3906 cost += set_src_cost (x: rslt, TYPE_MODE (type), speed_p: speed);
3907
3908 return cost;
3909}
3910
3911/* Returns variable containing the value of candidate CAND at statement AT. */
3912
3913static tree
3914var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3915{
3916 if (stmt_after_increment (loop, cand, stmt))
3917 return cand->var_after;
3918 else
3919 return cand->var_before;
3920}
3921
3922/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3923 same precision that is at least as wide as the precision of TYPE, stores
3924 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3925 type of A and B. */
3926
3927static tree
3928determine_common_wider_type (tree *a, tree *b)
3929{
3930 tree wider_type = NULL;
3931 tree suba, subb;
3932 tree atype = TREE_TYPE (*a);
3933
3934 if (CONVERT_EXPR_P (*a))
3935 {
3936 suba = TREE_OPERAND (*a, 0);
3937 wider_type = TREE_TYPE (suba);
3938 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3939 return atype;
3940 }
3941 else
3942 return atype;
3943
3944 if (CONVERT_EXPR_P (*b))
3945 {
3946 subb = TREE_OPERAND (*b, 0);
3947 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3948 return atype;
3949 }
3950 else
3951 return atype;
3952
3953 *a = suba;
3954 *b = subb;
3955 return wider_type;
3956}
3957
3958/* Determines the expression by that USE is expressed from induction variable
3959 CAND at statement AT in DATA's current loop. The expression is stored in
3960 two parts in a decomposed form. The invariant part is stored in AFF_INV;
3961 while variant part in AFF_VAR. Store ratio of CAND.step over USE.step in
3962 PRAT if it's non-null. Returns false if USE cannot be expressed using
3963 CAND. */
3964
3965static bool
3966get_computation_aff_1 (struct ivopts_data *data, gimple *at, struct iv_use *use,
3967 struct iv_cand *cand, class aff_tree *aff_inv,
3968 class aff_tree *aff_var, widest_int *prat = NULL)
3969{
3970 tree ubase = use->iv->base, ustep = use->iv->step;
3971 tree cbase = cand->iv->base, cstep = cand->iv->step;
3972 tree common_type, uutype, var, cstep_common;
3973 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3974 aff_tree aff_cbase;
3975 widest_int rat;
3976
3977 /* We must have a precision to express the values of use. */
3978 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3979 return false;
3980
3981 var = var_at_stmt (loop: data->current_loop, cand, stmt: at);
3982 uutype = unsigned_type_for (utype);
3983
3984 /* If the conversion is not noop, perform it. */
3985 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3986 {
3987 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3988 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (t: cstep)))
3989 {
3990 tree inner_base, inner_step, inner_type;
3991 inner_base = TREE_OPERAND (cbase, 0);
3992 if (CONVERT_EXPR_P (cstep))
3993 inner_step = TREE_OPERAND (cstep, 0);
3994 else
3995 inner_step = cstep;
3996
3997 inner_type = TREE_TYPE (inner_base);
3998 /* If candidate is added from a biv whose type is smaller than
3999 ctype, we know both candidate and the biv won't overflow.
4000 In this case, it's safe to skip the convertion in candidate.
4001 As an example, (unsigned short)((unsigned long)A) equals to
4002 (unsigned short)A, if A has a type no larger than short. */
4003 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4004 {
4005 cbase = inner_base;
4006 cstep = inner_step;
4007 }
4008 }
4009 cbase = fold_convert (uutype, cbase);
4010 cstep = fold_convert (uutype, cstep);
4011 var = fold_convert (uutype, var);
4012 }
4013
4014 /* Ratio is 1 when computing the value of biv cand by itself.
4015 We can't rely on constant_multiple_of in this case because the
4016 use is created after the original biv is selected. The call
4017 could fail because of inconsistent fold behavior. See PR68021
4018 for more information. */
4019 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4020 {
4021 gcc_assert (is_gimple_assign (use->stmt));
4022 gcc_assert (use->iv->ssa_name == cand->var_after);
4023 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4024 rat = 1;
4025 }
4026 else if (!constant_multiple_of (top: ustep, bot: cstep, mul: &rat, data))
4027 return false;
4028
4029 if (prat)
4030 *prat = rat;
4031
4032 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4033 type, we achieve better folding by computing their difference in this
4034 wider type, and cast the result to UUTYPE. We do not need to worry about
4035 overflows, as all the arithmetics will in the end be performed in UUTYPE
4036 anyway. */
4037 common_type = determine_common_wider_type (a: &ubase, b: &cbase);
4038
4039 /* use = ubase - ratio * cbase + ratio * var. */
4040 tree_to_aff_combination (ubase, common_type, aff_inv);
4041 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4042 tree_to_aff_combination (var, uutype, aff_var);
4043
4044 /* We need to shift the value if we are after the increment. */
4045 if (stmt_after_increment (loop: data->current_loop, cand, stmt: at))
4046 {
4047 aff_tree cstep_aff;
4048
4049 if (common_type != uutype)
4050 cstep_common = fold_convert (common_type, cstep);
4051 else
4052 cstep_common = cstep;
4053
4054 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4055 aff_combination_add (&aff_cbase, &cstep_aff);
4056 }
4057
4058 aff_combination_scale (&aff_cbase, -rat);
4059 aff_combination_add (aff_inv, &aff_cbase);
4060 if (common_type != uutype)
4061 aff_combination_convert (aff_inv, uutype);
4062
4063 aff_combination_scale (aff_var, rat);
4064 return true;
4065}
4066
4067/* Determines the expression by that USE is expressed from induction variable
4068 CAND at statement AT in DATA's current loop. The expression is stored in a
4069 decomposed form into AFF. Returns false if USE cannot be expressed using
4070 CAND. */
4071
4072static bool
4073get_computation_aff (struct ivopts_data *data, gimple *at, struct iv_use *use,
4074 struct iv_cand *cand, class aff_tree *aff)
4075{
4076 aff_tree aff_var;
4077
4078 if (!get_computation_aff_1 (data, at, use, cand, aff_inv: aff, aff_var: &aff_var))
4079 return false;
4080
4081 aff_combination_add (aff, &aff_var);
4082 return true;
4083}
4084
4085/* Return the type of USE. */
4086
4087static tree
4088get_use_type (struct iv_use *use)
4089{
4090 tree base_type = TREE_TYPE (use->iv->base);
4091 tree type;
4092
4093 if (use->type == USE_REF_ADDRESS)
4094 {
4095 /* The base_type may be a void pointer. Create a pointer type based on
4096 the mem_ref instead. */
4097 type = build_pointer_type (TREE_TYPE (*use->op_p));
4098 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4099 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4100 }
4101 else
4102 type = base_type;
4103
4104 return type;
4105}
4106
4107/* Determines the expression by that USE is expressed from induction variable
4108 CAND at statement AT in DATA's current loop. The computation is
4109 unshared. */
4110
4111static tree
4112get_computation_at (struct ivopts_data *data, gimple *at,
4113 struct iv_use *use, struct iv_cand *cand)
4114{
4115 aff_tree aff;
4116 tree type = get_use_type (use);
4117
4118 if (!get_computation_aff (data, at, use, cand, aff: &aff))
4119 return NULL_TREE;
4120 unshare_aff_combination (&aff);
4121 return fold_convert (type, aff_combination_to_tree (&aff));
4122}
4123
4124/* Like get_computation_at, but try harder, even if the computation
4125 is more expensive. Intended for debug stmts. */
4126
4127static tree
4128get_debug_computation_at (struct ivopts_data *data, gimple *at,
4129 struct iv_use *use, struct iv_cand *cand)
4130{
4131 if (tree ret = get_computation_at (data, at, use, cand))
4132 return ret;
4133
4134 tree ubase = use->iv->base, ustep = use->iv->step;
4135 tree cbase = cand->iv->base, cstep = cand->iv->step;
4136 tree var;
4137 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4138 widest_int rat;
4139
4140 /* We must have a precision to express the values of use. */
4141 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4142 return NULL_TREE;
4143
4144 /* Try to handle the case that get_computation_at doesn't,
4145 try to express
4146 use = ubase + (var - cbase) / ratio. */
4147 if (!constant_multiple_of (top: cstep, fold_convert (TREE_TYPE (cstep), ustep),
4148 mul: &rat, data))
4149 return NULL_TREE;
4150
4151 bool neg_p = false;
4152 if (wi::neg_p (x: rat))
4153 {
4154 if (TYPE_UNSIGNED (ctype))
4155 return NULL_TREE;
4156 neg_p = true;
4157 rat = wi::neg (x: rat);
4158 }
4159
4160 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4161 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4162 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4163 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4164 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4165 if (!use->iv->no_overflow
4166 && !cand->iv->no_overflow
4167 && !integer_pow2p (cstep))
4168 return NULL_TREE;
4169
4170 int bits = wi::exact_log2 (rat);
4171 if (bits == -1)
4172 bits = wi::floor_log2 (rat) + 1;
4173 if (!cand->iv->no_overflow
4174 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4175 return NULL_TREE;
4176
4177 var = var_at_stmt (loop: data->current_loop, cand, stmt: at);
4178
4179 if (POINTER_TYPE_P (ctype))
4180 {
4181 ctype = unsigned_type_for (ctype);
4182 cbase = fold_convert (ctype, cbase);
4183 cstep = fold_convert (ctype, cstep);
4184 var = fold_convert (ctype, var);
4185 }
4186
4187 if (stmt_after_increment (loop: data->current_loop, cand, stmt: at))
4188 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4189 unshare_expr (cstep));
4190
4191 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4192 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4193 wide_int_to_tree (TREE_TYPE (var), rat));
4194 if (POINTER_TYPE_P (utype))
4195 {
4196 var = fold_convert (sizetype, var);
4197 if (neg_p)
4198 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4199 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4200 }
4201 else
4202 {
4203 var = fold_convert (utype, var);
4204 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4205 ubase, var);
4206 }
4207 return var;
4208}
4209
4210/* Adjust the cost COST for being in loop setup rather than loop body.
4211 If we're optimizing for space, the loop setup overhead is constant;
4212 if we're optimizing for speed, amortize it over the per-iteration cost.
4213 If ROUND_UP_P is true, the result is round up rather than to zero when
4214 optimizing for speed. */
4215static int64_t
4216adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4217 bool round_up_p = false)
4218{
4219 if (cost == INFTY)
4220 return cost;
4221 else if (optimize_loop_for_speed_p (data->current_loop))
4222 {
4223 uint64_t niters = avg_loop_niter (loop: data->current_loop);
4224 if (niters > (uint64_t) cost)
4225 return (round_up_p && cost != 0) ? 1 : 0;
4226 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4227 }
4228 else
4229 return cost;
4230}
4231
4232/* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4233 EXPR operand holding the shift. COST0 and COST1 are the costs for
4234 calculating the operands of EXPR. Returns true if successful, and returns
4235 the cost in COST. */
4236
4237static bool
4238get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4239 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4240{
4241 comp_cost res;
4242 tree op1 = TREE_OPERAND (expr, 1);
4243 tree cst = TREE_OPERAND (mult, 1);
4244 tree multop = TREE_OPERAND (mult, 0);
4245 int m = exact_log2 (x: int_cst_value (cst));
4246 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4247 int as_cost, sa_cost;
4248 bool mult_in_op1;
4249
4250 if (!(m >= 0 && m < maxm))
4251 return false;
4252
4253 STRIP_NOPS (op1);
4254 mult_in_op1 = operand_equal_p (op1, mult, flags: 0);
4255
4256 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, bits: m);
4257
4258 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4259 use that in preference to a shift insn followed by an add insn. */
4260 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4261 ? shiftadd_cost (speed, mode, bits: m)
4262 : (mult_in_op1
4263 ? shiftsub1_cost (speed, mode, bits: m)
4264 : shiftsub0_cost (speed, mode, bits: m)));
4265
4266 res = comp_cost (MIN (as_cost, sa_cost), 0);
4267 res += (mult_in_op1 ? cost0 : cost1);
4268
4269 STRIP_NOPS (multop);
4270 if (!is_gimple_val (multop))
4271 res += force_expr_to_var_cost (multop, speed);
4272
4273 *cost = res;
4274 return true;
4275}
4276
4277/* Estimates cost of forcing expression EXPR into a variable. */
4278
4279static comp_cost
4280force_expr_to_var_cost (tree expr, bool speed)
4281{
4282 static bool costs_initialized = false;
4283 static unsigned integer_cost [2];
4284 static unsigned symbol_cost [2];
4285 static unsigned address_cost [2];
4286 tree op0, op1;
4287 comp_cost cost0, cost1, cost;
4288 machine_mode mode;
4289 scalar_int_mode int_mode;
4290
4291 if (!costs_initialized)
4292 {
4293 tree type = build_pointer_type (integer_type_node);
4294 tree var, addr;
4295 rtx x;
4296 int i;
4297
4298 var = create_tmp_var_raw (integer_type_node, "test_var");
4299 TREE_STATIC (var) = 1;
4300 x = produce_memory_decl_rtl (obj: var, NULL);
4301 SET_DECL_RTL (var, x);
4302
4303 addr = build1 (ADDR_EXPR, type, var);
4304
4305
4306 for (i = 0; i < 2; i++)
4307 {
4308 integer_cost[i] = computation_cost (expr: build_int_cst (integer_type_node,
4309 2000), speed: i);
4310
4311 symbol_cost[i] = computation_cost (expr: addr, speed: i) + 1;
4312
4313 address_cost[i]
4314 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), speed: i) + 1;
4315 if (dump_file && (dump_flags & TDF_DETAILS))
4316 {
4317 fprintf (stream: dump_file, format: "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4318 fprintf (stream: dump_file, format: " integer %d\n", (int) integer_cost[i]);
4319 fprintf (stream: dump_file, format: " symbol %d\n", (int) symbol_cost[i]);
4320 fprintf (stream: dump_file, format: " address %d\n", (int) address_cost[i]);
4321 fprintf (stream: dump_file, format: " other %d\n", (int) target_spill_cost[i]);
4322 fprintf (stream: dump_file, format: "\n");
4323 }
4324 }
4325
4326 costs_initialized = true;
4327 }
4328
4329 STRIP_NOPS (expr);
4330
4331 if (SSA_VAR_P (expr))
4332 return no_cost;
4333
4334 if (is_gimple_min_invariant (expr))
4335 {
4336 if (poly_int_tree_p (t: expr))
4337 return comp_cost (integer_cost [speed], 0);
4338
4339 if (TREE_CODE (expr) == ADDR_EXPR)
4340 {
4341 tree obj = TREE_OPERAND (expr, 0);
4342
4343 if (VAR_P (obj)
4344 || TREE_CODE (obj) == PARM_DECL
4345 || TREE_CODE (obj) == RESULT_DECL)
4346 return comp_cost (symbol_cost [speed], 0);
4347 }
4348
4349 return comp_cost (address_cost [speed], 0);
4350 }
4351
4352 switch (TREE_CODE (expr))
4353 {
4354 case POINTER_PLUS_EXPR:
4355 case PLUS_EXPR:
4356 case MINUS_EXPR:
4357 case MULT_EXPR:
4358 case EXACT_DIV_EXPR:
4359 case TRUNC_DIV_EXPR:
4360 case BIT_AND_EXPR:
4361 case BIT_IOR_EXPR:
4362 case LSHIFT_EXPR:
4363 case RSHIFT_EXPR:
4364 op0 = TREE_OPERAND (expr, 0);
4365 op1 = TREE_OPERAND (expr, 1);
4366 STRIP_NOPS (op0);
4367 STRIP_NOPS (op1);
4368 break;
4369
4370 CASE_CONVERT:
4371 case NEGATE_EXPR:
4372 case BIT_NOT_EXPR:
4373 op0 = TREE_OPERAND (expr, 0);
4374 STRIP_NOPS (op0);
4375 op1 = NULL_TREE;
4376 break;
4377 /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4378 introduce COND_EXPR for IV base, need to support better cost estimation
4379 for this COND_EXPR and tcc_comparison. */
4380 case COND_EXPR:
4381 op0 = TREE_OPERAND (expr, 1);
4382 STRIP_NOPS (op0);
4383 op1 = TREE_OPERAND (expr, 2);
4384 STRIP_NOPS (op1);
4385 break;
4386 case LT_EXPR:
4387 case LE_EXPR:
4388 case GT_EXPR:
4389 case GE_EXPR:
4390 case EQ_EXPR:
4391 case NE_EXPR:
4392 case UNORDERED_EXPR:
4393 case ORDERED_EXPR:
4394 case UNLT_EXPR:
4395 case UNLE_EXPR:
4396 case UNGT_EXPR:
4397 case UNGE_EXPR:
4398 case UNEQ_EXPR:
4399 case LTGT_EXPR:
4400 case MAX_EXPR:
4401 case MIN_EXPR:
4402 op0 = TREE_OPERAND (expr, 0);
4403 STRIP_NOPS (op0);
4404 op1 = TREE_OPERAND (expr, 1);
4405 STRIP_NOPS (op1);
4406 break;
4407
4408 default:
4409 /* Just an arbitrary value, FIXME. */
4410 return comp_cost (target_spill_cost[speed], 0);
4411 }
4412
4413 if (op0 == NULL_TREE
4414 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4415 cost0 = no_cost;
4416 else
4417 cost0 = force_expr_to_var_cost (expr: op0, speed);
4418
4419 if (op1 == NULL_TREE
4420 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4421 cost1 = no_cost;
4422 else
4423 cost1 = force_expr_to_var_cost (expr: op1, speed);
4424
4425 mode = TYPE_MODE (TREE_TYPE (expr));
4426 switch (TREE_CODE (expr))
4427 {
4428 case POINTER_PLUS_EXPR:
4429 case PLUS_EXPR:
4430 case MINUS_EXPR:
4431 case NEGATE_EXPR:
4432 cost = comp_cost (add_cost (speed, mode), 0);
4433 if (TREE_CODE (expr) != NEGATE_EXPR)
4434 {
4435 tree mult = NULL_TREE;
4436 comp_cost sa_cost;
4437 if (TREE_CODE (op1) == MULT_EXPR)
4438 mult = op1;
4439 else if (TREE_CODE (op0) == MULT_EXPR)
4440 mult = op0;
4441
4442 if (mult != NULL_TREE
4443 && is_a <scalar_int_mode> (m: mode, result: &int_mode)
4444 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4445 && get_shiftadd_cost (expr, mode: int_mode, cost0, cost1, mult,
4446 speed, cost: &sa_cost))
4447 return sa_cost;
4448 }
4449 break;
4450
4451 CASE_CONVERT:
4452 {
4453 tree inner_mode, outer_mode;
4454 outer_mode = TREE_TYPE (expr);
4455 inner_mode = TREE_TYPE (op0);
4456 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4457 TYPE_MODE (inner_mode), speed), 0);
4458 }
4459 break;
4460
4461 case MULT_EXPR:
4462 if (cst_and_fits_in_hwi (op0))
4463 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4464 mode, speed), 0);
4465 else if (cst_and_fits_in_hwi (op1))
4466 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4467 mode, speed), 0);
4468 else
4469 return comp_cost (target_spill_cost [speed], 0);
4470 break;
4471
4472 case EXACT_DIV_EXPR:
4473 case TRUNC_DIV_EXPR:
4474 /* Division by power of two is usually cheap, so we allow it. Forbid
4475 anything else. */
4476 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4477 cost = comp_cost (add_cost (speed, mode), 0);
4478 else
4479 cost = comp_cost (target_spill_cost[speed], 0);
4480 break;
4481
4482 case BIT_AND_EXPR:
4483 case BIT_IOR_EXPR:
4484 case BIT_NOT_EXPR:
4485 case LSHIFT_EXPR:
4486 case RSHIFT_EXPR:
4487 cost = comp_cost (add_cost (speed, mode), 0);
4488 break;
4489 case COND_EXPR:
4490 op0 = TREE_OPERAND (expr, 0);
4491 STRIP_NOPS (op0);
4492 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4493 || CONSTANT_CLASS_P (op0))
4494 cost = no_cost;
4495 else
4496 cost = force_expr_to_var_cost (expr: op0, speed);
4497 break;
4498 case LT_EXPR:
4499 case LE_EXPR:
4500 case GT_EXPR:
4501 case GE_EXPR:
4502 case EQ_EXPR:
4503 case NE_EXPR:
4504 case UNORDERED_EXPR:
4505 case ORDERED_EXPR:
4506 case UNLT_EXPR:
4507 case UNLE_EXPR:
4508 case UNGT_EXPR:
4509 case UNGE_EXPR:
4510 case UNEQ_EXPR:
4511 case LTGT_EXPR:
4512 case MAX_EXPR:
4513 case MIN_EXPR:
4514 /* Simply use add cost for now, FIXME if there is some more accurate cost
4515 evaluation way. */
4516 cost = comp_cost (add_cost (speed, mode), 0);
4517 break;
4518
4519 default:
4520 gcc_unreachable ();
4521 }
4522
4523 cost += cost0;
4524 cost += cost1;
4525 return cost;
4526}
4527
4528/* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4529 invariants the computation depends on. */
4530
4531static comp_cost
4532force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4533{
4534 if (!expr)
4535 return no_cost;
4536
4537 find_inv_vars (data, expr_p: &expr, inv_vars);
4538 return force_expr_to_var_cost (expr, speed: data->speed);
4539}
4540
4541/* Returns cost of auto-modifying address expression in shape base + offset.
4542 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4543 address expression. The address expression has ADDR_MODE in addr space
4544 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4545 speed or size. */
4546
4547enum ainc_type
4548{
4549 AINC_PRE_INC, /* Pre increment. */
4550 AINC_PRE_DEC, /* Pre decrement. */
4551 AINC_POST_INC, /* Post increment. */
4552 AINC_POST_DEC, /* Post decrement. */
4553 AINC_NONE /* Also the number of auto increment types. */
4554};
4555
4556struct ainc_cost_data
4557{
4558 int64_t costs[AINC_NONE];
4559};
4560
4561static comp_cost
4562get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4563 machine_mode addr_mode, machine_mode mem_mode,
4564 addr_space_t as, bool speed)
4565{
4566 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4567 && !USE_STORE_PRE_DECREMENT (mem_mode)
4568 && !USE_LOAD_POST_DECREMENT (mem_mode)
4569 && !USE_STORE_POST_DECREMENT (mem_mode)
4570 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4571 && !USE_STORE_PRE_INCREMENT (mem_mode)
4572 && !USE_LOAD_POST_INCREMENT (mem_mode)
4573 && !USE_STORE_POST_INCREMENT (mem_mode))
4574 return infinite_cost;
4575
4576 static vec<ainc_cost_data *> ainc_cost_data_list;
4577 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4578 if (idx >= ainc_cost_data_list.length ())
4579 {
4580 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4581
4582 gcc_assert (nsize > idx);
4583 ainc_cost_data_list.safe_grow_cleared (len: nsize, exact: true);
4584 }
4585
4586 ainc_cost_data *data = ainc_cost_data_list[idx];
4587 if (data == NULL)
4588 {
4589 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4590
4591 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4592 data->costs[AINC_PRE_DEC] = INFTY;
4593 data->costs[AINC_POST_DEC] = INFTY;
4594 data->costs[AINC_PRE_INC] = INFTY;
4595 data->costs[AINC_POST_INC] = INFTY;
4596 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4597 || USE_STORE_PRE_DECREMENT (mem_mode))
4598 {
4599 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4600
4601 if (memory_address_addr_space_p (mem_mode, addr, as))
4602 data->costs[AINC_PRE_DEC]
4603 = address_cost (addr, mem_mode, as, speed);
4604 }
4605 if (USE_LOAD_POST_DECREMENT (mem_mode)
4606 || USE_STORE_POST_DECREMENT (mem_mode))
4607 {
4608 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4609
4610 if (memory_address_addr_space_p (mem_mode, addr, as))
4611 data->costs[AINC_POST_DEC]
4612 = address_cost (addr, mem_mode, as, speed);
4613 }
4614 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4615 || USE_STORE_PRE_INCREMENT (mem_mode))
4616 {
4617 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4618
4619 if (memory_address_addr_space_p (mem_mode, addr, as))
4620 data->costs[AINC_PRE_INC]
4621 = address_cost (addr, mem_mode, as, speed);
4622 }
4623 if (USE_LOAD_POST_INCREMENT (mem_mode)
4624 || USE_STORE_POST_INCREMENT (mem_mode))
4625 {
4626 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4627
4628 if (memory_address_addr_space_p (mem_mode, addr, as))
4629 data->costs[AINC_POST_INC]
4630 = address_cost (addr, mem_mode, as, speed);
4631 }
4632 ainc_cost_data_list[idx] = data;
4633 }
4634
4635 poly_int64 msize = GET_MODE_SIZE (mode: mem_mode);
4636 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4637 return comp_cost (data->costs[AINC_POST_INC], 0);
4638 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4639 return comp_cost (data->costs[AINC_POST_DEC], 0);
4640 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4641 return comp_cost (data->costs[AINC_PRE_INC], 0);
4642 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4643 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4644
4645 return infinite_cost;
4646}
4647
4648/* Return cost of computing USE's address expression by using CAND.
4649 AFF_INV and AFF_VAR represent invariant and variant parts of the
4650 address expression, respectively. If AFF_INV is simple, store
4651 the loop invariant variables which are depended by it in INV_VARS;
4652 if AFF_INV is complicated, handle it as a new invariant expression
4653 and record it in INV_EXPR. RATIO indicates multiple times between
4654 steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4655 value to it indicating if this is an auto-increment address. */
4656
4657static comp_cost
4658get_address_cost (struct ivopts_data *data, struct iv_use *use,
4659 struct iv_cand *cand, aff_tree *aff_inv,
4660 aff_tree *aff_var, HOST_WIDE_INT ratio,
4661 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4662 bool *can_autoinc, bool speed)
4663{
4664 rtx addr;
4665 bool simple_inv = true;
4666 tree comp_inv = NULL_TREE, type = aff_var->type;
4667 comp_cost var_cost = no_cost, cost = no_cost;
4668 struct mem_address parts = {NULL_TREE, integer_one_node,
4669 NULL_TREE, NULL_TREE, NULL_TREE};
4670 machine_mode addr_mode = TYPE_MODE (type);
4671 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4672 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4673 /* Only true if ratio != 1. */
4674 bool ok_with_ratio_p = false;
4675 bool ok_without_ratio_p = false;
4676 code_helper code = ERROR_MARK;
4677
4678 if (use->type == USE_PTR_ADDRESS)
4679 {
4680 gcall *call = as_a<gcall *> (p: use->stmt);
4681 gcc_assert (gimple_call_internal_p (call));
4682 code = gimple_call_internal_fn (gs: call);
4683 }
4684
4685 if (!aff_combination_const_p (aff: aff_inv))
4686 {
4687 parts.index = integer_one_node;
4688 /* Addressing mode "base + index". */
4689 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4690 if (ratio != 1)
4691 {
4692 parts.step = wide_int_to_tree (type, cst: ratio);
4693 /* Addressing mode "base + index << scale". */
4694 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4695 if (!ok_with_ratio_p)
4696 parts.step = NULL_TREE;
4697 }
4698 if (ok_with_ratio_p || ok_without_ratio_p)
4699 {
4700 if (maybe_ne (a: aff_inv->offset, b: 0))
4701 {
4702 parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4703 /* Addressing mode "base + index [<< scale] + offset". */
4704 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4705 parts.offset = NULL_TREE;
4706 else
4707 aff_inv->offset = 0;
4708 }
4709
4710 move_fixed_address_to_symbol (&parts, aff_inv);
4711 /* Base is fixed address and is moved to symbol part. */
4712 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff: aff_inv))
4713 parts.base = NULL_TREE;
4714
4715 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4716 if (parts.symbol != NULL_TREE
4717 && !valid_mem_ref_p (mem_mode, as, &parts, code))
4718 {
4719 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4720 parts.symbol = NULL_TREE;
4721 /* Reset SIMPLE_INV since symbol address needs to be computed
4722 outside of address expression in this case. */
4723 simple_inv = false;
4724 /* Symbol part is moved back to base part, it can't be NULL. */
4725 parts.base = integer_one_node;
4726 }
4727 }
4728 else
4729 parts.index = NULL_TREE;
4730 }
4731 else
4732 {
4733 poly_int64 ainc_step;
4734 if (can_autoinc
4735 && ratio == 1
4736 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4737 {
4738 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4739
4740 if (stmt_after_increment (loop: data->current_loop, cand, stmt: use->stmt))
4741 ainc_offset += ainc_step;
4742 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4743 addr_mode, mem_mode, as, speed);
4744 if (!cost.infinite_cost_p ())
4745 {
4746 *can_autoinc = true;
4747 return cost;
4748 }
4749 cost = no_cost;
4750 }
4751 if (!aff_combination_zero_p (aff: aff_inv))
4752 {
4753 parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4754 /* Addressing mode "base + offset". */
4755 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4756 parts.offset = NULL_TREE;
4757 else
4758 aff_inv->offset = 0;
4759 }
4760 }
4761
4762 if (simple_inv)
4763 simple_inv = (aff_inv == NULL
4764 || aff_combination_const_p (aff: aff_inv)
4765 || aff_combination_singleton_var_p (aff: aff_inv));
4766 if (!aff_combination_zero_p (aff: aff_inv))
4767 comp_inv = aff_combination_to_tree (aff_inv);
4768 if (comp_inv != NULL_TREE)
4769 cost = force_var_cost (data, expr: comp_inv, inv_vars);
4770 if (ratio != 1 && parts.step == NULL_TREE)
4771 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4772 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4773 var_cost += add_cost (speed, mode: addr_mode);
4774
4775 if (comp_inv && inv_expr && !simple_inv)
4776 {
4777 *inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4778 /* Clear depends on. */
4779 if (*inv_expr != NULL && inv_vars && *inv_vars)
4780 bitmap_clear (*inv_vars);
4781
4782 /* Cost of small invariant expression adjusted against loop niters
4783 is usually zero, which makes it difficult to be differentiated
4784 from candidate based on loop invariant variables. Secondly, the
4785 generated invariant expression may not be hoisted out of loop by
4786 following pass. We penalize the cost by rounding up in order to
4787 neutralize such effects. */
4788 cost.cost = adjust_setup_cost (data, cost: cost.cost, round_up_p: true);
4789 cost.scratch = cost.cost;
4790 }
4791
4792 cost += var_cost;
4793 addr = addr_for_mem_ref (&parts, as, false);
4794 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4795 cost += address_cost (addr, mem_mode, as, speed);
4796
4797 if (parts.symbol != NULL_TREE)
4798 cost.complexity += 1;
4799 /* Don't increase the complexity of adding a scaled index if it's
4800 the only kind of index that the target allows. */
4801 if (parts.step != NULL_TREE && ok_without_ratio_p)
4802 cost.complexity += 1;
4803 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4804 cost.complexity += 1;
4805 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4806 cost.complexity += 1;
4807
4808 return cost;
4809}
4810
4811/* Scale (multiply) the computed COST (except scratch part that should be
4812 hoisted out a loop) by header->frequency / AT->frequency, which makes
4813 expected cost more accurate. */
4814
4815static comp_cost
4816get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4817{
4818 if (data->speed
4819 && data->current_loop->header->count.to_frequency (cfun) > 0)
4820 {
4821 basic_block bb = gimple_bb (g: at);
4822 gcc_assert (cost.scratch <= cost.cost);
4823 int scale_factor = (int)(intptr_t) bb->aux;
4824 if (scale_factor == 1)
4825 return cost;
4826
4827 int64_t scaled_cost
4828 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4829
4830 if (dump_file && (dump_flags & TDF_DETAILS))
4831 fprintf (stream: dump_file, format: "Scaling cost based on bb prob by %2.2f: "
4832 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4833 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4834
4835 cost.cost = scaled_cost;
4836 }
4837
4838 return cost;
4839}
4840
4841/* Determines the cost of the computation by that USE is expressed
4842 from induction variable CAND. If ADDRESS_P is true, we just need
4843 to create an address from it, otherwise we want to get it into
4844 register. A set of invariants we depend on is stored in INV_VARS.
4845 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4846 addressing is likely. If INV_EXPR is nonnull, record invariant
4847 expr entry in it. */
4848
4849static comp_cost
4850get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4851 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4852 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4853{
4854 gimple *at = use->stmt;
4855 tree ubase = use->iv->base, cbase = cand->iv->base;
4856 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4857 tree comp_inv = NULL_TREE;
4858 HOST_WIDE_INT ratio, aratio;
4859 comp_cost cost;
4860 widest_int rat;
4861 aff_tree aff_inv, aff_var;
4862 bool speed = optimize_bb_for_speed_p (gimple_bb (g: at));
4863
4864 if (inv_vars)
4865 *inv_vars = NULL;
4866 if (can_autoinc)
4867 *can_autoinc = false;
4868 if (inv_expr)
4869 *inv_expr = NULL;
4870
4871 /* Check if we have enough precision to express the values of use. */
4872 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4873 return infinite_cost;
4874
4875 if (address_p
4876 || (use->iv->base_object
4877 && cand->iv->base_object
4878 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4879 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4880 {
4881 /* Do not try to express address of an object with computation based
4882 on address of a different object. This may cause problems in rtl
4883 level alias analysis (that does not expect this to be happening,
4884 as this is illegal in C), and would be unlikely to be useful
4885 anyway. */
4886 if (use->iv->base_object
4887 && cand->iv->base_object
4888 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, flags: 0))
4889 return infinite_cost;
4890 }
4891
4892 if (!get_computation_aff_1 (data, at, use, cand, aff_inv: &aff_inv, aff_var: &aff_var, prat: &rat)
4893 || !wi::fits_shwi_p (x: rat))
4894 return infinite_cost;
4895
4896 ratio = rat.to_shwi ();
4897 if (address_p)
4898 {
4899 cost = get_address_cost (data, use, cand, aff_inv: &aff_inv, aff_var: &aff_var, ratio,
4900 inv_vars, inv_expr, can_autoinc, speed);
4901 cost = get_scaled_computation_cost_at (data, at, cost);
4902 /* For doloop IV cand, add on the extra cost. */
4903 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4904 return cost;
4905 }
4906
4907 bool simple_inv = (aff_combination_const_p (aff: &aff_inv)
4908 || aff_combination_singleton_var_p (aff: &aff_inv));
4909 tree signed_type = signed_type_for (aff_combination_type (aff: &aff_inv));
4910 aff_combination_convert (&aff_inv, signed_type);
4911 if (!aff_combination_zero_p (aff: &aff_inv))
4912 comp_inv = aff_combination_to_tree (&aff_inv);
4913
4914 cost = force_var_cost (data, expr: comp_inv, inv_vars);
4915 if (comp_inv && inv_expr && !simple_inv)
4916 {
4917 *inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4918 /* Clear depends on. */
4919 if (*inv_expr != NULL && inv_vars && *inv_vars)
4920 bitmap_clear (*inv_vars);
4921
4922 cost.cost = adjust_setup_cost (data, cost: cost.cost);
4923 /* Record setup cost in scratch field. */
4924 cost.scratch = cost.cost;
4925 }
4926 /* Cost of constant integer can be covered when adding invariant part to
4927 variant part. */
4928 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4929 cost = no_cost;
4930
4931 /* Need type narrowing to represent use with cand. */
4932 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4933 {
4934 machine_mode outer_mode = TYPE_MODE (utype);
4935 machine_mode inner_mode = TYPE_MODE (ctype);
4936 cost += comp_cost (convert_cost (to_mode: outer_mode, from_mode: inner_mode, speed), 0);
4937 }
4938
4939 /* Turn a + i * (-c) into a - i * c. */
4940 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4941 aratio = -ratio;
4942 else
4943 aratio = ratio;
4944
4945 if (ratio != 1)
4946 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4947
4948 /* TODO: We may also need to check if we can compute a + i * 4 in one
4949 instruction. */
4950 /* Need to add up the invariant and variant parts. */
4951 if (comp_inv && !integer_zerop (comp_inv))
4952 cost += add_cost (speed, TYPE_MODE (utype));
4953
4954 cost = get_scaled_computation_cost_at (data, at, cost);
4955
4956 /* For doloop IV cand, add on the extra cost. */
4957 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4958 cost += targetm.doloop_cost_for_generic;
4959
4960 return cost;
4961}
4962
4963/* Determines cost of computing the use in GROUP with CAND in a generic
4964 expression. */
4965
4966static bool
4967determine_group_iv_cost_generic (struct ivopts_data *data,
4968 struct iv_group *group, struct iv_cand *cand)
4969{
4970 comp_cost cost;
4971 iv_inv_expr_ent *inv_expr = NULL;
4972 bitmap inv_vars = NULL, inv_exprs = NULL;
4973 struct iv_use *use = group->vuses[0];
4974
4975 /* The simple case first -- if we need to express value of the preserved
4976 original biv, the cost is 0. This also prevents us from counting the
4977 cost of increment twice -- once at this use and once in the cost of
4978 the candidate. */
4979 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4980 cost = no_cost;
4981 /* If the IV candidate involves undefined SSA values and is not the
4982 same IV as on the USE avoid using that candidate here. */
4983 else if (cand->involves_undefs
4984 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, flags: 0)))
4985 return false;
4986 else
4987 cost = get_computation_cost (data, use, cand, address_p: false,
4988 inv_vars: &inv_vars, NULL, inv_expr: &inv_expr);
4989
4990 if (inv_expr)
4991 {
4992 inv_exprs = BITMAP_ALLOC (NULL);
4993 bitmap_set_bit (inv_exprs, inv_expr->id);
4994 }
4995 set_group_iv_cost (data, group, cand, cost, inv_vars,
4996 NULL_TREE, comp: ERROR_MARK, inv_exprs);
4997 return !cost.infinite_cost_p ();
4998}
4999
5000/* Determines cost of computing uses in GROUP with CAND in addresses. */
5001
5002static bool
5003determine_group_iv_cost_address (struct ivopts_data *data,
5004 struct iv_group *group, struct iv_cand *cand)
5005{
5006 unsigned i;
5007 bitmap inv_vars = NULL, inv_exprs = NULL;
5008 bool can_autoinc;
5009 iv_inv_expr_ent *inv_expr = NULL;
5010 struct iv_use *use = group->vuses[0];
5011 comp_cost sum_cost = no_cost, cost;
5012
5013 cost = get_computation_cost (data, use, cand, address_p: true,
5014 inv_vars: &inv_vars, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5015
5016 if (inv_expr)
5017 {
5018 inv_exprs = BITMAP_ALLOC (NULL);
5019 bitmap_set_bit (inv_exprs, inv_expr->id);
5020 }
5021 sum_cost = cost;
5022 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5023 {
5024 if (can_autoinc)
5025 sum_cost -= cand->cost_step;
5026 /* If we generated the candidate solely for exploiting autoincrement
5027 opportunities, and it turns out it can't be used, set the cost to
5028 infinity to make sure we ignore it. */
5029 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5030 sum_cost = infinite_cost;
5031 }
5032
5033 /* Compute and add costs for rest uses of this group. */
5034 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5035 {
5036 struct iv_use *next = group->vuses[i];
5037
5038 /* TODO: We could skip computing cost for sub iv_use when it has the
5039 same cost as the first iv_use, but the cost really depends on the
5040 offset and where the iv_use is. */
5041 cost = get_computation_cost (data, use: next, cand, address_p: true,
5042 NULL, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5043 if (inv_expr)
5044 {
5045 if (!inv_exprs)
5046 inv_exprs = BITMAP_ALLOC (NULL);
5047
5048 /* Uses in a group can share setup code,
5049 so only add setup cost once. */
5050 if (bitmap_bit_p (inv_exprs, inv_expr->id))
5051 cost -= cost.scratch;
5052 else
5053 bitmap_set_bit (inv_exprs, inv_expr->id);
5054 }
5055 sum_cost += cost;
5056 }
5057 set_group_iv_cost (data, group, cand, cost: sum_cost, inv_vars,
5058 NULL_TREE, comp: ERROR_MARK, inv_exprs);
5059
5060 return !sum_cost.infinite_cost_p ();
5061}
5062
5063/* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5064 and stores it to VAL. */
5065
5066static void
5067cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5068 class tree_niter_desc *desc, aff_tree *val)
5069{
5070 aff_tree step, delta, nit;
5071 struct iv *iv = cand->iv;
5072 tree type = TREE_TYPE (iv->base);
5073 tree niter = desc->niter;
5074 bool after_adjust = stmt_after_increment (loop, cand, stmt: at);
5075 tree steptype;
5076
5077 if (POINTER_TYPE_P (type))
5078 steptype = sizetype;
5079 else
5080 steptype = unsigned_type_for (type);
5081
5082 /* If AFTER_ADJUST is required, the code below generates the equivalent
5083 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5084 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5085 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5086 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5087 class for common idioms that we know are safe. */
5088 if (after_adjust
5089 && desc->control.no_overflow
5090 && integer_onep (desc->control.step)
5091 && (desc->cmp == LT_EXPR
5092 || desc->cmp == NE_EXPR)
5093 && TREE_CODE (desc->bound) == SSA_NAME)
5094 {
5095 if (integer_onep (desc->control.base))
5096 {
5097 niter = desc->bound;
5098 after_adjust = false;
5099 }
5100 else if (TREE_CODE (niter) == MINUS_EXPR
5101 && integer_onep (TREE_OPERAND (niter, 1)))
5102 {
5103 niter = TREE_OPERAND (niter, 0);
5104 after_adjust = false;
5105 }
5106 }
5107
5108 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5109 aff_combination_convert (&step, steptype);
5110 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5111 aff_combination_convert (&nit, steptype);
5112 aff_combination_mult (&nit, &step, &delta);
5113 if (after_adjust)
5114 aff_combination_add (&delta, &step);
5115
5116 tree_to_aff_combination (iv->base, type, val);
5117 if (!POINTER_TYPE_P (type))
5118 aff_combination_convert (val, steptype);
5119 aff_combination_add (val, &delta);
5120}
5121
5122/* Returns period of induction variable iv. */
5123
5124static tree
5125iv_period (struct iv *iv)
5126{
5127 tree step = iv->step, period, type;
5128 tree pow2div;
5129
5130 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131
5132 type = unsigned_type_for (TREE_TYPE (step));
5133 /* Period of the iv is lcm (step, type_range)/step -1,
5134 i.e., N*type_range/step - 1. Since type range is power
5135 of two, N == (step >> num_of_ending_zeros_binary (step),
5136 so the final result is
5137
5138 (type_range >> num_of_ending_zeros_binary (step)) - 1
5139
5140 */
5141 pow2div = num_ending_zeros (step);
5142
5143 period = build_low_bits_mask (type,
5144 (TYPE_PRECISION (type)
5145 - tree_to_uhwi (pow2div)));
5146
5147 return period;
5148}
5149
5150/* Returns the comparison operator used when eliminating the iv USE. */
5151
5152static enum tree_code
5153iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5154{
5155 class loop *loop = data->current_loop;
5156 basic_block ex_bb;
5157 edge exit;
5158
5159 ex_bb = gimple_bb (g: use->stmt);
5160 exit = EDGE_SUCC (ex_bb, 0);
5161 if (flow_bb_inside_loop_p (loop, exit->dest))
5162 exit = EDGE_SUCC (ex_bb, 1);
5163
5164 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5165}
5166
5167/* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5168 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5169 calculation is performed in non-wrapping type.
5170
5171 TODO: More generally, we could test for the situation that
5172 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5173 This would require knowing the sign of OFFSET. */
5174
5175static bool
5176difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177{
5178 enum tree_code code;
5179 tree e1, e2;
5180 aff_tree aff_e1, aff_e2, aff_offset;
5181
5182 if (!nowrap_type_p (TREE_TYPE (base)))
5183 return false;
5184
5185 base = expand_simple_operations (base);
5186
5187 if (TREE_CODE (base) == SSA_NAME)
5188 {
5189 gimple *stmt = SSA_NAME_DEF_STMT (base);
5190
5191 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
5192 return false;
5193
5194 code = gimple_assign_rhs_code (gs: stmt);
5195 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5196 return false;
5197
5198 e1 = gimple_assign_rhs1 (gs: stmt);
5199 e2 = gimple_assign_rhs2 (gs: stmt);
5200 }
5201 else
5202 {
5203 code = TREE_CODE (base);
5204 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205 return false;
5206 e1 = TREE_OPERAND (base, 0);
5207 e2 = TREE_OPERAND (base, 1);
5208 }
5209
5210 /* Use affine expansion as deeper inspection to prove the equality. */
5211 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5212 &aff_e2, &data->name_expansion_cache);
5213 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5214 &aff_offset, &data->name_expansion_cache);
5215 aff_combination_scale (&aff_offset, -1);
5216 switch (code)
5217 {
5218 case PLUS_EXPR:
5219 aff_combination_add (&aff_e2, &aff_offset);
5220 if (aff_combination_zero_p (aff: &aff_e2))
5221 return true;
5222
5223 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5224 &aff_e1, &data->name_expansion_cache);
5225 aff_combination_add (&aff_e1, &aff_offset);
5226 return aff_combination_zero_p (aff: &aff_e1);
5227
5228 case POINTER_PLUS_EXPR:
5229 aff_combination_add (&aff_e2, &aff_offset);
5230 return aff_combination_zero_p (aff: &aff_e2);
5231
5232 default:
5233 return false;
5234 }
5235}
5236
5237/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5238 comparison with CAND. NITER describes the number of iterations of
5239 the loops. If successful, the comparison in COMP_P is altered accordingly.
5240
5241 We aim to handle the following situation:
5242
5243 sometype *base, *p;
5244 int a, b, i;
5245
5246 i = a;
5247 p = p_0 = base + a;
5248
5249 do
5250 {
5251 bla (*p);
5252 p++;
5253 i++;
5254 }
5255 while (i < b);
5256
5257 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5258 We aim to optimize this to
5259
5260 p = p_0 = base + a;
5261 do
5262 {
5263 bla (*p);
5264 p++;
5265 }
5266 while (p < p_0 - a + b);
5267
5268 This preserves the correctness, since the pointer arithmetics does not
5269 overflow. More precisely:
5270
5271 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5272 overflow in computing it or the values of p.
5273 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5274 overflow. To prove this, we use the fact that p_0 = base + a. */
5275
5276static bool
5277iv_elimination_compare_lt (struct ivopts_data *data,
5278 struct iv_cand *cand, enum tree_code *comp_p,
5279 class tree_niter_desc *niter)
5280{
5281 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5282 class aff_tree nit, tmpa, tmpb;
5283 enum tree_code comp;
5284 HOST_WIDE_INT step;
5285
5286 /* We need to know that the candidate induction variable does not overflow.
5287 While more complex analysis may be used to prove this, for now just
5288 check that the variable appears in the original program and that it
5289 is computed in a type that guarantees no overflows. */
5290 cand_type = TREE_TYPE (cand->iv->base);
5291 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5292 return false;
5293
5294 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5295 the calculation of the BOUND could overflow, making the comparison
5296 invalid. */
5297 if (!data->loop_single_exit_p)
5298 return false;
5299
5300 /* We need to be able to decide whether candidate is increasing or decreasing
5301 in order to choose the right comparison operator. */
5302 if (!cst_and_fits_in_hwi (cand->iv->step))
5303 return false;
5304 step = int_cst_value (cand->iv->step);
5305
5306 /* Check that the number of iterations matches the expected pattern:
5307 a + 1 > b ? 0 : b - a - 1. */
5308 mbz = niter->may_be_zero;
5309 if (TREE_CODE (mbz) == GT_EXPR)
5310 {
5311 /* Handle a + 1 > b. */
5312 tree op0 = TREE_OPERAND (mbz, 0);
5313 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5314 {
5315 a = TREE_OPERAND (op0, 0);
5316 b = TREE_OPERAND (mbz, 1);
5317 }
5318 else
5319 return false;
5320 }
5321 else if (TREE_CODE (mbz) == LT_EXPR)
5322 {
5323 tree op1 = TREE_OPERAND (mbz, 1);
5324
5325 /* Handle b < a + 1. */
5326 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5327 {
5328 a = TREE_OPERAND (op1, 0);
5329 b = TREE_OPERAND (mbz, 0);
5330 }
5331 else
5332 return false;
5333 }
5334 else
5335 return false;
5336
5337 /* Expected number of iterations is B - A - 1. Check that it matches
5338 the actual number, i.e., that B - A - NITER = 1. */
5339 tree_to_aff_combination (niter->niter, nit_type, &nit);
5340 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5341 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5342 aff_combination_scale (&nit, -1);
5343 aff_combination_scale (&tmpa, -1);
5344 aff_combination_add (&tmpb, &tmpa);
5345 aff_combination_add (&tmpb, &nit);
5346 if (tmpb.n != 0 || maybe_ne (a: tmpb.offset, b: 1))
5347 return false;
5348
5349 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5350 overflow. */
5351 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5352 cand->iv->step,
5353 fold_convert (TREE_TYPE (cand->iv->step), a));
5354 if (!difference_cannot_overflow_p (data, base: cand->iv->base, offset))
5355 return false;
5356
5357 /* Determine the new comparison operator. */
5358 comp = step < 0 ? GT_EXPR : LT_EXPR;
5359 if (*comp_p == NE_EXPR)
5360 *comp_p = comp;
5361 else if (*comp_p == EQ_EXPR)
5362 *comp_p = invert_tree_comparison (comp, false);
5363 else
5364 gcc_unreachable ();
5365
5366 return true;
5367}
5368
5369/* Check whether it is possible to express the condition in USE by comparison
5370 of candidate CAND. If so, store the value compared with to BOUND, and the
5371 comparison operator to COMP. */
5372
5373static bool
5374may_eliminate_iv (struct ivopts_data *data,
5375 struct iv_use *use, struct iv_cand *cand, tree *bound,
5376 enum tree_code *comp)
5377{
5378 basic_block ex_bb;
5379 edge exit;
5380 tree period;
5381 class loop *loop = data->current_loop;
5382 aff_tree bnd;
5383 class tree_niter_desc *desc = NULL;
5384
5385 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5386 return false;
5387
5388 /* For now works only for exits that dominate the loop latch.
5389 TODO: extend to other conditions inside loop body. */
5390 ex_bb = gimple_bb (g: use->stmt);
5391 if (use->stmt != last_nondebug_stmt (ex_bb)
5392 || gimple_code (g: use->stmt) != GIMPLE_COND
5393 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5394 return false;
5395
5396 exit = EDGE_SUCC (ex_bb, 0);
5397 if (flow_bb_inside_loop_p (loop, exit->dest))
5398 exit = EDGE_SUCC (ex_bb, 1);
5399 if (flow_bb_inside_loop_p (loop, exit->dest))
5400 return false;
5401
5402 desc = niter_for_exit (data, exit);
5403 if (!desc)
5404 return false;
5405
5406 /* Determine whether we can use the variable to test the exit condition.
5407 This is the case iff the period of the induction variable is greater
5408 than the number of iterations for which the exit condition is true. */
5409 period = iv_period (iv: cand->iv);
5410
5411 /* If the number of iterations is constant, compare against it directly. */
5412 if (TREE_CODE (desc->niter) == INTEGER_CST)
5413 {
5414 /* See cand_value_at. */
5415 if (stmt_after_increment (loop, cand, stmt: use->stmt))
5416 {
5417 if (!tree_int_cst_lt (t1: desc->niter, t2: period))
5418 return false;
5419 }
5420 else
5421 {
5422 if (tree_int_cst_lt (t1: period, t2: desc->niter))
5423 return false;
5424 }
5425 }
5426
5427 /* If not, and if this is the only possible exit of the loop, see whether
5428 we can get a conservative estimate on the number of iterations of the
5429 entire loop and compare against that instead. */
5430 else
5431 {
5432 widest_int period_value, max_niter;
5433
5434 max_niter = desc->max;
5435 if (stmt_after_increment (loop, cand, stmt: use->stmt))
5436 max_niter += 1;
5437 period_value = wi::to_widest (t: period);
5438 if (wi::gtu_p (x: max_niter, y: period_value))
5439 {
5440 /* See if we can take advantage of inferred loop bound
5441 information. */
5442 if (data->loop_single_exit_p)
5443 {
5444 if (!max_loop_iterations (loop, &max_niter))
5445 return false;
5446 /* The loop bound is already adjusted by adding 1. */
5447 if (wi::gtu_p (x: max_niter, y: period_value))
5448 return false;
5449 }
5450 else
5451 return false;
5452 }
5453 }
5454
5455 /* For doloop IV cand, the bound would be zero. It's safe whether
5456 may_be_zero set or not. */
5457 if (cand->doloop_p)
5458 {
5459 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5460 *comp = iv_elimination_compare (data, use);
5461 return true;
5462 }
5463
5464 cand_value_at (loop, cand, at: use->stmt, desc, val: &bnd);
5465
5466 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5467 aff_combination_to_tree (&bnd));
5468 *comp = iv_elimination_compare (data, use);
5469
5470 /* It is unlikely that computing the number of iterations using division
5471 would be more profitable than keeping the original induction variable. */
5472 bool cond_overflow_p;
5473 if (expression_expensive_p (*bound, &cond_overflow_p))
5474 return false;
5475
5476 /* Sometimes, it is possible to handle the situation that the number of
5477 iterations may be zero unless additional assumptions by using <
5478 instead of != in the exit condition.
5479
5480 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5481 base the exit condition on it. However, that is often too
5482 expensive. */
5483 if (!integer_zerop (desc->may_be_zero))
5484 return iv_elimination_compare_lt (data, cand, comp_p: comp, niter: desc);
5485
5486 return true;
5487}
5488
5489 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5490 be copied, if it is used in the loop body and DATA->body_includes_call. */
5491
5492static int
5493parm_decl_cost (struct ivopts_data *data, tree bound)
5494{
5495 tree sbound = bound;
5496 STRIP_NOPS (sbound);
5497
5498 if (TREE_CODE (sbound) == SSA_NAME
5499 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5500 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5501 && data->body_includes_call)
5502 return COSTS_N_INSNS (1);
5503
5504 return 0;
5505}
5506
5507/* Determines cost of computing the use in GROUP with CAND in a condition. */
5508
5509static bool
5510determine_group_iv_cost_cond (struct ivopts_data *data,
5511 struct iv_group *group, struct iv_cand *cand)
5512{
5513 tree bound = NULL_TREE;
5514 struct iv *cmp_iv;
5515 bitmap inv_exprs = NULL;
5516 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5517 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5518 enum comp_iv_rewrite rewrite_type;
5519 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5520 tree *control_var, *bound_cst;
5521 enum tree_code comp = ERROR_MARK;
5522 struct iv_use *use = group->vuses[0];
5523
5524 /* Extract condition operands. */
5525 rewrite_type = extract_cond_operands (data, stmt: use->stmt, control_var: &control_var,
5526 bound: &bound_cst, NULL, iv_bound: &cmp_iv);
5527 gcc_assert (rewrite_type != COMP_IV_NA);
5528
5529 /* Try iv elimination. */
5530 if (rewrite_type == COMP_IV_ELIM
5531 && may_eliminate_iv (data, use, cand, bound: &bound, comp: &comp))
5532 {
5533 elim_cost = force_var_cost (data, expr: bound, inv_vars: &inv_vars_elim);
5534 if (elim_cost.cost == 0)
5535 elim_cost.cost = parm_decl_cost (data, bound);
5536 else if (TREE_CODE (bound) == INTEGER_CST)
5537 elim_cost.cost = 0;
5538 /* If we replace a loop condition 'i < n' with 'p < base + n',
5539 inv_vars_elim will have 'base' and 'n' set, which implies that both
5540 'base' and 'n' will be live during the loop. More likely,
5541 'base + n' will be loop invariant, resulting in only one live value
5542 during the loop. So in that case we clear inv_vars_elim and set
5543 inv_expr_elim instead. */
5544 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5545 {
5546 inv_expr_elim = get_loop_invariant_expr (data, inv_expr: bound);
5547 bitmap_clear (inv_vars_elim);
5548 }
5549 /* The bound is a loop invariant, so it will be only computed
5550 once. */
5551 elim_cost.cost = adjust_setup_cost (data, cost: elim_cost.cost);
5552 }
5553
5554 /* When the condition is a comparison of the candidate IV against
5555 zero, prefer this IV.
5556
5557 TODO: The constant that we're subtracting from the cost should
5558 be target-dependent. This information should be added to the
5559 target costs for each backend. */
5560 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5561 && integer_zerop (*bound_cst)
5562 && (operand_equal_p (*control_var, cand->var_after, flags: 0)
5563 || operand_equal_p (*control_var, cand->var_before, flags: 0)))
5564 elim_cost -= 1;
5565
5566 express_cost = get_computation_cost (data, use, cand, address_p: false,
5567 inv_vars: &inv_vars_express, NULL,
5568 inv_expr: &inv_expr_express);
5569 if (cmp_iv != NULL)
5570 find_inv_vars (data, expr_p: &cmp_iv->base, inv_vars: &inv_vars_express);
5571
5572 /* Count the cost of the original bound as well. */
5573 bound_cost = force_var_cost (data, expr: *bound_cst, NULL);
5574 if (bound_cost.cost == 0)
5575 bound_cost.cost = parm_decl_cost (data, bound: *bound_cst);
5576 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5577 bound_cost.cost = 0;
5578 express_cost += bound_cost;
5579
5580 /* Choose the better approach, preferring the eliminated IV. */
5581 if (elim_cost <= express_cost)
5582 {
5583 cost = elim_cost;
5584 inv_vars = inv_vars_elim;
5585 inv_vars_elim = NULL;
5586 inv_expr = inv_expr_elim;
5587 /* For doloop candidate/use pair, adjust to zero cost. */
5588 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5589 cost = no_cost;
5590 }
5591 else
5592 {
5593 cost = express_cost;
5594 inv_vars = inv_vars_express;
5595 inv_vars_express = NULL;
5596 bound = NULL_TREE;
5597 comp = ERROR_MARK;
5598 inv_expr = inv_expr_express;
5599 }
5600
5601 if (inv_expr)
5602 {
5603 inv_exprs = BITMAP_ALLOC (NULL);
5604 bitmap_set_bit (inv_exprs, inv_expr->id);
5605 }
5606 set_group_iv_cost (data, group, cand, cost,
5607 inv_vars, value: bound, comp, inv_exprs);
5608
5609 if (inv_vars_elim)
5610 BITMAP_FREE (inv_vars_elim);
5611 if (inv_vars_express)
5612 BITMAP_FREE (inv_vars_express);
5613
5614 return !cost.infinite_cost_p ();
5615}
5616
5617/* Determines cost of computing uses in GROUP with CAND. Returns false
5618 if USE cannot be represented with CAND. */
5619
5620static bool
5621determine_group_iv_cost (struct ivopts_data *data,
5622 struct iv_group *group, struct iv_cand *cand)
5623{
5624 switch (group->type)
5625 {
5626 case USE_NONLINEAR_EXPR:
5627 return determine_group_iv_cost_generic (data, group, cand);
5628
5629 case USE_REF_ADDRESS:
5630 case USE_PTR_ADDRESS:
5631 return determine_group_iv_cost_address (data, group, cand);
5632
5633 case USE_COMPARE:
5634 return determine_group_iv_cost_cond (data, group, cand);
5635
5636 default:
5637 gcc_unreachable ();
5638 }
5639}
5640
5641/* Return true if get_computation_cost indicates that autoincrement is
5642 a possibility for the pair of USE and CAND, false otherwise. */
5643
5644static bool
5645autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5646 struct iv_cand *cand)
5647{
5648 if (!address_p (type: use->type))
5649 return false;
5650
5651 bool can_autoinc = false;
5652 get_computation_cost (data, use, cand, address_p: true, NULL, can_autoinc: &can_autoinc, NULL);
5653 return can_autoinc;
5654}
5655
5656/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5657 use that allows autoincrement, and set their AINC_USE if possible. */
5658
5659static void
5660set_autoinc_for_original_candidates (struct ivopts_data *data)
5661{
5662 unsigned i, j;
5663
5664 for (i = 0; i < data->vcands.length (); i++)
5665 {
5666 struct iv_cand *cand = data->vcands[i];
5667 struct iv_use *closest_before = NULL;
5668 struct iv_use *closest_after = NULL;
5669 if (cand->pos != IP_ORIGINAL)
5670 continue;
5671
5672 for (j = 0; j < data->vgroups.length (); j++)
5673 {
5674 struct iv_group *group = data->vgroups[j];
5675 struct iv_use *use = group->vuses[0];
5676 unsigned uid = gimple_uid (g: use->stmt);
5677
5678 if (gimple_bb (g: use->stmt) != gimple_bb (g: cand->incremented_at))
5679 continue;
5680
5681 if (uid < gimple_uid (g: cand->incremented_at)
5682 && (closest_before == NULL
5683 || uid > gimple_uid (g: closest_before->stmt)))
5684 closest_before = use;
5685
5686 if (uid > gimple_uid (g: cand->incremented_at)
5687 && (closest_after == NULL
5688 || uid < gimple_uid (g: closest_after->stmt)))
5689 closest_after = use;
5690 }
5691
5692 if (closest_before != NULL
5693 && autoinc_possible_for_pair (data, use: closest_before, cand))
5694 cand->ainc_use = closest_before;
5695 else if (closest_after != NULL
5696 && autoinc_possible_for_pair (data, use: closest_after, cand))
5697 cand->ainc_use = closest_after;
5698 }
5699}
5700
5701/* Relate compare use with all candidates. */
5702
5703static void
5704relate_compare_use_with_all_cands (struct ivopts_data *data)
5705{
5706 unsigned i, count = data->vcands.length ();
5707 for (i = 0; i < data->vgroups.length (); i++)
5708 {
5709 struct iv_group *group = data->vgroups[i];
5710
5711 if (group->type == USE_COMPARE)
5712 bitmap_set_range (group->related_cands, 0, count);
5713 }
5714}
5715
5716/* If PREFERRED_MODE is suitable and profitable, use the preferred
5717 PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. */
5718
5719static tree
5720compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5721 const widest_int &iterations_max)
5722{
5723 tree ntype = TREE_TYPE (niter);
5724 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5725 if (!pref_type)
5726 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5727 build_int_cst (ntype, 1));
5728
5729 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5730
5731 int prec = TYPE_PRECISION (ntype);
5732 int pref_prec = TYPE_PRECISION (pref_type);
5733
5734 tree base;
5735
5736 /* Check if the PREFERRED_MODED is able to present niter. */
5737 if (pref_prec > prec
5738 || wi::ltu_p (x: iterations_max,
5739 y: widest_int::from (x: wi::max_value (pref_prec, UNSIGNED),
5740 sgn: UNSIGNED)))
5741 {
5742 /* No wrap, it is safe to use preferred type after niter + 1. */
5743 if (wi::ltu_p (x: iterations_max,
5744 y: widest_int::from (x: wi::max_value (prec, UNSIGNED),
5745 sgn: UNSIGNED)))
5746 {
5747 /* This could help to optimize "-1 +1" pair when niter looks
5748 like "n-1": n is in original mode. "base = (n - 1) + 1"
5749 in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */
5750 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5751 build_int_cst (ntype, 1));
5752 base = fold_convert (pref_type, base);
5753 }
5754
5755 /* To avoid wrap, convert niter to preferred type before plus 1. */
5756 else
5757 {
5758 niter = fold_convert (pref_type, niter);
5759 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5760 build_int_cst (pref_type, 1));
5761 }
5762 }
5763 else
5764 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5765 build_int_cst (ntype, 1));
5766 return base;
5767}
5768
5769/* Add one doloop dedicated IV candidate:
5770 - Base is (may_be_zero ? 1 : (niter + 1)).
5771 - Step is -1. */
5772
5773static void
5774add_iv_candidate_for_doloop (struct ivopts_data *data)
5775{
5776 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5777 gcc_assert (niter_desc && niter_desc->assumptions);
5778
5779 tree niter = niter_desc->niter;
5780 tree ntype = TREE_TYPE (niter);
5781 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5782
5783 tree may_be_zero = niter_desc->may_be_zero;
5784 if (may_be_zero && integer_zerop (may_be_zero))
5785 may_be_zero = NULL_TREE;
5786 if (may_be_zero)
5787 {
5788 if (COMPARISON_CLASS_P (may_be_zero))
5789 {
5790 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5791 build_int_cst (ntype, 0),
5792 rewrite_to_non_trapping_overflow (niter));
5793 }
5794 /* Don't try to obtain the iteration count expression when may_be_zero is
5795 integer_nonzerop (actually iteration count is one) or else. */
5796 else
5797 return;
5798 }
5799
5800 machine_mode mode = TYPE_MODE (ntype);
5801 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5802
5803 tree base;
5804 if (mode != pref_mode)
5805 {
5806 base = compute_doloop_base_on_mode (preferred_mode: pref_mode, niter, iterations_max: niter_desc->max);
5807 ntype = TREE_TYPE (base);
5808 }
5809 else
5810 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5811 build_int_cst (ntype, 1));
5812
5813
5814 add_candidate (data, base, step: build_int_cst (ntype, -1), important: true, NULL, NULL, doloop: true);
5815}
5816
5817/* Finds the candidates for the induction variables. */
5818
5819static void
5820find_iv_candidates (struct ivopts_data *data)
5821{
5822 /* Add commonly used ivs. */
5823 add_standard_iv_candidates (data);
5824
5825 /* Add doloop dedicated ivs. */
5826 if (data->doloop_use_p)
5827 add_iv_candidate_for_doloop (data);
5828
5829 /* Add old induction variables. */
5830 add_iv_candidate_for_bivs (data);
5831
5832 /* Add induction variables derived from uses. */
5833 add_iv_candidate_for_groups (data);
5834
5835 set_autoinc_for_original_candidates (data);
5836
5837 /* Record the important candidates. */
5838 record_important_candidates (data);
5839
5840 /* Relate compare iv_use with all candidates. */
5841 if (!data->consider_all_candidates)
5842 relate_compare_use_with_all_cands (data);
5843
5844 if (dump_file && (dump_flags & TDF_DETAILS))
5845 {
5846 unsigned i;
5847
5848 fprintf (stream: dump_file, format: "\n<Important Candidates>:\t");
5849 for (i = 0; i < data->vcands.length (); i++)
5850 if (data->vcands[i]->important)
5851 fprintf (stream: dump_file, format: " %d,", data->vcands[i]->id);
5852 fprintf (stream: dump_file, format: "\n");
5853
5854 fprintf (stream: dump_file, format: "\n<Group, Cand> Related:\n");
5855 for (i = 0; i < data->vgroups.length (); i++)
5856 {
5857 struct iv_group *group = data->vgroups[i];
5858
5859 if (group->related_cands)
5860 {
5861 fprintf (stream: dump_file, format: " Group %d:\t", group->id);
5862 dump_bitmap (file: dump_file, map: group->related_cands);
5863 }
5864 }
5865 fprintf (stream: dump_file, format: "\n");
5866 }
5867}
5868
5869/* Determines costs of computing use of iv with an iv candidate. */
5870
5871static void
5872determine_group_iv_costs (struct ivopts_data *data)
5873{
5874 unsigned i, j;
5875 struct iv_cand *cand;
5876 struct iv_group *group;
5877 bitmap to_clear = BITMAP_ALLOC (NULL);
5878
5879 alloc_use_cost_map (data);
5880
5881 for (i = 0; i < data->vgroups.length (); i++)
5882 {
5883 group = data->vgroups[i];
5884
5885 if (data->consider_all_candidates)
5886 {
5887 for (j = 0; j < data->vcands.length (); j++)
5888 {
5889 cand = data->vcands[j];
5890 determine_group_iv_cost (data, group, cand);
5891 }
5892 }
5893 else
5894 {
5895 bitmap_iterator bi;
5896
5897 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5898 {
5899 cand = data->vcands[j];
5900 if (!determine_group_iv_cost (data, group, cand))
5901 bitmap_set_bit (to_clear, j);
5902 }
5903
5904 /* Remove the candidates for that the cost is infinite from
5905 the list of related candidates. */
5906 bitmap_and_compl_into (group->related_cands, to_clear);
5907 bitmap_clear (to_clear);
5908 }
5909 }
5910
5911 BITMAP_FREE (to_clear);
5912
5913 if (dump_file && (dump_flags & TDF_DETAILS))
5914 {
5915 bitmap_iterator bi;
5916
5917 /* Dump invariant variables. */
5918 fprintf (stream: dump_file, format: "\n<Invariant Vars>:\n");
5919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5920 {
5921 struct version_info *info = ver_info (data, ver: i);
5922 if (info->inv_id)
5923 {
5924 fprintf (stream: dump_file, format: "Inv %d:\t", info->inv_id);
5925 print_generic_expr (dump_file, info->name, TDF_SLIM);
5926 fprintf (stream: dump_file, format: "%s\n",
5927 info->has_nonlin_use ? "" : "\t(eliminable)");
5928 }
5929 }
5930
5931 /* Dump invariant expressions. */
5932 fprintf (stream: dump_file, format: "\n<Invariant Expressions>:\n");
5933 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5934
5935 for (hash_table<iv_inv_expr_hasher>::iterator it
5936 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5937 ++it)
5938 list.safe_push (obj: *it);
5939
5940 list.qsort (sort_iv_inv_expr_ent);
5941
5942 for (i = 0; i < list.length (); ++i)
5943 {
5944 fprintf (stream: dump_file, format: "inv_expr %d: \t", list[i]->id);
5945 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5946 fprintf (stream: dump_file, format: "\n");
5947 }
5948
5949 fprintf (stream: dump_file, format: "\n<Group-candidate Costs>:\n");
5950
5951 for (i = 0; i < data->vgroups.length (); i++)
5952 {
5953 group = data->vgroups[i];
5954
5955 fprintf (stream: dump_file, format: "Group %d:\n", i);
5956 fprintf (stream: dump_file, format: " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5957 for (j = 0; j < group->n_map_members; j++)
5958 {
5959 if (!group->cost_map[j].cand
5960 || group->cost_map[j].cost.infinite_cost_p ())
5961 continue;
5962
5963 fprintf (stream: dump_file, format: " %d\t%" PRId64 "\t%d\t",
5964 group->cost_map[j].cand->id,
5965 group->cost_map[j].cost.cost,
5966 group->cost_map[j].cost.complexity);
5967 if (!group->cost_map[j].inv_exprs
5968 || bitmap_empty_p (map: group->cost_map[j].inv_exprs))
5969 fprintf (stream: dump_file, format: "NIL;\t");
5970 else
5971 bitmap_print (dump_file,
5972 group->cost_map[j].inv_exprs, "", ";\t");
5973 if (!group->cost_map[j].inv_vars
5974 || bitmap_empty_p (map: group->cost_map[j].inv_vars))
5975 fprintf (stream: dump_file, format: "NIL;\n");
5976 else
5977 bitmap_print (dump_file,
5978 group->cost_map[j].inv_vars, "", "\n");
5979 }
5980
5981 fprintf (stream: dump_file, format: "\n");
5982 }
5983 fprintf (stream: dump_file, format: "\n");
5984 }
5985}
5986
5987/* Determines cost of the candidate CAND. */
5988
5989static void
5990determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5991{
5992 comp_cost cost_base;
5993 int64_t cost, cost_step;
5994 tree base;
5995
5996 gcc_assert (cand->iv != NULL);
5997
5998 /* There are two costs associated with the candidate -- its increment
5999 and its initialization. The second is almost negligible for any loop
6000 that rolls enough, so we take it just very little into account. */
6001
6002 base = cand->iv->base;
6003 cost_base = force_var_cost (data, expr: base, NULL);
6004 /* It will be exceptional that the iv register happens to be initialized with
6005 the proper value at no cost. In general, there will at least be a regcopy
6006 or a const set. */
6007 if (cost_base.cost == 0)
6008 cost_base.cost = COSTS_N_INSNS (1);
6009 /* Doloop decrement should be considered as zero cost. */
6010 if (cand->doloop_p)
6011 cost_step = 0;
6012 else
6013 cost_step = add_cost (speed: data->speed, TYPE_MODE (TREE_TYPE (base)));
6014 cost = cost_step + adjust_setup_cost (data, cost: cost_base.cost);
6015
6016 /* Prefer the original ivs unless we may gain something by replacing it.
6017 The reason is to make debugging simpler; so this is not relevant for
6018 artificial ivs created by other optimization passes. */
6019 if ((cand->pos != IP_ORIGINAL
6020 || !SSA_NAME_VAR (cand->var_before)
6021 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6022 /* Prefer doloop as well. */
6023 && !cand->doloop_p)
6024 cost++;
6025
6026 /* Prefer not to insert statements into latch unless there are some
6027 already (so that we do not create unnecessary jumps). */
6028 if (cand->pos == IP_END
6029 && empty_block_p (ip_end_pos (data->current_loop)))
6030 cost++;
6031
6032 cand->cost = cost;
6033 cand->cost_step = cost_step;
6034}
6035
6036/* Determines costs of computation of the candidates. */
6037
6038static void
6039determine_iv_costs (struct ivopts_data *data)
6040{
6041 unsigned i;
6042
6043 if (dump_file && (dump_flags & TDF_DETAILS))
6044 {
6045 fprintf (stream: dump_file, format: "<Candidate Costs>:\n");
6046 fprintf (stream: dump_file, format: " cand\tcost\n");
6047 }
6048
6049 for (i = 0; i < data->vcands.length (); i++)
6050 {
6051 struct iv_cand *cand = data->vcands[i];
6052
6053 determine_iv_cost (data, cand);
6054
6055 if (dump_file && (dump_flags & TDF_DETAILS))
6056 fprintf (stream: dump_file, format: " %d\t%d\n", i, cand->cost);
6057 }
6058
6059 if (dump_file && (dump_flags & TDF_DETAILS))
6060 fprintf (stream: dump_file, format: "\n");
6061}
6062
6063/* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6064 induction variables. Note N_INVS includes both invariant variables and
6065 invariant expressions. */
6066
6067static unsigned
6068ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6069 unsigned n_cands)
6070{
6071 unsigned cost;
6072 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6073 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6074 bool speed = data->speed;
6075
6076 /* If there is a call in the loop body, the call-clobbered registers
6077 are not available for loop invariants. */
6078 if (data->body_includes_call)
6079 available_regs = available_regs - target_clobbered_regs;
6080
6081 /* If we have enough registers. */
6082 if (regs_needed + target_res_regs < available_regs)
6083 cost = n_new;
6084 /* If close to running out of registers, try to preserve them. */
6085 else if (regs_needed <= available_regs)
6086 cost = target_reg_cost [speed] * regs_needed;
6087 /* If we run out of available registers but the number of candidates
6088 does not, we penalize extra registers using target_spill_cost. */
6089 else if (n_cands <= available_regs)
6090 cost = target_reg_cost [speed] * available_regs
6091 + target_spill_cost [speed] * (regs_needed - available_regs);
6092 /* If the number of candidates runs out available registers, we penalize
6093 extra candidate registers using target_spill_cost * 2. Because it is
6094 more expensive to spill induction variable than invariant. */
6095 else
6096 cost = target_reg_cost [speed] * available_regs
6097 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6098 + target_spill_cost [speed] * (regs_needed - n_cands);
6099
6100 /* Finally, add the number of candidates, so that we prefer eliminating
6101 induction variables if possible. */
6102 return cost + n_cands;
6103}
6104
6105/* For each size of the induction variable set determine the penalty. */
6106
6107static void
6108determine_set_costs (struct ivopts_data *data)
6109{
6110 unsigned j, n;
6111 gphi *phi;
6112 gphi_iterator psi;
6113 tree op;
6114 class loop *loop = data->current_loop;
6115 bitmap_iterator bi;
6116
6117 if (dump_file && (dump_flags & TDF_DETAILS))
6118 {
6119 fprintf (stream: dump_file, format: "<Global Costs>:\n");
6120 fprintf (stream: dump_file, format: " target_avail_regs %d\n", target_avail_regs);
6121 fprintf (stream: dump_file, format: " target_clobbered_regs %d\n", target_clobbered_regs);
6122 fprintf (stream: dump_file, format: " target_reg_cost %d\n", target_reg_cost[data->speed]);
6123 fprintf (stream: dump_file, format: " target_spill_cost %d\n", target_spill_cost[data->speed]);
6124 }
6125
6126 n = 0;
6127 for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
6128 {
6129 phi = psi.phi ();
6130 op = PHI_RESULT (phi);
6131
6132 if (virtual_operand_p (op))
6133 continue;
6134
6135 if (get_iv (data, var: op))
6136 continue;
6137
6138 if (!POINTER_TYPE_P (TREE_TYPE (op))
6139 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6140 continue;
6141
6142 n++;
6143 }
6144
6145 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6146 {
6147 struct version_info *info = ver_info (data, ver: j);
6148
6149 if (info->inv_id && info->has_nonlin_use)
6150 n++;
6151 }
6152
6153 data->regs_used = n;
6154 if (dump_file && (dump_flags & TDF_DETAILS))
6155 fprintf (stream: dump_file, format: " regs_used %d\n", n);
6156
6157 if (dump_file && (dump_flags & TDF_DETAILS))
6158 {
6159 fprintf (stream: dump_file, format: " cost for size:\n");
6160 fprintf (stream: dump_file, format: " ivs\tcost\n");
6161 for (j = 0; j <= 2 * target_avail_regs; j++)
6162 fprintf (stream: dump_file, format: " %d\t%d\n", j,
6163 ivopts_estimate_reg_pressure (data, n_invs: 0, n_cands: j));
6164 fprintf (stream: dump_file, format: "\n");
6165 }
6166}
6167
6168/* Returns true if A is a cheaper cost pair than B. */
6169
6170static bool
6171cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6172{
6173 if (!a)
6174 return false;
6175
6176 if (!b)
6177 return true;
6178
6179 if (a->cost < b->cost)
6180 return true;
6181
6182 if (b->cost < a->cost)
6183 return false;
6184
6185 /* In case the costs are the same, prefer the cheaper candidate. */
6186 if (a->cand->cost < b->cand->cost)
6187 return true;
6188
6189 return false;
6190}
6191
6192/* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6193 for more expensive, equal and cheaper respectively. */
6194
6195static int
6196compare_cost_pair (class cost_pair *a, class cost_pair *b)
6197{
6198 if (cheaper_cost_pair (a, b))
6199 return -1;
6200 if (cheaper_cost_pair (a: b, b: a))
6201 return 1;
6202
6203 return 0;
6204}
6205
6206/* Returns candidate by that USE is expressed in IVS. */
6207
6208static class cost_pair *
6209iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6210{
6211 return ivs->cand_for_group[group->id];
6212}
6213
6214/* Computes the cost field of IVS structure. */
6215
6216static void
6217iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6218{
6219 comp_cost cost = ivs->cand_use_cost;
6220
6221 cost += ivs->cand_cost;
6222 cost += ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands);
6223 ivs->cost = cost;
6224}
6225
6226/* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6227 and IVS. */
6228
6229static void
6230iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6231{
6232 bitmap_iterator bi;
6233 unsigned iid;
6234
6235 if (!invs)
6236 return;
6237
6238 gcc_assert (n_inv_uses != NULL);
6239 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6240 {
6241 n_inv_uses[iid]--;
6242 if (n_inv_uses[iid] == 0)
6243 ivs->n_invs--;
6244 }
6245}
6246
6247/* Set USE not to be expressed by any candidate in IVS. */
6248
6249static void
6250iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6251 struct iv_group *group)
6252{
6253 unsigned gid = group->id, cid;
6254 class cost_pair *cp;
6255
6256 cp = ivs->cand_for_group[gid];
6257 if (!cp)
6258 return;
6259 cid = cp->cand->id;
6260
6261 ivs->bad_groups++;
6262 ivs->cand_for_group[gid] = NULL;
6263 ivs->n_cand_uses[cid]--;
6264
6265 if (ivs->n_cand_uses[cid] == 0)
6266 {
6267 bitmap_clear_bit (ivs->cands, cid);
6268 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6269 ivs->n_cands--;
6270 ivs->cand_cost -= cp->cand->cost;
6271 iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6272 iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6273 }
6274
6275 ivs->cand_use_cost -= cp->cost;
6276 iv_ca_set_remove_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6277 iv_ca_set_remove_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6278 iv_ca_recount_cost (data, ivs);
6279}
6280
6281/* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6282 IVS. */
6283
6284static void
6285iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6286{
6287 bitmap_iterator bi;
6288 unsigned iid;
6289
6290 if (!invs)
6291 return;
6292
6293 gcc_assert (n_inv_uses != NULL);
6294 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6295 {
6296 n_inv_uses[iid]++;
6297 if (n_inv_uses[iid] == 1)
6298 ivs->n_invs++;
6299 }
6300}
6301
6302/* Set cost pair for GROUP in set IVS to CP. */
6303
6304static void
6305iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6306 struct iv_group *group, class cost_pair *cp)
6307{
6308 unsigned gid = group->id, cid;
6309
6310 if (ivs->cand_for_group[gid] == cp)
6311 return;
6312
6313 if (ivs->cand_for_group[gid])
6314 iv_ca_set_no_cp (data, ivs, group);
6315
6316 if (cp)
6317 {
6318 cid = cp->cand->id;
6319
6320 ivs->bad_groups--;
6321 ivs->cand_for_group[gid] = cp;
6322 ivs->n_cand_uses[cid]++;
6323 if (ivs->n_cand_uses[cid] == 1)
6324 {
6325 bitmap_set_bit (ivs->cands, cid);
6326 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6327 ivs->n_cands++;
6328 ivs->cand_cost += cp->cand->cost;
6329 iv_ca_set_add_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6330 iv_ca_set_add_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6331 }
6332
6333 ivs->cand_use_cost += cp->cost;
6334 iv_ca_set_add_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6335 iv_ca_set_add_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6336 iv_ca_recount_cost (data, ivs);
6337 }
6338}
6339
6340/* Extend set IVS by expressing USE by some of the candidates in it
6341 if possible. Consider all important candidates if candidates in
6342 set IVS don't give any result. */
6343
6344static void
6345iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6346 struct iv_group *group)
6347{
6348 class cost_pair *best_cp = NULL, *cp;
6349 bitmap_iterator bi;
6350 unsigned i;
6351 struct iv_cand *cand;
6352
6353 gcc_assert (ivs->upto >= group->id);
6354 ivs->upto++;
6355 ivs->bad_groups++;
6356
6357 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6358 {
6359 cand = data->vcands[i];
6360 cp = get_group_iv_cost (data, group, cand);
6361 if (cheaper_cost_pair (a: cp, b: best_cp))
6362 best_cp = cp;
6363 }
6364
6365 if (best_cp == NULL)
6366 {
6367 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6368 {
6369 cand = data->vcands[i];
6370 cp = get_group_iv_cost (data, group, cand);
6371 if (cheaper_cost_pair (a: cp, b: best_cp))
6372 best_cp = cp;
6373 }
6374 }
6375
6376 iv_ca_set_cp (data, ivs, group, cp: best_cp);
6377}
6378
6379/* Get cost for assignment IVS. */
6380
6381static comp_cost
6382iv_ca_cost (class iv_ca *ivs)
6383{
6384 /* This was a conditional expression but it triggered a bug in
6385 Sun C 5.5. */
6386 if (ivs->bad_groups)
6387 return infinite_cost;
6388 else
6389 return ivs->cost;
6390}
6391
6392/* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6393 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6394 respectively. */
6395
6396static int
6397iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6398 struct iv_group *group, class cost_pair *old_cp,
6399 class cost_pair *new_cp)
6400{
6401 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6402 unsigned old_n_invs = ivs->n_invs;
6403 iv_ca_set_cp (data, ivs, group, cp: new_cp);
6404 unsigned new_n_invs = ivs->n_invs;
6405 iv_ca_set_cp (data, ivs, group, cp: old_cp);
6406
6407 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6408}
6409
6410/* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6411 it before NEXT. */
6412
6413static struct iv_ca_delta *
6414iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6415 class cost_pair *new_cp, struct iv_ca_delta *next)
6416{
6417 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6418
6419 change->group = group;
6420 change->old_cp = old_cp;
6421 change->new_cp = new_cp;
6422 change->next = next;
6423
6424 return change;
6425}
6426
6427/* Joins two lists of changes L1 and L2. Destructive -- old lists
6428 are rewritten. */
6429
6430static struct iv_ca_delta *
6431iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6432{
6433 struct iv_ca_delta *last;
6434
6435 if (!l2)
6436 return l1;
6437
6438 if (!l1)
6439 return l2;
6440
6441 for (last = l1; last->next; last = last->next)
6442 continue;
6443 last->next = l2;
6444
6445 return l1;
6446}
6447
6448/* Reverse the list of changes DELTA, forming the inverse to it. */
6449
6450static struct iv_ca_delta *
6451iv_ca_delta_reverse (struct iv_ca_delta *delta)
6452{
6453 struct iv_ca_delta *act, *next, *prev = NULL;
6454
6455 for (act = delta; act; act = next)
6456 {
6457 next = act->next;
6458 act->next = prev;
6459 prev = act;
6460
6461 std::swap (a&: act->old_cp, b&: act->new_cp);
6462 }
6463
6464 return prev;
6465}
6466
6467/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6468 reverted instead. */
6469
6470static void
6471iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6472 struct iv_ca_delta *delta, bool forward)
6473{
6474 class cost_pair *from, *to;
6475 struct iv_ca_delta *act;
6476
6477 if (!forward)
6478 delta = iv_ca_delta_reverse (delta);
6479
6480 for (act = delta; act; act = act->next)
6481 {
6482 from = act->old_cp;
6483 to = act->new_cp;
6484 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6485 iv_ca_set_cp (data, ivs, group: act->group, cp: to);
6486 }
6487
6488 if (!forward)
6489 iv_ca_delta_reverse (delta);
6490}
6491
6492/* Returns true if CAND is used in IVS. */
6493
6494static bool
6495iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6496{
6497 return ivs->n_cand_uses[cand->id] > 0;
6498}
6499
6500/* Returns number of induction variable candidates in the set IVS. */
6501
6502static unsigned
6503iv_ca_n_cands (class iv_ca *ivs)
6504{
6505 return ivs->n_cands;
6506}
6507
6508/* Free the list of changes DELTA. */
6509
6510static void
6511iv_ca_delta_free (struct iv_ca_delta **delta)
6512{
6513 struct iv_ca_delta *act, *next;
6514
6515 for (act = *delta; act; act = next)
6516 {
6517 next = act->next;
6518 free (ptr: act);
6519 }
6520
6521 *delta = NULL;
6522}
6523
6524/* Allocates new iv candidates assignment. */
6525
6526static class iv_ca *
6527iv_ca_new (struct ivopts_data *data)
6528{
6529 class iv_ca *nw = XNEW (class iv_ca);
6530
6531 nw->upto = 0;
6532 nw->bad_groups = 0;
6533 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6534 data->vgroups.length ());
6535 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6536 nw->cands = BITMAP_ALLOC (NULL);
6537 nw->n_cands = 0;
6538 nw->n_invs = 0;
6539 nw->cand_use_cost = no_cost;
6540 nw->cand_cost = 0;
6541 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6542 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6543 nw->cost = no_cost;
6544
6545 return nw;
6546}
6547
6548/* Free memory occupied by the set IVS. */
6549
6550static void
6551iv_ca_free (class iv_ca **ivs)
6552{
6553 free (ptr: (*ivs)->cand_for_group);
6554 free (ptr: (*ivs)->n_cand_uses);
6555 BITMAP_FREE ((*ivs)->cands);
6556 free (ptr: (*ivs)->n_inv_var_uses);
6557 free (ptr: (*ivs)->n_inv_expr_uses);
6558 free (ptr: *ivs);
6559 *ivs = NULL;
6560}
6561
6562/* Dumps IVS to FILE. */
6563
6564static void
6565iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6566{
6567 unsigned i;
6568 comp_cost cost = iv_ca_cost (ivs);
6569
6570 fprintf (stream: file, format: " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6571 cost.complexity);
6572 fprintf (stream: file, format: " reg_cost: %d\n",
6573 ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands));
6574 fprintf (stream: file, format: " cand_cost: %" PRId64 "\n cand_group_cost: "
6575 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6576 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6577 bitmap_print (file, ivs->cands, " candidates: ","\n");
6578
6579 for (i = 0; i < ivs->upto; i++)
6580 {
6581 struct iv_group *group = data->vgroups[i];
6582 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6583 if (cp)
6584 fprintf (stream: file, format: " group:%d --> iv_cand:%d, cost=("
6585 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6586 cp->cost.cost, cp->cost.complexity);
6587 else
6588 fprintf (stream: file, format: " group:%d --> ??\n", group->id);
6589 }
6590
6591 const char *pref = "";
6592 fprintf (stream: file, format: " invariant variables: ");
6593 for (i = 1; i <= data->max_inv_var_id; i++)
6594 if (ivs->n_inv_var_uses[i])
6595 {
6596 fprintf (stream: file, format: "%s%d", pref, i);
6597 pref = ", ";
6598 }
6599
6600 pref = "";
6601 fprintf (stream: file, format: "\n invariant expressions: ");
6602 for (i = 1; i <= data->max_inv_expr_id; i++)
6603 if (ivs->n_inv_expr_uses[i])
6604 {
6605 fprintf (stream: file, format: "%s%d", pref, i);
6606 pref = ", ";
6607 }
6608
6609 fprintf (stream: file, format: "\n\n");
6610}
6611
6612/* Try changing candidate in IVS to CAND for each use. Return cost of the
6613 new set, and store differences in DELTA. Number of induction variables
6614 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6615 the function will try to find a solution with mimimal iv candidates. */
6616
6617static comp_cost
6618iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6619 struct iv_cand *cand, struct iv_ca_delta **delta,
6620 unsigned *n_ivs, bool min_ncand)
6621{
6622 unsigned i;
6623 comp_cost cost;
6624 struct iv_group *group;
6625 class cost_pair *old_cp, *new_cp;
6626
6627 *delta = NULL;
6628 for (i = 0; i < ivs->upto; i++)
6629 {
6630 group = data->vgroups[i];
6631 old_cp = iv_ca_cand_for_group (ivs, group);
6632
6633 if (old_cp
6634 && old_cp->cand == cand)
6635 continue;
6636
6637 new_cp = get_group_iv_cost (data, group, cand);
6638 if (!new_cp)
6639 continue;
6640
6641 if (!min_ncand)
6642 {
6643 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6644 /* Skip if new_cp depends on more invariants. */
6645 if (cmp_invs > 0)
6646 continue;
6647
6648 int cmp_cost = compare_cost_pair (a: new_cp, b: old_cp);
6649 /* Skip if new_cp is not cheaper. */
6650 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6651 continue;
6652 }
6653
6654 *delta = iv_ca_delta_add (group, old_cp, new_cp, next: *delta);
6655 }
6656
6657 iv_ca_delta_commit (data, ivs, delta: *delta, forward: true);
6658 cost = iv_ca_cost (ivs);
6659 if (n_ivs)
6660 *n_ivs = iv_ca_n_cands (ivs);
6661 iv_ca_delta_commit (data, ivs, delta: *delta, forward: false);
6662
6663 return cost;
6664}
6665
6666/* Try narrowing set IVS by removing CAND. Return the cost of
6667 the new set and store the differences in DELTA. START is
6668 the candidate with which we start narrowing. */
6669
6670static comp_cost
6671iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6672 struct iv_cand *cand, struct iv_cand *start,
6673 struct iv_ca_delta **delta)
6674{
6675 unsigned i, ci;
6676 struct iv_group *group;
6677 class cost_pair *old_cp, *new_cp, *cp;
6678 bitmap_iterator bi;
6679 struct iv_cand *cnd;
6680 comp_cost cost, best_cost, acost;
6681
6682 *delta = NULL;
6683 for (i = 0; i < data->vgroups.length (); i++)
6684 {
6685 group = data->vgroups[i];
6686
6687 old_cp = iv_ca_cand_for_group (ivs, group);
6688 if (old_cp->cand != cand)
6689 continue;
6690
6691 best_cost = iv_ca_cost (ivs);
6692 /* Start narrowing with START. */
6693 new_cp = get_group_iv_cost (data, group, cand: start);
6694
6695 if (data->consider_all_candidates)
6696 {
6697 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6698 {
6699 if (ci == cand->id || (start && ci == start->id))
6700 continue;
6701
6702 cnd = data->vcands[ci];
6703
6704 cp = get_group_iv_cost (data, group, cand: cnd);
6705 if (!cp)
6706 continue;
6707
6708 iv_ca_set_cp (data, ivs, group, cp);
6709 acost = iv_ca_cost (ivs);
6710
6711 if (acost < best_cost)
6712 {
6713 best_cost = acost;
6714 new_cp = cp;
6715 }
6716 }
6717 }
6718 else
6719 {
6720 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6721 {
6722 if (ci == cand->id || (start && ci == start->id))
6723 continue;
6724
6725 cnd = data->vcands[ci];
6726
6727 cp = get_group_iv_cost (data, group, cand: cnd);
6728 if (!cp)
6729 continue;
6730
6731 iv_ca_set_cp (data, ivs, group, cp);
6732 acost = iv_ca_cost (ivs);
6733
6734 if (acost < best_cost)
6735 {
6736 best_cost = acost;
6737 new_cp = cp;
6738 }
6739 }
6740 }
6741 /* Restore to old cp for use. */
6742 iv_ca_set_cp (data, ivs, group, cp: old_cp);
6743
6744 if (!new_cp)
6745 {
6746 iv_ca_delta_free (delta);
6747 return infinite_cost;
6748 }
6749
6750 *delta = iv_ca_delta_add (group, old_cp, new_cp, next: *delta);
6751 }
6752
6753 iv_ca_delta_commit (data, ivs, delta: *delta, forward: true);
6754 cost = iv_ca_cost (ivs);
6755 iv_ca_delta_commit (data, ivs, delta: *delta, forward: false);
6756
6757 return cost;
6758}
6759
6760/* Try optimizing the set of candidates IVS by removing candidates different
6761 from to EXCEPT_CAND from it. Return cost of the new set, and store
6762 differences in DELTA. */
6763
6764static comp_cost
6765iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6766 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6767{
6768 bitmap_iterator bi;
6769 struct iv_ca_delta *act_delta, *best_delta;
6770 unsigned i;
6771 comp_cost best_cost, acost;
6772 struct iv_cand *cand;
6773
6774 best_delta = NULL;
6775 best_cost = iv_ca_cost (ivs);
6776
6777 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6778 {
6779 cand = data->vcands[i];
6780
6781 if (cand == except_cand)
6782 continue;
6783
6784 acost = iv_ca_narrow (data, ivs, cand, start: except_cand, delta: &act_delta);
6785
6786 if (acost < best_cost)
6787 {
6788 best_cost = acost;
6789 iv_ca_delta_free (delta: &best_delta);
6790 best_delta = act_delta;
6791 }
6792 else
6793 iv_ca_delta_free (delta: &act_delta);
6794 }
6795
6796 if (!best_delta)
6797 {
6798 *delta = NULL;
6799 return best_cost;
6800 }
6801
6802 /* Recurse to possibly remove other unnecessary ivs. */
6803 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
6804 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6805 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: false);
6806 *delta = iv_ca_delta_join (l1: best_delta, l2: *delta);
6807 return best_cost;
6808}
6809
6810/* Check if CAND_IDX is a candidate other than OLD_CAND and has
6811 cheaper local cost for GROUP than BEST_CP. Return pointer to
6812 the corresponding cost_pair, otherwise just return BEST_CP. */
6813
6814static class cost_pair*
6815cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6816 unsigned int cand_idx, struct iv_cand *old_cand,
6817 class cost_pair *best_cp)
6818{
6819 struct iv_cand *cand;
6820 class cost_pair *cp;
6821
6822 gcc_assert (old_cand != NULL && best_cp != NULL);
6823 if (cand_idx == old_cand->id)
6824 return best_cp;
6825
6826 cand = data->vcands[cand_idx];
6827 cp = get_group_iv_cost (data, group, cand);
6828 if (cp != NULL && cheaper_cost_pair (a: cp, b: best_cp))
6829 return cp;
6830
6831 return best_cp;
6832}
6833
6834/* Try breaking local optimal fixed-point for IVS by replacing candidates
6835 which are used by more than one iv uses. For each of those candidates,
6836 this function tries to represent iv uses under that candidate using
6837 other ones with lower local cost, then tries to prune the new set.
6838 If the new set has lower cost, It returns the new cost after recording
6839 candidate replacement in list DELTA. */
6840
6841static comp_cost
6842iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6843 struct iv_ca_delta **delta)
6844{
6845 bitmap_iterator bi, bj;
6846 unsigned int i, j, k;
6847 struct iv_cand *cand;
6848 comp_cost orig_cost, acost;
6849 struct iv_ca_delta *act_delta, *tmp_delta;
6850 class cost_pair *old_cp, *best_cp = NULL;
6851
6852 *delta = NULL;
6853 orig_cost = iv_ca_cost (ivs);
6854
6855 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6856 {
6857 if (ivs->n_cand_uses[i] == 1
6858 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6859 continue;
6860
6861 cand = data->vcands[i];
6862
6863 act_delta = NULL;
6864 /* Represent uses under current candidate using other ones with
6865 lower local cost. */
6866 for (j = 0; j < ivs->upto; j++)
6867 {
6868 struct iv_group *group = data->vgroups[j];
6869 old_cp = iv_ca_cand_for_group (ivs, group);
6870
6871 if (old_cp->cand != cand)
6872 continue;
6873
6874 best_cp = old_cp;
6875 if (data->consider_all_candidates)
6876 for (k = 0; k < data->vcands.length (); k++)
6877 best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6878 old_cand: old_cp->cand, best_cp);
6879 else
6880 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6881 best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6882 old_cand: old_cp->cand, best_cp);
6883
6884 if (best_cp == old_cp)
6885 continue;
6886
6887 act_delta = iv_ca_delta_add (group, old_cp, new_cp: best_cp, next: act_delta);
6888 }
6889 /* No need for further prune. */
6890 if (!act_delta)
6891 continue;
6892
6893 /* Prune the new candidate set. */
6894 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
6895 acost = iv_ca_prune (data, ivs, NULL, delta: &tmp_delta);
6896 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
6897 act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
6898
6899 if (acost < orig_cost)
6900 {
6901 *delta = act_delta;
6902 return acost;
6903 }
6904 else
6905 iv_ca_delta_free (delta: &act_delta);
6906 }
6907
6908 return orig_cost;
6909}
6910
6911/* Tries to extend the sets IVS in the best possible way in order to
6912 express the GROUP. If ORIGINALP is true, prefer candidates from
6913 the original set of IVs, otherwise favor important candidates not
6914 based on any memory object. */
6915
6916static bool
6917try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6918 struct iv_group *group, bool originalp)
6919{
6920 comp_cost best_cost, act_cost;
6921 unsigned i;
6922 bitmap_iterator bi;
6923 struct iv_cand *cand;
6924 struct iv_ca_delta *best_delta = NULL, *act_delta;
6925 class cost_pair *cp;
6926
6927 iv_ca_add_group (data, ivs, group);
6928 best_cost = iv_ca_cost (ivs);
6929 cp = iv_ca_cand_for_group (ivs, group);
6930 if (cp)
6931 {
6932 best_delta = iv_ca_delta_add (group, NULL, new_cp: cp, NULL);
6933 iv_ca_set_no_cp (data, ivs, group);
6934 }
6935
6936 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6937 first try important candidates not based on any memory object. Only if
6938 this fails, try the specific ones. Rationale -- in loops with many
6939 variables the best choice often is to use just one generic biv. If we
6940 added here many ivs specific to the uses, the optimization algorithm later
6941 would be likely to get stuck in a local minimum, thus causing us to create
6942 too many ivs. The approach from few ivs to more seems more likely to be
6943 successful -- starting from few ivs, replacing an expensive use by a
6944 specific iv should always be a win. */
6945 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6946 {
6947 cand = data->vcands[i];
6948
6949 if (originalp && cand->pos !=IP_ORIGINAL)
6950 continue;
6951
6952 if (!originalp && cand->iv->base_object != NULL_TREE)
6953 continue;
6954
6955 if (iv_ca_cand_used_p (ivs, cand))
6956 continue;
6957
6958 cp = get_group_iv_cost (data, group, cand);
6959 if (!cp)
6960 continue;
6961
6962 iv_ca_set_cp (data, ivs, group, cp);
6963 act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL,
6964 min_ncand: true);
6965 iv_ca_set_no_cp (data, ivs, group);
6966 act_delta = iv_ca_delta_add (group, NULL, new_cp: cp, next: act_delta);
6967
6968 if (act_cost < best_cost)
6969 {
6970 best_cost = act_cost;
6971
6972 iv_ca_delta_free (delta: &best_delta);
6973 best_delta = act_delta;
6974 }
6975 else
6976 iv_ca_delta_free (delta: &act_delta);
6977 }
6978
6979 if (best_cost.infinite_cost_p ())
6980 {
6981 for (i = 0; i < group->n_map_members; i++)
6982 {
6983 cp = group->cost_map + i;
6984 cand = cp->cand;
6985 if (!cand)
6986 continue;
6987
6988 /* Already tried this. */
6989 if (cand->important)
6990 {
6991 if (originalp && cand->pos == IP_ORIGINAL)
6992 continue;
6993 if (!originalp && cand->iv->base_object == NULL_TREE)
6994 continue;
6995 }
6996
6997 if (iv_ca_cand_used_p (ivs, cand))
6998 continue;
6999
7000 act_delta = NULL;
7001 iv_ca_set_cp (data, ivs, group, cp);
7002 act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL, min_ncand: true);
7003 iv_ca_set_no_cp (data, ivs, group);
7004 act_delta = iv_ca_delta_add (group,
7005 old_cp: iv_ca_cand_for_group (ivs, group),
7006 new_cp: cp, next: act_delta);
7007
7008 if (act_cost < best_cost)
7009 {
7010 best_cost = act_cost;
7011
7012 if (best_delta)
7013 iv_ca_delta_free (delta: &best_delta);
7014 best_delta = act_delta;
7015 }
7016 else
7017 iv_ca_delta_free (delta: &act_delta);
7018 }
7019 }
7020
7021 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7022 iv_ca_delta_free (delta: &best_delta);
7023
7024 return !best_cost.infinite_cost_p ();
7025}
7026
7027/* Finds an initial assignment of candidates to uses. */
7028
7029static class iv_ca *
7030get_initial_solution (struct ivopts_data *data, bool originalp)
7031{
7032 unsigned i;
7033 class iv_ca *ivs = iv_ca_new (data);
7034
7035 for (i = 0; i < data->vgroups.length (); i++)
7036 if (!try_add_cand_for (data, ivs, group: data->vgroups[i], originalp))
7037 {
7038 iv_ca_free (ivs: &ivs);
7039 return NULL;
7040 }
7041
7042 return ivs;
7043}
7044
7045/* Tries to improve set of induction variables IVS. TRY_REPLACE_P
7046 points to a bool variable, this function tries to break local
7047 optimal fixed-point by replacing candidates in IVS if it's true. */
7048
7049static bool
7050try_improve_iv_set (struct ivopts_data *data,
7051 class iv_ca *ivs, bool *try_replace_p)
7052{
7053 unsigned i, n_ivs;
7054 comp_cost acost, best_cost = iv_ca_cost (ivs);
7055 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7056 struct iv_cand *cand;
7057
7058 /* Try extending the set of induction variables by one. */
7059 for (i = 0; i < data->vcands.length (); i++)
7060 {
7061 cand = data->vcands[i];
7062
7063 if (iv_ca_cand_used_p (ivs, cand))
7064 continue;
7065
7066 acost = iv_ca_extend (data, ivs, cand, delta: &act_delta, n_ivs: &n_ivs, min_ncand: false);
7067 if (!act_delta)
7068 continue;
7069
7070 /* If we successfully added the candidate and the set is small enough,
7071 try optimizing it by removing other candidates. */
7072 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7073 {
7074 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
7075 acost = iv_ca_prune (data, ivs, except_cand: cand, delta: &tmp_delta);
7076 iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
7077 act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
7078 }
7079
7080 if (acost < best_cost)
7081 {
7082 best_cost = acost;
7083 iv_ca_delta_free (delta: &best_delta);
7084 best_delta = act_delta;
7085 }
7086 else
7087 iv_ca_delta_free (delta: &act_delta);
7088 }
7089
7090 if (!best_delta)
7091 {
7092 /* Try removing the candidates from the set instead. */
7093 best_cost = iv_ca_prune (data, ivs, NULL, delta: &best_delta);
7094
7095 if (!best_delta && *try_replace_p)
7096 {
7097 *try_replace_p = false;
7098 /* So far candidate selecting algorithm tends to choose fewer IVs
7099 so that it can handle cases in which loops have many variables
7100 but the best choice is often to use only one general biv. One
7101 weakness is it can't handle opposite cases, in which different
7102 candidates should be chosen with respect to each use. To solve
7103 the problem, we replace candidates in a manner described by the
7104 comments of iv_ca_replace, thus give general algorithm a chance
7105 to break local optimal fixed-point in these cases. */
7106 best_cost = iv_ca_replace (data, ivs, delta: &best_delta);
7107 }
7108
7109 if (!best_delta)
7110 return false;
7111 }
7112
7113 iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7114 iv_ca_delta_free (delta: &best_delta);
7115 return best_cost == iv_ca_cost (ivs);
7116}
7117
7118/* Attempts to find the optimal set of induction variables. We do simple
7119 greedy heuristic -- we try to replace at most one candidate in the selected
7120 solution and remove the unused ivs while this improves the cost. */
7121
7122static class iv_ca *
7123find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7124{
7125 class iv_ca *set;
7126 bool try_replace_p = true;
7127
7128 /* Get the initial solution. */
7129 set = get_initial_solution (data, originalp);
7130 if (!set)
7131 {
7132 if (dump_file && (dump_flags & TDF_DETAILS))
7133 fprintf (stream: dump_file, format: "Unable to substitute for ivs, failed.\n");
7134 return NULL;
7135 }
7136
7137 if (dump_file && (dump_flags & TDF_DETAILS))
7138 {
7139 fprintf (stream: dump_file, format: "Initial set of candidates:\n");
7140 iv_ca_dump (data, file: dump_file, ivs: set);
7141 }
7142
7143 while (try_improve_iv_set (data, ivs: set, try_replace_p: &try_replace_p))
7144 {
7145 if (dump_file && (dump_flags & TDF_DETAILS))
7146 {
7147 fprintf (stream: dump_file, format: "Improved to:\n");
7148 iv_ca_dump (data, file: dump_file, ivs: set);
7149 }
7150 }
7151
7152 /* If the set has infinite_cost, it can't be optimal. */
7153 if (iv_ca_cost (ivs: set).infinite_cost_p ())
7154 {
7155 if (dump_file && (dump_flags & TDF_DETAILS))
7156 fprintf (stream: dump_file,
7157 format: "Overflow to infinite cost in try_improve_iv_set.\n");
7158 iv_ca_free (ivs: &set);
7159 }
7160 return set;
7161}
7162
7163static class iv_ca *
7164find_optimal_iv_set (struct ivopts_data *data)
7165{
7166 unsigned i;
7167 comp_cost cost, origcost;
7168 class iv_ca *set, *origset;
7169
7170 /* Determine the cost based on a strategy that starts with original IVs,
7171 and try again using a strategy that prefers candidates not based
7172 on any IVs. */
7173 origset = find_optimal_iv_set_1 (data, originalp: true);
7174 set = find_optimal_iv_set_1 (data, originalp: false);
7175
7176 if (!origset && !set)
7177 return NULL;
7178
7179 origcost = origset ? iv_ca_cost (ivs: origset) : infinite_cost;
7180 cost = set ? iv_ca_cost (ivs: set) : infinite_cost;
7181
7182 if (dump_file && (dump_flags & TDF_DETAILS))
7183 {
7184 fprintf (stream: dump_file, format: "Original cost %" PRId64 " (complexity %d)\n\n",
7185 origcost.cost, origcost.complexity);
7186 fprintf (stream: dump_file, format: "Final cost %" PRId64 " (complexity %d)\n\n",
7187 cost.cost, cost.complexity);
7188 }
7189
7190 /* Choose the one with the best cost. */
7191 if (origcost <= cost)
7192 {
7193 if (set)
7194 iv_ca_free (ivs: &set);
7195 set = origset;
7196 }
7197 else if (origset)
7198 iv_ca_free (ivs: &origset);
7199
7200 for (i = 0; i < data->vgroups.length (); i++)
7201 {
7202 struct iv_group *group = data->vgroups[i];
7203 group->selected = iv_ca_cand_for_group (ivs: set, group)->cand;
7204 }
7205
7206 return set;
7207}
7208
7209/* Creates a new induction variable corresponding to CAND. */
7210
7211static void
7212create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7213{
7214 gimple_stmt_iterator incr_pos;
7215 tree base;
7216 struct iv_use *use;
7217 struct iv_group *group;
7218 bool after = false;
7219
7220 gcc_assert (cand->iv != NULL);
7221
7222 switch (cand->pos)
7223 {
7224 case IP_NORMAL:
7225 incr_pos = gsi_last_bb (bb: ip_normal_pos (data->current_loop));
7226 break;
7227
7228 case IP_END:
7229 incr_pos = gsi_last_bb (bb: ip_end_pos (data->current_loop));
7230 after = true;
7231 gcc_assert (gsi_end_p (incr_pos) || !stmt_ends_bb_p (*incr_pos));
7232 break;
7233
7234 case IP_AFTER_USE:
7235 after = true;
7236 /* fall through */
7237 case IP_BEFORE_USE:
7238 incr_pos = gsi_for_stmt (cand->incremented_at);
7239 break;
7240
7241 case IP_ORIGINAL:
7242 /* Mark that the iv is preserved. */
7243 name_info (data, name: cand->var_before)->preserve_biv = true;
7244 name_info (data, name: cand->var_after)->preserve_biv = true;
7245
7246 /* Rewrite the increment so that it uses var_before directly. */
7247 use = find_interesting_uses_op (data, op: cand->var_after);
7248 group = data->vgroups[use->group_id];
7249 group->selected = cand;
7250 return;
7251 }
7252
7253 gimple_add_tmp_var (cand->var_before);
7254
7255 base = unshare_expr (cand->iv->base);
7256
7257 /* The step computation could invoke UB when the loop does not iterate.
7258 Avoid inserting it on the preheader in its native form but rewrite
7259 it to a well-defined form. This also helps masking SCEV issues
7260 which freely re-associates the IV computations when building up
7261 CHRECs without much regard for signed overflow invoking UB. */
7262 gimple_seq stmts = NULL;
7263 tree step = force_gimple_operand (unshare_expr (cand->iv->step), &stmts,
7264 true, NULL_TREE);
7265 if (stmts)
7266 {
7267 for (auto gsi = gsi_start (seq&: stmts); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
7268 if (gimple_needing_rewrite_undefined (gsi_stmt (i: gsi)))
7269 rewrite_to_defined_unconditional (&gsi);
7270 gsi_insert_seq_on_edge_immediate
7271 (loop_preheader_edge (data->current_loop), stmts);
7272 }
7273
7274 create_iv (base, PLUS_EXPR, step,
7275 cand->var_before, data->current_loop,
7276 &incr_pos, after, &cand->var_before, &cand->var_after);
7277}
7278
7279/* Creates new induction variables described in SET. */
7280
7281static void
7282create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7283{
7284 unsigned i;
7285 struct iv_cand *cand;
7286 bitmap_iterator bi;
7287
7288 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7289 {
7290 cand = data->vcands[i];
7291 create_new_iv (data, cand);
7292 }
7293
7294 if (dump_file && (dump_flags & TDF_DETAILS))
7295 {
7296 fprintf (stream: dump_file, format: "Selected IV set for loop %d",
7297 data->current_loop->num);
7298 if (data->loop_loc != UNKNOWN_LOCATION)
7299 fprintf (stream: dump_file, format: " at %s:%d", LOCATION_FILE (data->loop_loc),
7300 LOCATION_LINE (data->loop_loc));
7301 fprintf (stream: dump_file, format: ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters",
7302 avg_loop_niter (loop: data->current_loop));
7303 fprintf (stream: dump_file, format: ", %lu IVs:\n", bitmap_count_bits (set->cands));
7304 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7305 {
7306 cand = data->vcands[i];
7307 dump_cand (file: dump_file, cand);
7308 }
7309 fprintf (stream: dump_file, format: "\n");
7310 }
7311}
7312
7313/* Rewrites USE (definition of iv used in a nonlinear expression)
7314 using candidate CAND. */
7315
7316static void
7317rewrite_use_nonlinear_expr (struct ivopts_data *data,
7318 struct iv_use *use, struct iv_cand *cand)
7319{
7320 gassign *ass;
7321 gimple_stmt_iterator bsi;
7322 tree comp, type = get_use_type (use), tgt;
7323
7324 /* An important special case -- if we are asked to express value of
7325 the original iv by itself, just exit; there is no need to
7326 introduce a new computation (that might also need casting the
7327 variable to unsigned and back). */
7328 if (cand->pos == IP_ORIGINAL
7329 && cand->incremented_at == use->stmt)
7330 {
7331 tree op = NULL_TREE;
7332 enum tree_code stmt_code;
7333
7334 gcc_assert (is_gimple_assign (use->stmt));
7335 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7336
7337 /* Check whether we may leave the computation unchanged.
7338 This is the case only if it does not rely on other
7339 computations in the loop -- otherwise, the computation
7340 we rely upon may be removed in remove_unused_ivs,
7341 thus leading to ICE. */
7342 stmt_code = gimple_assign_rhs_code (gs: use->stmt);
7343 if (stmt_code == PLUS_EXPR
7344 || stmt_code == MINUS_EXPR
7345 || stmt_code == POINTER_PLUS_EXPR)
7346 {
7347 if (gimple_assign_rhs1 (gs: use->stmt) == cand->var_before)
7348 op = gimple_assign_rhs2 (gs: use->stmt);
7349 else if (gimple_assign_rhs2 (gs: use->stmt) == cand->var_before)
7350 op = gimple_assign_rhs1 (gs: use->stmt);
7351 }
7352
7353 if (op != NULL_TREE)
7354 {
7355 if (expr_invariant_in_loop_p (loop: data->current_loop, expr: op))
7356 return;
7357 if (TREE_CODE (op) == SSA_NAME)
7358 {
7359 struct iv *iv = get_iv (data, var: op);
7360 if (iv != NULL && integer_zerop (iv->step))
7361 return;
7362 }
7363 }
7364 }
7365
7366 switch (gimple_code (g: use->stmt))
7367 {
7368 case GIMPLE_PHI:
7369 tgt = PHI_RESULT (use->stmt);
7370
7371 /* If we should keep the biv, do not replace it. */
7372 if (name_info (data, name: tgt)->preserve_biv)
7373 return;
7374
7375 bsi = gsi_after_labels (bb: gimple_bb (g: use->stmt));
7376 break;
7377
7378 case GIMPLE_ASSIGN:
7379 tgt = gimple_assign_lhs (gs: use->stmt);
7380 bsi = gsi_for_stmt (use->stmt);
7381 break;
7382
7383 default:
7384 gcc_unreachable ();
7385 }
7386
7387 aff_tree aff_inv, aff_var;
7388 if (!get_computation_aff_1 (data, at: use->stmt, use, cand, aff_inv: &aff_inv, aff_var: &aff_var))
7389 gcc_unreachable ();
7390
7391 unshare_aff_combination (&aff_inv);
7392 unshare_aff_combination (&aff_var);
7393 /* Prefer CSE opportunity than loop invariant by adding offset at last
7394 so that iv_uses have different offsets can be CSEed. */
7395 poly_widest_int offset = aff_inv.offset;
7396 aff_inv.offset = 0;
7397
7398 gimple_seq stmt_list = NULL, seq = NULL;
7399 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7400 tree comp_op2 = aff_combination_to_tree (&aff_var);
7401 gcc_assert (comp_op1 && comp_op2);
7402
7403 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7404 gimple_seq_add_seq (&stmt_list, seq);
7405 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7406 gimple_seq_add_seq (&stmt_list, seq);
7407
7408 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7409 std::swap (a&: comp_op1, b&: comp_op2);
7410
7411 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7412 {
7413 comp = fold_build_pointer_plus (comp_op1,
7414 fold_convert (sizetype, comp_op2));
7415 comp = fold_build_pointer_plus (comp,
7416 wide_int_to_tree (sizetype, offset));
7417 }
7418 else
7419 {
7420 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7421 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7422 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7423 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7424 }
7425
7426 comp = fold_convert (type, comp);
7427 comp = force_gimple_operand (comp, &seq, false, NULL);
7428 gimple_seq_add_seq (&stmt_list, seq);
7429 if (gimple_code (g: use->stmt) != GIMPLE_PHI
7430 /* We can't allow re-allocating the stmt as it might be pointed
7431 to still. */
7432 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7433 >= gimple_num_ops (gs: gsi_stmt (i: bsi))))
7434 {
7435 comp = force_gimple_operand (comp, &seq, true, NULL);
7436 gimple_seq_add_seq (&stmt_list, seq);
7437 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7438 {
7439 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7440 /* As this isn't a plain copy we have to reset alignment
7441 information. */
7442 if (SSA_NAME_PTR_INFO (comp))
7443 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7444 }
7445 }
7446
7447 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7448 if (gimple_code (g: use->stmt) == GIMPLE_PHI)
7449 {
7450 ass = gimple_build_assign (tgt, comp);
7451 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7452
7453 bsi = gsi_for_stmt (use->stmt);
7454 remove_phi_node (&bsi, false);
7455 }
7456 else
7457 {
7458 gimple_assign_set_rhs_from_tree (&bsi, comp);
7459 use->stmt = gsi_stmt (i: bsi);
7460 }
7461}
7462
7463/* Performs a peephole optimization to reorder the iv update statement with
7464 a mem ref to enable instruction combining in later phases. The mem ref uses
7465 the iv value before the update, so the reordering transformation requires
7466 adjustment of the offset. CAND is the selected IV_CAND.
7467
7468 Example:
7469
7470 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7471 iv2 = iv1 + 1;
7472
7473 if (t < val) (1)
7474 goto L;
7475 goto Head;
7476
7477
7478 directly propagating t over to (1) will introduce overlapping live range
7479 thus increase register pressure. This peephole transform it into:
7480
7481
7482 iv2 = iv1 + 1;
7483 t = MEM_REF (base, iv2, 8, 8);
7484 if (t < val)
7485 goto L;
7486 goto Head;
7487*/
7488
7489static void
7490adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7491{
7492 tree var_after;
7493 gimple *iv_update, *stmt;
7494 basic_block bb;
7495 gimple_stmt_iterator gsi, gsi_iv;
7496
7497 if (cand->pos != IP_NORMAL)
7498 return;
7499
7500 var_after = cand->var_after;
7501 iv_update = SSA_NAME_DEF_STMT (var_after);
7502
7503 bb = gimple_bb (g: iv_update);
7504 gsi = gsi_last_nondebug_bb (bb);
7505 stmt = gsi_stmt (i: gsi);
7506
7507 /* Only handle conditional statement for now. */
7508 if (gimple_code (g: stmt) != GIMPLE_COND)
7509 return;
7510
7511 gsi_prev_nondebug (i: &gsi);
7512 stmt = gsi_stmt (i: gsi);
7513 if (stmt != iv_update)
7514 return;
7515
7516 gsi_prev_nondebug (i: &gsi);
7517 if (gsi_end_p (i: gsi))
7518 return;
7519
7520 stmt = gsi_stmt (i: gsi);
7521 if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
7522 return;
7523
7524 if (stmt != use->stmt)
7525 return;
7526
7527 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7528 return;
7529
7530 if (dump_file && (dump_flags & TDF_DETAILS))
7531 {
7532 fprintf (stream: dump_file, format: "Reordering \n");
7533 print_gimple_stmt (dump_file, iv_update, 0);
7534 print_gimple_stmt (dump_file, use->stmt, 0);
7535 fprintf (stream: dump_file, format: "\n");
7536 }
7537
7538 gsi = gsi_for_stmt (use->stmt);
7539 gsi_iv = gsi_for_stmt (iv_update);
7540 gsi_move_before (&gsi_iv, &gsi);
7541
7542 cand->pos = IP_BEFORE_USE;
7543 cand->incremented_at = use->stmt;
7544}
7545
7546/* Return the alias pointer type that should be used for a MEM_REF
7547 associated with USE, which has type USE_PTR_ADDRESS. */
7548
7549static tree
7550get_alias_ptr_type_for_ptr_address (iv_use *use)
7551{
7552 gcall *call = as_a <gcall *> (p: use->stmt);
7553 switch (gimple_call_internal_fn (gs: call))
7554 {
7555 case IFN_MASK_LOAD:
7556 case IFN_MASK_STORE:
7557 case IFN_MASK_LOAD_LANES:
7558 case IFN_MASK_STORE_LANES:
7559 case IFN_MASK_LEN_LOAD_LANES:
7560 case IFN_MASK_LEN_STORE_LANES:
7561 case IFN_LEN_LOAD:
7562 case IFN_LEN_STORE:
7563 case IFN_MASK_LEN_LOAD:
7564 case IFN_MASK_LEN_STORE:
7565 /* The second argument contains the correct alias type. */
7566 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7567 return TREE_TYPE (gimple_call_arg (call, 1));
7568
7569 default:
7570 gcc_unreachable ();
7571 }
7572}
7573
7574
7575/* Rewrites USE (address that is an iv) using candidate CAND. */
7576
7577static void
7578rewrite_use_address (struct ivopts_data *data,
7579 struct iv_use *use, struct iv_cand *cand)
7580{
7581 aff_tree aff;
7582 bool ok;
7583
7584 adjust_iv_update_pos (cand, use);
7585 ok = get_computation_aff (data, at: use->stmt, use, cand, aff: &aff);
7586 gcc_assert (ok);
7587 unshare_aff_combination (&aff);
7588
7589 /* To avoid undefined overflow problems, all IV candidates use unsigned
7590 integer types. The drawback is that this makes it impossible for
7591 create_mem_ref to distinguish an IV that is based on a memory object
7592 from one that represents simply an offset.
7593
7594 To work around this problem, we pass a hint to create_mem_ref that
7595 indicates which variable (if any) in aff is an IV based on a memory
7596 object. Note that we only consider the candidate. If this is not
7597 based on an object, the base of the reference is in some subexpression
7598 of the use -- but these will use pointer types, so they are recognized
7599 by the create_mem_ref heuristics anyway. */
7600 tree iv = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7601 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7602 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7603 tree type = use->mem_type;
7604 tree alias_ptr_type;
7605 if (use->type == USE_PTR_ADDRESS)
7606 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7607 else
7608 {
7609 gcc_assert (type == TREE_TYPE (*use->op_p));
7610 unsigned int align = get_object_alignment (*use->op_p);
7611 if (align != TYPE_ALIGN (type))
7612 type = build_aligned_type (type, align);
7613 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7614 }
7615 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7616 iv, base_hint, data->speed);
7617
7618 if (use->type == USE_PTR_ADDRESS)
7619 {
7620 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7621 ref = fold_convert (get_use_type (use), ref);
7622 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7623 true, GSI_SAME_STMT);
7624 }
7625 else
7626 {
7627 /* When we end up confused enough and have no suitable base but
7628 stuffed everything to index2 use a LEA for the address and
7629 create a plain MEM_REF to avoid basing a memory reference
7630 on address zero which create_mem_ref_raw does as fallback. */
7631 if (TREE_CODE (ref) == TARGET_MEM_REF
7632 && TMR_INDEX2 (ref) != NULL_TREE
7633 && integer_zerop (TREE_OPERAND (ref, 0)))
7634 {
7635 ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7636 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637 true, GSI_SAME_STMT);
7638 ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7639 }
7640 copy_ref_info (ref, *use->op_p);
7641 }
7642
7643 *use->op_p = ref;
7644}
7645
7646/* Rewrites USE (the condition such that one of the arguments is an iv) using
7647 candidate CAND. */
7648
7649static void
7650rewrite_use_compare (struct ivopts_data *data,
7651 struct iv_use *use, struct iv_cand *cand)
7652{
7653 tree comp, op, bound;
7654 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7655 enum tree_code compare;
7656 struct iv_group *group = data->vgroups[use->group_id];
7657 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7658
7659 bound = cp->value;
7660 if (bound)
7661 {
7662 tree var = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7663 tree var_type = TREE_TYPE (var);
7664 gimple_seq stmts;
7665
7666 if (dump_file && (dump_flags & TDF_DETAILS))
7667 {
7668 fprintf (stream: dump_file, format: "Replacing exit test: ");
7669 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7670 }
7671 compare = cp->comp;
7672 bound = unshare_expr (fold_convert (var_type, bound));
7673 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7674 if (stmts)
7675 gsi_insert_seq_on_edge_immediate (
7676 loop_preheader_edge (data->current_loop),
7677 stmts);
7678
7679 gcond *cond_stmt = as_a <gcond *> (p: use->stmt);
7680 gimple_cond_set_lhs (gs: cond_stmt, lhs: var);
7681 gimple_cond_set_code (gs: cond_stmt, code: compare);
7682 gimple_cond_set_rhs (gs: cond_stmt, rhs: op);
7683 return;
7684 }
7685
7686 /* The induction variable elimination failed; just express the original
7687 giv. */
7688 comp = get_computation_at (data, at: use->stmt, use, cand);
7689 gcc_assert (comp != NULL_TREE);
7690 gcc_assert (use->op_p != NULL);
7691 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7692 SSA_NAME_VAR (*use->op_p),
7693 true, GSI_SAME_STMT);
7694}
7695
7696/* Rewrite the groups using the selected induction variables. */
7697
7698static void
7699rewrite_groups (struct ivopts_data *data)
7700{
7701 unsigned i, j;
7702
7703 for (i = 0; i < data->vgroups.length (); i++)
7704 {
7705 struct iv_group *group = data->vgroups[i];
7706 struct iv_cand *cand = group->selected;
7707
7708 gcc_assert (cand);
7709
7710 if (group->type == USE_NONLINEAR_EXPR)
7711 {
7712 for (j = 0; j < group->vuses.length (); j++)
7713 {
7714 rewrite_use_nonlinear_expr (data, use: group->vuses[j], cand);
7715 update_stmt (s: group->vuses[j]->stmt);
7716 }
7717 }
7718 else if (address_p (type: group->type))
7719 {
7720 for (j = 0; j < group->vuses.length (); j++)
7721 {
7722 rewrite_use_address (data, use: group->vuses[j], cand);
7723 update_stmt (s: group->vuses[j]->stmt);
7724 }
7725 }
7726 else
7727 {
7728 gcc_assert (group->type == USE_COMPARE);
7729
7730 for (j = 0; j < group->vuses.length (); j++)
7731 {
7732 rewrite_use_compare (data, use: group->vuses[j], cand);
7733 update_stmt (s: group->vuses[j]->stmt);
7734 }
7735 }
7736 }
7737}
7738
7739/* Removes the ivs that are not used after rewriting. */
7740
7741static void
7742remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7743{
7744 unsigned j;
7745 bitmap_iterator bi;
7746
7747 /* Figure out an order in which to release SSA DEFs so that we don't
7748 release something that we'd have to propagate into a debug stmt
7749 afterwards. */
7750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7751 {
7752 struct version_info *info;
7753
7754 info = ver_info (data, ver: j);
7755 if (info->iv
7756 && !integer_zerop (info->iv->step)
7757 && !info->inv_id
7758 && !info->iv->nonlin_use
7759 && !info->preserve_biv)
7760 {
7761 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7762
7763 tree def = info->iv->ssa_name;
7764
7765 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7766 {
7767 imm_use_iterator imm_iter;
7768 use_operand_p use_p;
7769 gimple *stmt;
7770 int count = 0;
7771
7772 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7773 {
7774 if (!gimple_debug_bind_p (s: stmt))
7775 continue;
7776
7777 /* We just want to determine whether to do nothing
7778 (count == 0), to substitute the computed
7779 expression into a single use of the SSA DEF by
7780 itself (count == 1), or to use a debug temp
7781 because the SSA DEF is used multiple times or as
7782 part of a larger expression (count > 1). */
7783 count++;
7784 if (gimple_debug_bind_get_value (dbg: stmt) != def)
7785 count++;
7786
7787 if (count > 1)
7788 break;
7789 }
7790
7791 if (!count)
7792 continue;
7793
7794 struct iv_use dummy_use;
7795 struct iv_cand *best_cand = NULL, *cand;
7796 unsigned i, best_pref = 0, cand_pref;
7797 tree comp = NULL_TREE;
7798
7799 memset (s: &dummy_use, c: 0, n: sizeof (dummy_use));
7800 dummy_use.iv = info->iv;
7801 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7802 {
7803 cand = data->vgroups[i]->selected;
7804 if (cand == best_cand)
7805 continue;
7806 cand_pref = operand_equal_p (cand->iv->step,
7807 info->iv->step, flags: 0)
7808 ? 4 : 0;
7809 cand_pref
7810 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7811 == TYPE_MODE (TREE_TYPE (info->iv->base))
7812 ? 2 : 0;
7813 cand_pref
7814 += TREE_CODE (cand->iv->base) == INTEGER_CST
7815 ? 1 : 0;
7816 if (best_cand == NULL || best_pref < cand_pref)
7817 {
7818 tree this_comp
7819 = get_debug_computation_at (data,
7820 SSA_NAME_DEF_STMT (def),
7821 use: &dummy_use, cand);
7822 if (this_comp)
7823 {
7824 best_cand = cand;
7825 best_pref = cand_pref;
7826 comp = this_comp;
7827 }
7828 }
7829 }
7830
7831 if (!best_cand)
7832 continue;
7833
7834 comp = unshare_expr (comp);
7835 if (count > 1)
7836 {
7837 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7838 /* FIXME: Is setting the mode really necessary? */
7839 if (SSA_NAME_VAR (def))
7840 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7841 else
7842 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7843 gdebug *def_temp
7844 = gimple_build_debug_bind (vexpr, comp, NULL);
7845 gimple_stmt_iterator gsi;
7846
7847 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7848 gsi = gsi_after_labels (bb: gimple_bb
7849 (SSA_NAME_DEF_STMT (def)));
7850 else
7851 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7852
7853 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7854 comp = vexpr;
7855 }
7856
7857 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7858 {
7859 if (!gimple_debug_bind_p (s: stmt))
7860 continue;
7861
7862 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7863 SET_USE (use_p, comp);
7864
7865 update_stmt (s: stmt);
7866 }
7867 }
7868 }
7869 }
7870}
7871
7872/* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7873 for hash_map::traverse. */
7874
7875bool
7876free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7877{
7878 if (value)
7879 {
7880 value->~tree_niter_desc ();
7881 free (ptr: value);
7882 }
7883 return true;
7884}
7885
7886/* Frees data allocated by the optimization of a single loop. */
7887
7888static void
7889free_loop_data (struct ivopts_data *data)
7890{
7891 unsigned i, j;
7892 bitmap_iterator bi;
7893 tree obj;
7894
7895 if (data->niters)
7896 {
7897 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7898 delete data->niters;
7899 data->niters = NULL;
7900 }
7901
7902 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7903 {
7904 struct version_info *info;
7905
7906 info = ver_info (data, ver: i);
7907 info->iv = NULL;
7908 info->has_nonlin_use = false;
7909 info->preserve_biv = false;
7910 info->inv_id = 0;
7911 }
7912 bitmap_clear (data->relevant);
7913 bitmap_clear (data->important_candidates);
7914
7915 for (i = 0; i < data->vgroups.length (); i++)
7916 {
7917 struct iv_group *group = data->vgroups[i];
7918
7919 for (j = 0; j < group->vuses.length (); j++)
7920 free (ptr: group->vuses[j]);
7921 group->vuses.release ();
7922
7923 BITMAP_FREE (group->related_cands);
7924 for (j = 0; j < group->n_map_members; j++)
7925 {
7926 if (group->cost_map[j].inv_vars)
7927 BITMAP_FREE (group->cost_map[j].inv_vars);
7928 if (group->cost_map[j].inv_exprs)
7929 BITMAP_FREE (group->cost_map[j].inv_exprs);
7930 }
7931
7932 free (ptr: group->cost_map);
7933 free (ptr: group);
7934 }
7935 data->vgroups.truncate (size: 0);
7936
7937 for (i = 0; i < data->vcands.length (); i++)
7938 {
7939 struct iv_cand *cand = data->vcands[i];
7940
7941 if (cand->inv_vars)
7942 BITMAP_FREE (cand->inv_vars);
7943 if (cand->inv_exprs)
7944 BITMAP_FREE (cand->inv_exprs);
7945 free (ptr: cand);
7946 }
7947 data->vcands.truncate (size: 0);
7948
7949 if (data->version_info_size < num_ssa_names)
7950 {
7951 data->version_info_size = 2 * num_ssa_names;
7952 free (ptr: data->version_info);
7953 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7954 }
7955
7956 data->max_inv_var_id = 0;
7957 data->max_inv_expr_id = 0;
7958
7959 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7960 SET_DECL_RTL (obj, NULL_RTX);
7961
7962 decl_rtl_to_reset.truncate (size: 0);
7963
7964 data->inv_expr_tab->empty ();
7965
7966 data->iv_common_cand_tab->empty ();
7967 data->iv_common_cands.truncate (size: 0);
7968}
7969
7970/* Finalizes data structures used by the iv optimization pass. LOOPS is the
7971 loop tree. */
7972
7973static void
7974tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7975{
7976 free_loop_data (data);
7977 free (ptr: data->version_info);
7978 BITMAP_FREE (data->relevant);
7979 BITMAP_FREE (data->important_candidates);
7980
7981 decl_rtl_to_reset.release ();
7982 data->vgroups.release ();
7983 data->vcands.release ();
7984 delete data->inv_expr_tab;
7985 data->inv_expr_tab = NULL;
7986 free_affine_expand_cache (&data->name_expansion_cache);
7987 if (data->base_object_map)
7988 delete data->base_object_map;
7989 delete data->iv_common_cand_tab;
7990 data->iv_common_cand_tab = NULL;
7991 data->iv_common_cands.release ();
7992 obstack_free (&data->iv_obstack, NULL);
7993}
7994
7995/* Returns true if the loop body BODY includes any function calls. */
7996
7997static bool
7998loop_body_includes_call (basic_block *body, unsigned num_nodes)
7999{
8000 gimple_stmt_iterator gsi;
8001 unsigned i;
8002
8003 for (i = 0; i < num_nodes; i++)
8004 for (gsi = gsi_start_bb (bb: body[i]); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
8005 {
8006 gimple *stmt = gsi_stmt (i: gsi);
8007 if (is_gimple_call (gs: stmt)
8008 && !gimple_call_internal_p (gs: stmt)
8009 && !is_inexpensive_builtin (gimple_call_fndecl (gs: stmt)))
8010 return true;
8011 }
8012 return false;
8013}
8014
8015/* Determine cost scaling factor for basic blocks in loop. */
8016#define COST_SCALING_FACTOR_BOUND (20)
8017
8018static void
8019determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8020{
8021 int lfreq = data->current_loop->header->count.to_frequency (cfun);
8022 if (!data->speed || lfreq <= 0)
8023 return;
8024
8025 int max_freq = lfreq;
8026 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8027 {
8028 body[i]->aux = (void *)(intptr_t) 1;
8029 if (max_freq < body[i]->count.to_frequency (cfun))
8030 max_freq = body[i]->count.to_frequency (cfun);
8031 }
8032 if (max_freq > lfreq)
8033 {
8034 int divisor, factor;
8035 /* Check if scaling factor itself needs to be scaled by the bound. This
8036 is to avoid overflow when scaling cost according to profile info. */
8037 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8038 {
8039 divisor = max_freq;
8040 factor = COST_SCALING_FACTOR_BOUND;
8041 }
8042 else
8043 {
8044 divisor = lfreq;
8045 factor = 1;
8046 }
8047 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8048 {
8049 int bfreq = body[i]->count.to_frequency (cfun);
8050 if (bfreq <= lfreq)
8051 continue;
8052
8053 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8054 }
8055 }
8056}
8057
8058/* Find doloop comparison use and set its doloop_p on if found. */
8059
8060static bool
8061find_doloop_use (struct ivopts_data *data)
8062{
8063 struct loop *loop = data->current_loop;
8064
8065 for (unsigned i = 0; i < data->vgroups.length (); i++)
8066 {
8067 struct iv_group *group = data->vgroups[i];
8068 if (group->type == USE_COMPARE)
8069 {
8070 gcc_assert (group->vuses.length () == 1);
8071 struct iv_use *use = group->vuses[0];
8072 gimple *stmt = use->stmt;
8073 if (gimple_code (g: stmt) == GIMPLE_COND)
8074 {
8075 basic_block bb = gimple_bb (g: stmt);
8076 edge true_edge, false_edge;
8077 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8078 /* This comparison is used for loop latch. Require latch is empty
8079 for now. */
8080 if ((loop->latch == true_edge->dest
8081 || loop->latch == false_edge->dest)
8082 && empty_block_p (loop->latch))
8083 {
8084 group->doloop_p = true;
8085 if (dump_file && (dump_flags & TDF_DETAILS))
8086 {
8087 fprintf (stream: dump_file, format: "Doloop cmp iv use: ");
8088 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8089 }
8090 return true;
8091 }
8092 }
8093 }
8094 }
8095
8096 return false;
8097}
8098
8099/* For the targets which support doloop, to predict whether later RTL doloop
8100 transformation will perform on this loop, further detect the doloop use and
8101 mark the flag doloop_use_p if predicted. */
8102
8103void
8104analyze_and_mark_doloop_use (struct ivopts_data *data)
8105{
8106 data->doloop_use_p = false;
8107
8108 if (!flag_branch_on_count_reg)
8109 return;
8110
8111 if (data->current_loop->unroll == USHRT_MAX)
8112 return;
8113
8114 if (!generic_predict_doloop_p (data))
8115 return;
8116
8117 if (find_doloop_use (data))
8118 {
8119 data->doloop_use_p = true;
8120 if (dump_file && (dump_flags & TDF_DETAILS))
8121 {
8122 struct loop *loop = data->current_loop;
8123 fprintf (stream: dump_file,
8124 format: "Predict loop %d can perform"
8125 " doloop optimization later.\n",
8126 loop->num);
8127 flow_loop_dump (loop, dump_file, NULL, 1);
8128 }
8129 }
8130}
8131
8132/* Optimizes the LOOP. Returns true if anything changed. */
8133
8134static bool
8135tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8136 bitmap toremove)
8137{
8138 bool changed = false;
8139 class iv_ca *iv_ca;
8140 edge exit = single_dom_exit (loop);
8141 basic_block *body;
8142
8143 gcc_assert (!data->niters);
8144 data->current_loop = loop;
8145 data->loop_loc = find_loop_location (loop).get_location_t ();
8146 data->speed = optimize_loop_for_speed_p (loop);
8147
8148 if (dump_file && (dump_flags & TDF_DETAILS))
8149 {
8150 fprintf (stream: dump_file, format: "Processing loop %d", loop->num);
8151 if (data->loop_loc != UNKNOWN_LOCATION)
8152 fprintf (stream: dump_file, format: " at %s:%d", LOCATION_FILE (data->loop_loc),
8153 LOCATION_LINE (data->loop_loc));
8154 fprintf (stream: dump_file, format: "\n");
8155
8156 if (exit)
8157 {
8158 fprintf (stream: dump_file, format: " single exit %d -> %d, exit condition ",
8159 exit->src->index, exit->dest->index);
8160 print_gimple_stmt (dump_file, *gsi_last_bb (bb: exit->src),
8161 0, TDF_SLIM);
8162 fprintf (stream: dump_file, format: "\n");
8163 }
8164
8165 fprintf (stream: dump_file, format: "\n");
8166 }
8167
8168 body = get_loop_body (loop);
8169 data->body_includes_call = loop_body_includes_call (body, num_nodes: loop->num_nodes);
8170 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8171
8172 data->loop_single_exit_p
8173 = exit != NULL && loop_only_exit_p (loop, body, exit);
8174
8175 /* For each ssa name determines whether it behaves as an induction variable
8176 in some loop. */
8177 if (!find_induction_variables (data, body))
8178 goto finish;
8179
8180 /* Finds interesting uses (item 1). */
8181 find_interesting_uses (data, body);
8182 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8183 goto finish;
8184
8185 /* Determine cost scaling factor for basic blocks in loop. */
8186 determine_scaling_factor (data, body);
8187
8188 /* Analyze doloop possibility and mark the doloop use if predicted. */
8189 analyze_and_mark_doloop_use (data);
8190
8191 /* Finds candidates for the induction variables (item 2). */
8192 find_iv_candidates (data);
8193
8194 /* Calculates the costs (item 3, part 1). */
8195 determine_iv_costs (data);
8196 determine_group_iv_costs (data);
8197 determine_set_costs (data);
8198
8199 /* Find the optimal set of induction variables (item 3, part 2). */
8200 iv_ca = find_optimal_iv_set (data);
8201 /* Cleanup basic block aux field. */
8202 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8203 body[i]->aux = NULL;
8204 if (!iv_ca)
8205 goto finish;
8206 changed = true;
8207
8208 /* Create the new induction variables (item 4, part 1). */
8209 create_new_ivs (data, set: iv_ca);
8210 iv_ca_free (ivs: &iv_ca);
8211
8212 /* Rewrite the uses (item 4, part 2). */
8213 rewrite_groups (data);
8214
8215 /* Remove the ivs that are unused after rewriting. */
8216 remove_unused_ivs (data, toremove);
8217
8218finish:
8219 free (ptr: body);
8220 free_loop_data (data);
8221
8222 return changed;
8223}
8224
8225/* Main entry point. Optimizes induction variables in loops. */
8226
8227void
8228tree_ssa_iv_optimize (void)
8229{
8230 struct ivopts_data data;
8231 auto_bitmap toremove;
8232
8233 tree_ssa_iv_optimize_init (data: &data);
8234 mark_ssa_maybe_undefs ();
8235
8236 /* Optimize the loops starting with the innermost ones. */
8237 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8238 {
8239 if (!dbg_cnt (index: ivopts_loop))
8240 continue;
8241
8242 if (dump_file && (dump_flags & TDF_DETAILS))
8243 flow_loop_dump (loop, dump_file, NULL, 1);
8244
8245 tree_ssa_iv_optimize_loop (data: &data, loop, toremove);
8246 }
8247
8248 /* Remove eliminated IV defs. */
8249 release_defs_bitset (toremove);
8250
8251 /* We have changed the structure of induction variables; it might happen
8252 that definitions in the scev database refer to some of them that were
8253 eliminated. */
8254 scev_reset_htab ();
8255 /* Likewise niter and control-IV information. */
8256 free_numbers_of_iterations_estimates (cfun);
8257
8258 tree_ssa_iv_optimize_finalize (data: &data);
8259}
8260
8261#include "gt-tree-ssa-loop-ivopts.h"
8262

/* Source: gcc/tree-ssa-loop-ivopts.cc  */