tree-ssa-loop-ivopts.cc source code [gcc/tree-ssa-loop-ivopts.cc]

1	/* Induction variable optimizations.
2	Copyright (C) 2003-2026 Free Software Foundation, Inc.
3
4	This file is part of GCC.
5
6	GCC is free software; you can redistribute it and/or modify it
7	under the terms of the GNU General Public License as published by the
8	Free Software Foundation; either version 3, or (at your option) any
9	later version.
10
11	GCC is distributed in the hope that it will be useful, but WITHOUT
12	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14	for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with GCC; see the file COPYING3. If not see
18	<http://www.gnu.org/licenses/>.  */
19
20	/* This pass tries to find the optimal set of induction variables for the loop.
21	It optimizes just the basic linear induction variables (although adding
22	support for other types should not be too hard). It includes the
23	optimizations commonly known as strength reduction, induction variable
24	coalescing and induction variable elimination. It does it in the
25	following steps:
26
27	1) The interesting uses of induction variables are found. This includes
28
29	-- uses of induction variables in non-linear expressions
30	-- addresses of arrays
31	-- comparisons of induction variables
32
33	Note the interesting uses are categorized and handled in group.
34	Generally, address type uses are grouped together if their iv bases
35	are different in constant offset.
36
37	2) Candidates for the induction variables are found. This includes
38
39	-- old induction variables
40	-- the variables defined by expressions derived from the "interesting
41	groups/uses" above
42
43	3) The optimal (w.r. to a cost function) set of variables is chosen. The
44	cost function assigns a cost to sets of induction variables and consists
45	of three parts:
46
47	-- The group/use costs. Each of the interesting groups/uses chooses
48	the best induction variable in the set and adds its cost to the sum.
49	The cost reflects the time spent on modifying the induction variables
50	value to be usable for the given purpose (adding base and offset for
51	arrays, etc.).
52	-- The variable costs. Each of the variables has a cost assigned that
53	reflects the costs associated with incrementing the value of the
54	variable. The original variables are somewhat preferred.
55	-- The set cost. Depending on the size of the set, extra cost may be
56	added to reflect register pressure.
57
58	All the costs are defined in a machine-specific way, using the target
59	hooks and machine descriptions to determine them.
60
61	4) The trees are transformed to use the new variables, the dead code is
62	removed.
63
64	All of this is done loop by loop. Doing it globally is theoretically
65	possible, it might give a better performance and it might enable us
66	to decide costs more precisely, but getting all the interactions right
67	would be complicated.
68
69	For the targets supporting low-overhead loops, IVOPTs has to take care of
70	the loops which will probably be transformed in RTL doloop optimization,
71	to try to make selected IV candidate set optimal. The process of doloop
72	support includes:
73
74	1) Analyze the current loop will be transformed to doloop or not, find and
75	mark its compare type IV use as doloop use (iv_group field doloop_p), and
76	set flag doloop_use_p of ivopts_data to notify subsequent processings on
77	doloop. See analyze_and_mark_doloop_use and its callees for the details.
78	The target hook predict_doloop_p can be used for target specific checks.
79
80	2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81	set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82	like biv. For cost determination between doloop IV cand and IV use, the
83	target hooks doloop_cost_for_generic and doloop_cost_for_address are
84	provided to add on extra costs for generic type and address type IV use.
85	Zero cost is assigned to the pair between doloop IV cand and doloop IV
86	use, and bound zero is set for IV elimination.
87
88	3) With the cost setting in step 2), the current cost model based IV
89	selection algorithm will process as usual, pick up doloop dedicated IV if
90	profitable.  */
91
92	#include "config.h"
93	#include "system.h"
94	#include "coretypes.h"
95	#include "backend.h"
96	#include "rtl.h"
97	#include "tree.h"
98	#include "gimple.h"
99	#include "cfghooks.h"
100	#include "tree-pass.h"
101	#include "memmodel.h"
102	#include "tm_p.h"
103	#include "ssa.h"
104	#include "expmed.h"
105	#include "insn-config.h"
106	#include "emit-rtl.h"
107	#include "recog.h"
108	#include "cgraph.h"
109	#include "gimple-pretty-print.h"
110	#include "alias.h"
111	#include "fold-const.h"
112	#include "stor-layout.h"
113	#include "tree-eh.h"
114	#include "gimplify.h"
115	#include "gimple-iterator.h"
116	#include "gimplify-me.h"
117	#include "tree-cfg.h"
118	#include "tree-ssa-loop-ivopts.h"
119	#include "tree-ssa-loop-manip.h"
120	#include "tree-ssa-loop-niter.h"
121	#include "tree-ssa-loop.h"
122	#include "explow.h"
123	#include "expr.h"
124	#include "tree-dfa.h"
125	#include "tree-ssa.h"
126	#include "cfgloop.h"
127	#include "tree-scalar-evolution.h"
128	#include "tree-affine.h"
129	#include "tree-ssa-propagate.h"
130	#include "tree-ssa-address.h"
131	#include "builtins.h"
132	#include "tree-vectorizer.h"
133	#include "dbgcnt.h"
134	#include "cfganal.h"
135	#include "gimple-fold.h"
136
137	/* For lang_hooks.types.type_for_mode.  */
138	#include "langhooks.h"
139
140	/* FIXME: Expressions are expanded to RTL in this pass to determine the
141	   cost of different addressing modes.  This should be moved to a TBD
142	   interface between the GIMPLE and RTL worlds.  */
143
/* Cost value that stands for "infinite", i.e. not achievable.  */
#define INFTY 1000000000
146
147	/ Returns the expected number of loop iterations for LOOP.*
148	The average trip count is computed from profile data if it
149	exists. /*
150
151	static inline unsigned HOST_WIDE_INT
152	avg_loop_niter (class loop *loop)
153	{
154	HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
155	if (niter == -`1`)
156	{
157	niter = likely_max_stmt_executions_int (loop);
158
159	if (niter == -`1` \|\| niter > param_avg_loop_niter)
160	return param_avg_loop_niter;
161	}
162
163	return niter;
164	}
165
166	struct iv_use;
167
168	/ Representation of the induction variable. /
169	struct iv
170	{
171	tree base; / Initial value of the iv. /
172	tree base_object; / A memory object to that the induction variable points. /
173	tree step; / Step of the iv (constant only). /
174	tree ssa_name; / The ssa name with the value. /
175	struct iv_use nonlin_use; /* The identifier in the use if it is the case. /
176	bool biv_p; / Is it a biv? /
177	bool no_overflow; / True if the iv doesn't overflow. /
178	bool have_address_use;/ For biv, indicate if it's used in any address*
179	type use. /*
180	};
181
182	/ Per-ssa version information (induction variable descriptions, etc.). /
183	struct version_info
184	{
185	tree name; / The ssa name. /
186	struct iv iv; /* Induction variable description. /
187	bool has_nonlin_use; / For a loop-level invariant, whether it is used in*
188	an expression that is not an induction variable. /*
189	bool preserve_biv; / For the original biv, whether to preserve it. /
190	unsigned inv_id; / Id of an invariant. /
191	};
192
/* Kinds of induction variable uses.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,
  /* Use is an address for an explicit memory reference.  */
  USE_REF_ADDRESS,
  /* Use is a pointer argument to a function in cases where the expansion
     of the function will turn the argument into a normal address.  */
  USE_PTR_ADDRESS,
  /* Use is a compare.  */
  USE_COMPARE
};
204
205	/ Cost of a computation. /
206	class comp_cost
207	{
208	public:
209	comp_cost (): cost (`0`), complexity (`0`), scratch (`0`)
210	{}
211
212	comp_cost (int64_t cost, unsigned complexity, int64_t scratch = `0`)
213	: cost (cost), complexity (complexity), scratch (scratch)
214	{}
215
216	/ Returns true if COST is infinite. /
217	bool infinite_cost_p ();
218
219	/ Adds costs COST1 and COST2. /
220	friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221
222	/ Adds COST to the comp_cost. /
223	comp_cost operator+= (comp_cost cost);
224
225	/ Adds constant C to this comp_cost. /
226	comp_cost operator+= (HOST_WIDE_INT c);
227
228	/ Subtracts constant C to this comp_cost. /
229	comp_cost operator-= (HOST_WIDE_INT c);
230
231	/ Divide the comp_cost by constant C. /
232	comp_cost operator/= (HOST_WIDE_INT c);
233
234	/ Multiply the comp_cost by constant C. /
235	comp_cost operator*= (HOST_WIDE_INT c);
236
237	/ Subtracts costs COST1 and COST2. /
238	friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239
240	/ Subtracts COST from this comp_cost. /
241	comp_cost operator-= (comp_cost cost);
242
243	/ Returns true if COST1 is smaller than COST2. /
244	friend bool operator< (comp_cost cost1, comp_cost cost2);
245
246	/ Returns true if COST1 and COST2 are equal. /
247	friend bool operator== (comp_cost cost1, comp_cost cost2);
248
249	/ Returns true if COST1 is smaller or equal than COST2. /
250	friend bool operator<= (comp_cost cost1, comp_cost cost2);
251
252	int64_t cost; / The runtime cost. /
253	unsigned complexity; / The estimate of the complexity of the code for*
254	the computation (in no concrete units --
255	complexity field should be larger for more
256	complex expressions and addressing modes). /*
257	int64_t scratch; / Scratch used during cost computation. /
258	};
259
260	static const comp_cost no_cost;
261	static const comp_cost infinite_cost (INFTY, `0`, INFTY);
262
263	bool
264	comp_cost::infinite_cost_p ()
265	{
266	return cost == INFTY;
267	}
268
269	comp_cost
270	operator+ (comp_cost cost1, comp_cost cost2)
271	{
272	if (cost1.infinite_cost_p () \|\| cost2.infinite_cost_p ())
273	return infinite_cost;
274
275	gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
276	cost1.cost += cost2.cost;
277	cost1.complexity += cost2.complexity;
278
279	return cost1;
280	}
281
282	comp_cost
283	operator- (comp_cost cost1, comp_cost cost2)
284	{
285	if (cost1.infinite_cost_p ())
286	return infinite_cost;
287
288	gcc_assert (!cost2.infinite_cost_p ());
289	gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290
291	cost1.cost -= cost2.cost;
292	cost1.complexity -= cost2.complexity;
293
294	return cost1;
295	}
296
297	comp_cost
298	comp_cost::operator+= (comp_cost cost)
299	{
300	*this = *this + cost;
301	return *this;
302	}
303
304	comp_cost
305	comp_cost::operator+= (HOST_WIDE_INT c)
306	{
307	if (c >= INFTY)
308	this->cost = INFTY;
309
310	if (infinite_cost_p ())
311	return *this;
312
313	gcc_assert (this->cost + c < infinite_cost.cost);
314	this->cost += c;
315
316	return *this;
317	}
318
319	comp_cost
320	comp_cost::operator-= (HOST_WIDE_INT c)
321	{
322	if (infinite_cost_p ())
323	return *this;
324
325	gcc_assert (this->cost - c < infinite_cost.cost);
326	this->cost -= c;
327
328	return *this;
329	}
330
331	comp_cost
332	comp_cost::operator/= (HOST_WIDE_INT c)
333	{
334	gcc_assert (c != `0`);
335	if (infinite_cost_p ())
336	return *this;
337
338	this->cost /= c;
339
340	return *this;
341	}
342
343	comp_cost
344	comp_cost::operator*= (HOST_WIDE_INT c)
345	{
346	if (infinite_cost_p ())
347	return *this;
348
349	gcc_assert (this->cost * c < infinite_cost.cost);
350	this->cost *= c;
351
352	return *this;
353	}
354
355	comp_cost
356	comp_cost::operator-= (comp_cost cost)
357	{
358	*this = *this - cost;
359	return *this;
360	}
361
362	bool
363	operator< (comp_cost cost1, comp_cost cost2)
364	{
365	if (cost1.cost == cost2.cost)
366	return cost1.complexity < cost2.complexity;
367
368	return cost1.cost < cost2.cost;
369	}
370
371	bool
372	operator== (comp_cost cost1, comp_cost cost2)
373	{
374	return cost1.cost == cost2.cost
375	&& cost1.complexity == cost2.complexity;
376	}
377
378	bool
379	operator<= (comp_cost cost1, comp_cost cost2)
380	{
381	return cost1 < cost2 \|\| cost1 == cost2;
382	}
383
384	struct iv_inv_expr_ent;
385
386	/ The candidate - cost pair. /
387	class cost_pair
388	{
389	public:
390	struct iv_cand cand; /* The candidate. /
391	comp_cost cost; / The cost. /
392	enum tree_code comp; / For iv elimination, the comparison. /
393	bitmap inv_vars; / The list of invariant ssa_vars that have to be*
394	preserved when representing iv_use with iv_cand. /*
395	bitmap inv_exprs; / The list of newly created invariant expressions*
396	when representing iv_use with iv_cand. /*
397	tree value; / For final value elimination, the expression for*
398	the final value of the iv. For iv elimination,
399	the new bound to compare with. /*
400	};
401
402	/ Use. /
403	struct iv_use
404	{
405	unsigned id; / The id of the use. /
406	unsigned group_id; / The group id the use belongs to. /
407	enum use_type type; / Type of the use. /
408	tree mem_type; / The memory type to use when testing whether an*
409	address is legitimate, and what the address's
410	cost is. /*
411	struct iv iv; /* The induction variable it is based on. /
412	gimple stmt; /* Statement in that it occurs. /
413	tree op_p; /* The place where it occurs. /
414
415	tree addr_base; / Base address with const offset stripped. /
416	poly_uint64 addr_offset;
417	/ Const offset stripped from base address. /
418	};
419
420	/ Group of uses. /
421	struct iv_group
422	{
423	/ The id of the group. /
424	unsigned id;
425	/ Uses of the group are of the same type. /
426	enum use_type type;
427	/ The set of "related" IV candidates, plus the important ones. /
428	bitmap related_cands;
429	/ Number of IV candidates in the cost_map. /
430	unsigned n_map_members;
431	/ The costs wrto the iv candidates. /
432	class cost_pair *cost_map;
433	/ The selected candidate for the group. /
434	struct iv_cand *selected;
435	/ To indicate this is a doloop use group. /
436	bool doloop_p;
437	/ Uses in the group. /
438	vec<struct iv_use *> vuses;
439	};
440
/* Where the increment of an iv is placed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,
  /* At the end of the latch block.  */
  IP_END,
  /* Immediately before a specific use.  */
  IP_BEFORE_USE,
  /* Immediately after a specific use.  */
  IP_AFTER_USE,
  /* The original biv.  */
  IP_ORIGINAL
};
450
451	/ The induction variable candidate. /
452	struct iv_cand
453	{
454	unsigned id; / The number of the candidate. /
455	bool important; / Whether this is an "important" candidate, i.e. such*
456	that it should be considered by all uses. /*
457	bool involves_undefs; / Whether the IV involves undefined values. /
458	ENUM_BITFIELD(iv_position) pos : `8`; / Where it is computed. /
459	gimple incremented_at;/* For original biv, the statement where it is*
460	incremented. /*
461	tree var_before; / The variable used for it before increment. /
462	tree var_after; / The variable used for it after increment. /
463	struct iv iv; /* The value of the candidate. NULL for*
464	"pseudocandidate" used to indicate the possibility
465	to replace the final value of an iv by direct
466	computation of the value. /*
467	unsigned cost; / Cost of the candidate. /
468	unsigned cost_step; / Cost of the candidate's increment operation. /
469	struct iv_use ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place*
470	where it is incremented. /*
471	bitmap inv_vars; / The list of invariant ssa_vars used in step of the*
472	iv_cand. /*
473	bitmap inv_exprs; / If step is more complicated than a single ssa_var,*
474	handle it as a new invariant expression which will
475	be hoisted out of loop. /*
476	struct iv orig_iv; /* The original iv if this cand is added from biv with*
477	smaller type. /*
478	bool doloop_p; / Whether this is a doloop candidate. /
479	};
480
481	/ Hashtable entry for common candidate derived from iv uses. /
482	class iv_common_cand
483	{
484	public:
485	tree base;
486	tree step;
487	/ IV uses from which this common candidate is derived. /
488	auto_vec<struct iv_use *> uses;
489	hashval_t hash;
490	};
491
492	/ Hashtable helpers. /
493
494	struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495	{
496	static inline hashval_t hash (const iv_common_cand *);
497	static inline bool equal (const iv_common_cand , const* iv_common_cand *);
498	};
499
500	/ Hash function for possible common candidates. /
501
502	inline hashval_t
503	iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504	{
505	return ccand->hash;
506	}
507
508	/ Hash table equality function for common candidates. /
509
510	inline bool
511	iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
512	const iv_common_cand *ccand2)
513	{
514	return (ccand1->hash == ccand2->hash
515	&& operand_equal_p (ccand1->base, ccand2->base, flags: `0`)
516	&& operand_equal_p (ccand1->step, ccand2->step, flags: `0`)
517	&& (TYPE_PRECISION (TREE_TYPE (ccand1->base))
518	== TYPE_PRECISION (TREE_TYPE (ccand2->base))));
519	}
520
521	/ Loop invariant expression hashtable entry. /
522
523	struct iv_inv_expr_ent
524	{
525	/ Tree expression of the entry. /
526	tree expr;
527	/ Unique indentifier. /
528	int id;
529	/ Hash value. /
530	hashval_t hash;
531	};
532
533	/ Sort iv_inv_expr_ent pair A and B by id field. /
534
535	static int
536	sort_iv_inv_expr_ent (const void a, const* void *b)
537	{
538	const iv_inv_expr_ent * const e1 = (const* iv_inv_expr_ent * const *) (a);
539	const iv_inv_expr_ent * const e2 = (const* iv_inv_expr_ent * const *) (b);
540
541	unsigned id1 = (*e1)->id;
542	unsigned id2 = (*e2)->id;
543
544	if (id1 < id2)
545	return -`1`;
546	else if (id1 > id2)
547	return `1`;
548	else
549	return `0`;
550	}
551
552	/ Hashtable helpers. /
553
554	struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555	{
556	static inline hashval_t hash (const iv_inv_expr_ent *);
557	static inline bool equal (const iv_inv_expr_ent , const* iv_inv_expr_ent *);
558	};
559
560	/ Return true if uses of type TYPE represent some form of address. /
561
562	inline bool
563	address_p (use_type type)
564	{
565	return type == USE_REF_ADDRESS \|\| type == USE_PTR_ADDRESS;
566	}
567
568	/ Hash function for loop invariant expressions. /
569
570	inline hashval_t
571	iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572	{
573	return expr->hash;
574	}
575
576	/ Hash table equality function for expressions. /
577
578	inline bool
579	iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
580	const iv_inv_expr_ent *expr2)
581	{
582	return expr1->hash == expr2->hash
583	&& operand_equal_p (expr1->expr, expr2->expr, flags: `0`);
584	}
585
586	struct ivopts_data
587	{
588	/ The currently optimized loop. /
589	class loop *current_loop;
590	location_t loop_loc;
591
592	/ Numbers of iterations for all exits of the current loop. /
593	hash_map<edge, tree_niter_desc > niters;
594
595	/ Number of registers used in it. /
596	unsigned regs_used;
597
598	/ The size of version_info array allocated. /
599	unsigned version_info_size;
600
601	/ The array of information for the ssa names. /
602	struct version_info *version_info;
603
604	/ The hashtable of loop invariant expressions created*
605	by ivopt. /*
606	hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607
608	/ The bitmap of indices in version_info whose value was changed. /
609	bitmap relevant;
610
611	/ The uses of induction variables. /
612	vec<iv_group *> vgroups;
613
614	/ The candidates. /
615	vec<iv_cand *> vcands;
616
617	/ A bitmap of important candidates. /
618	bitmap important_candidates;
619
620	/ Cache used by tree_to_aff_combination_expand. /
621	hash_map<tree, name_expansion > name_expansion_cache;
622
623	/ The hashtable of common candidates derived from iv uses. /
624	hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625
626	/ The common candidates. /
627	vec<iv_common_cand *> iv_common_cands;
628
629	/ Hash map recording base object information of tree exp. /
630	hash_map<tree, tree> *base_object_map;
631
632	/ The maximum invariant variable id. /
633	unsigned max_inv_var_id;
634
635	/ The maximum invariant expression id. /
636	unsigned max_inv_expr_id;
637
638	/ Number of no_overflow BIVs which are not used in memory address. /
639	unsigned bivs_not_used_in_addr;
640
641	/ Obstack for iv structure. /
642	struct obstack iv_obstack;
643
644	/ Whether to consider just related and important candidates when replacing a*
645	use. /*
646	bool consider_all_candidates;
647
648	/ Are we optimizing for speed? /
649	bool speed;
650
651	/ Whether the loop body includes any function calls. /
652	bool body_includes_call;
653
654	/ Whether the loop body can only be exited via single exit. /
655	bool loop_single_exit_p;
656
657	/ Whether the loop has doloop comparison use. /
658	bool doloop_use_p;
659	};
660
661	/ An assignment of iv candidates to uses. /
662
663	class iv_ca
664	{
665	public:
666	/ The number of uses covered by the assignment. /
667	unsigned upto;
668
669	/ Number of uses that cannot be expressed by the candidates in the set. /
670	unsigned bad_groups;
671
672	/ Candidate assigned to a use, together with the related costs. /
673	class cost_pair **cand_for_group;
674
675	/ Number of times each candidate is used. /
676	unsigned *n_cand_uses;
677
678	/ The candidates used. /
679	bitmap cands;
680
681	/ The number of candidates in the set. /
682	unsigned n_cands;
683
684	/ The number of invariants needed, including both invariant variants and*
685	invariant expressions. /*
686	unsigned n_invs;
687
688	/ Total cost of expressing uses. /
689	comp_cost cand_use_cost;
690
691	/ Total cost of candidates. /
692	int64_t cand_cost;
693
694	/ Number of times each invariant variable is used. /
695	unsigned *n_inv_var_uses;
696
697	/ Number of times each invariant expression is used. /
698	unsigned *n_inv_expr_uses;
699
700	/ Total cost of the assignment. /
701	comp_cost cost;
702	};
703
/* One element of a list describing the difference between two iv candidate
   assignments.  */

struct iv_ca_delta
{
  /* The group whose assignment changes.  */
  struct iv_group *group;

  /* The previous assignment (for rollback purposes).  */
  class cost_pair *old_cp;

  /* The new assignment.  */
  class cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
720
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences, we just give up (it is quite unlikely
   that optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)
737
738	/ The list of trees for that the decl_rtl field must be reset is stored*
739	here. /*
740
741	static vec<tree> decl_rtl_to_reset;
742
743	static comp_cost force_expr_to_var_cost (tree, bool);
744
745	/ The single loop exit if it dominates the latch, NULL otherwise. /
746
747	edge
748	single_dom_exit (class loop *loop)
749	{
750	edge exit = single_exit (loop);
751
752	if (!exit)
753	return NULL;
754
755	if (!just_once_each_iteration_p (loop, exit->src))
756	return NULL;
757
758	return exit;
759	}
760
761	/ Dumps information about the induction variable IV to FILE. Don't dump*
762	variable's name if DUMP_NAME is FALSE. The information is dumped with
763	preceding spaces indicated by INDENT_LEVEL. /*
764
765	void
766	dump_iv (FILE file, struct* iv iv, bool* dump_name, unsigned indent_level)
767	{
768	const char *p;
769	const char spaces[`9`] = {`' '`, `' '`, `' '`, `' '`, `' '`, `' '`, `' '`, `' '`, `'\0'`};
770
771	if (indent_level > `4`)
772	indent_level = `4`;
773	p = spaces + `8` - (indent_level << `1`);
774
775	fprintf (stream: file, format: "%sIV struct:\n", p);
776	if (iv->ssa_name && dump_name)
777	{
778	fprintf (stream: file, format: "%s SSA_NAME:\t", p);
779	print_generic_expr (file, iv->ssa_name, TDF_SLIM);
780	fprintf (stream: file, format: "\n");
781	}
782
783	fprintf (stream: file, format: "%s Type:\t", p);
784	print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
785	fprintf (stream: file, format: "\n");
786
787	fprintf (stream: file, format: "%s Base:\t", p);
788	print_generic_expr (file, iv->base, TDF_SLIM);
789	fprintf (stream: file, format: "\n");
790
791	fprintf (stream: file, format: "%s Step:\t", p);
792	print_generic_expr (file, iv->step, TDF_SLIM);
793	fprintf (stream: file, format: "\n");
794
795	if (iv->base_object)
796	{
797	fprintf (stream: file, format: "%s Object:\t", p);
798	print_generic_expr (file, iv->base_object, TDF_SLIM);
799	fprintf (stream: file, format: "\n");
800	}
801
802	fprintf (stream: file, format: "%s Biv:\t%c\n", p, iv->biv_p ? `'Y'` : `'N'`);
803
804	fprintf (stream: file, format: "%s Overflowness wrto loop niter:\t%s\n",
805	p, iv->no_overflow ? "No-overflow" : "Overflow");
806	}
807
808	/ Dumps information about the USE to FILE. /
809
810	void
811	dump_use (FILE file, struct* iv_use *use)
812	{
813	fprintf (stream: file, format: " Use %d.%d:\n", use->group_id, use->id);
814	fprintf (stream: file, format: " At stmt:\t");
815	print_gimple_stmt (file, use->stmt, `0`);
816	fprintf (stream: file, format: " At pos:\t");
817	if (use->op_p)
818	print_generic_expr (file, *use->op_p, TDF_SLIM);
819	fprintf (stream: file, format: "\n");
820	dump_iv (file, iv: use->iv, dump_name: false, indent_level: `2`);
821	}
822
823	/ Dumps information about the uses to FILE. /
824
825	void
826	dump_groups (FILE file, struct* ivopts_data *data)
827	{
828	unsigned i, j;
829	struct iv_group *group;
830
831	for (i = `0`; i < data->vgroups.length (); i++)
832	{
833	group = data->vgroups [i];
834	fprintf (stream: file, format: "Group %d:\n", group->id);
835	if (group->type == USE_NONLINEAR_EXPR)
836	fprintf (stream: file, format: " Type:\tGENERIC\n");
837	else if (group->type == USE_REF_ADDRESS)
838	fprintf (stream: file, format: " Type:\tREFERENCE ADDRESS\n");
839	else if (group->type == USE_PTR_ADDRESS)
840	fprintf (stream: file, format: " Type:\tPOINTER ARGUMENT ADDRESS\n");
841	else
842	{
843	gcc_assert (group->type == USE_COMPARE);
844	fprintf (stream: file, format: " Type:\tCOMPARE\n");
845	}
846	for (j = `0`; j < group->vuses.length (); j++)
847	dump_use (file, use: group->vuses [j]);
848	}
849	}
850
851	/ Dumps information about induction variable candidate CAND to FILE. /
852
853	void
854	dump_cand (FILE file, struct* iv_cand *cand)
855	{
856	struct iv *iv = cand->iv;
857
858	fprintf (stream: file, format: "Candidate %d:\n", cand->id);
859	if (cand->inv_vars)
860	{
861	fprintf (stream: file, format: " Depend on inv.vars: ");
862	dump_bitmap (file, map: cand->inv_vars);
863	}
864	if (cand->inv_exprs)
865	{
866	fprintf (stream: file, format: " Depend on inv.exprs: ");
867	dump_bitmap (file, map: cand->inv_exprs);
868	}
869
870	if (cand->var_before)
871	{
872	fprintf (stream: file, format: " Var befor: ");
873	print_generic_expr (file, cand->var_before, TDF_SLIM);
874	fprintf (stream: file, format: "\n");
875	}
876	if (cand->var_after)
877	{
878	fprintf (stream: file, format: " Var after: ");
879	print_generic_expr (file, cand->var_after, TDF_SLIM);
880	fprintf (stream: file, format: "\n");
881	}
882
883	switch (cand->pos)
884	{
885	case IP_NORMAL:
886	fprintf (stream: file, format: " Incr POS: before exit test\n");
887	break;
888
889	case IP_BEFORE_USE:
890	fprintf (stream: file, format: " Incr POS: before use %d\n", cand->ainc_use->id);
891	break;
892
893	case IP_AFTER_USE:
894	fprintf (stream: file, format: " Incr POS: after use %d\n", cand->ainc_use->id);
895	break;
896
897	case IP_END:
898	fprintf (stream: file, format: " Incr POS: at end\n");
899	break;
900
901	case IP_ORIGINAL:
902	fprintf (stream: file, format: " Incr POS: orig biv\n");
903	break;
904	}
905
906	dump_iv (file, iv, dump_name: false, indent_level: `1`);
907	}
908
909	/ Returns the info for ssa version VER. /
910
911	static inline struct version_info *
912	ver_info (struct ivopts_data data, unsigned* ver)
913	{
914	return data->version_info + ver;
915	}
916
917	/ Returns the info for ssa name NAME. /
918
919	static inline struct version_info *
920	name_info (struct ivopts_data *data, tree name)
921	{
922	return ver_info (data, SSA_NAME_VERSION (name));
923	}
924
925	/ Returns true if STMT is after the place where the IP_NORMAL ivs will be*
926	emitted in LOOP. /*
927
928	static bool
929	stmt_after_ip_normal_pos (class loop loop, gimple stmt)
930	{
931	basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (g: stmt);
932
933	gcc_assert (bb);
934
935	if (sbb == loop->latch)
936	return true;
937
938	if (sbb != bb)
939	return false;
940
941	return stmt == last_nondebug_stmt (bb);
942	}
943
944	/ Returns true if STMT if after the place where the original induction*
945	variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
946	if the positions are identical. /*
947
948	static bool
949	stmt_after_inc_pos (struct iv_cand cand, gimple stmt, bool true_if_equal)
950	{
951	basic_block cand_bb = gimple_bb (g: cand->incremented_at);
952	basic_block stmt_bb = gimple_bb (g: stmt);
953
954	if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
955	return false;
956
957	if (stmt_bb != cand_bb)
958	return true;
959
960	if (true_if_equal
961	&& gimple_uid (g: stmt) == gimple_uid (g: cand->incremented_at))
962	return true;
963	return gimple_uid (g: stmt) > gimple_uid (g: cand->incremented_at);
964	}
965
966	/ Returns true if STMT if after the place where the induction variable*
967	CAND is incremented in LOOP. /*
968
969	static bool
970	stmt_after_increment (class loop loop, struct* iv_cand cand, gimple stmt)
971	{
972	switch (cand->pos)
973	{
974	case IP_END:
975	return false;
976
977	case IP_NORMAL:
978	return stmt_after_ip_normal_pos (loop, stmt);
979
980	case IP_ORIGINAL:
981	case IP_AFTER_USE:
982	return stmt_after_inc_pos (cand, stmt, true_if_equal: false);
983
984	case IP_BEFORE_USE:
985	return stmt_after_inc_pos (cand, stmt, true_if_equal: true);
986
987	default:
988	gcc_unreachable ();
989	}
990	}
991
992	/ walk_tree callback for contains_abnormal_ssa_name_p. /
993
994	static tree
995	contains_abnormal_ssa_name_p_1 (tree tp, int* walk_subtrees, void* *)
996	{
997	if (TREE_CODE (*tp) == SSA_NAME
998	&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
999	return *tp;
1000
1001	if (!EXPR_P (*tp))
1002	*walk_subtrees = `0`;
1003
1004	return NULL_TREE;
1005	}
1006
1007	/ Returns true if EXPR contains a ssa name that occurs in an*
1008	abnormal phi node. /*
1009
1010	bool
1011	contains_abnormal_ssa_name_p (tree expr)
1012	{
1013	return walk_tree_without_duplicates
1014	(&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1015	}
1016
1017	/ Returns the structure describing number of iterations determined from*
1018	EXIT of DATA->current_loop, or NULL if something goes wrong. /*
1019
1020	static class tree_niter_desc *
1021	niter_for_exit (struct ivopts_data *data, edge exit)
1022	{
1023	class tree_niter_desc *desc;
1024	tree_niter_desc **slot;
1025
1026	if (!data->niters)
1027	{
1028	data->niters = new hash_map<edge, tree_niter_desc *>;
1029	slot = NULL;
1030	}
1031	else
1032	slot = data->niters->get (k: exit);
1033
1034	if (!slot)
1035	{
1036	/ Try to determine number of iterations. We cannot safely work with ssa*
1037	names that appear in phi nodes on abnormal edges, so that we do not
1038	create overlapping life ranges for them (PR 27283). /*
1039	desc = XNEW (class tree_niter_desc);
1040	::new (static_cast<void*> (desc)) tree_niter_desc ();
1041	if (!number_of_iterations_exit (data->current_loop,
1042	exit, niter: desc, true)
1043	\|\| contains_abnormal_ssa_name_p (expr: desc->niter))
1044	{
1045	desc->~tree_niter_desc ();
1046	XDELETE (desc);
1047	desc = NULL;
1048	}
1049	data->niters->put (k: exit, v: desc);
1050	}
1051	else
1052	desc = *slot;
1053
1054	return desc;
1055	}
1056
1057	/ Returns the structure describing number of iterations determined from*
1058	single dominating exit of DATA->current_loop, or NULL if something
1059	goes wrong. /*
1060
1061	static class tree_niter_desc *
1062	niter_for_single_dom_exit (struct ivopts_data *data)
1063	{
1064	edge exit = single_dom_exit (loop: data->current_loop);
1065
1066	if (!exit)
1067	return NULL;
1068
1069	return niter_for_exit (data, exit);
1070	}
1071
1072	/ Initializes data structures used by the iv optimization pass, stored*
1073	in DATA. /*
1074
1075	static void
1076	tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077	{
1078	data->version_info_size = `2` * num_ssa_names;
1079	data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1080	data->relevant = BITMAP_ALLOC (NULL);
1081	data->important_candidates = BITMAP_ALLOC (NULL);
1082	data->max_inv_var_id = `0`;
1083	data->max_inv_expr_id = `0`;
1084	data->niters = NULL;
1085	data->vgroups.create (nelems: `20`);
1086	data->vcands.create (nelems: `20`);
1087	data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (`10`);
1088	data->name_expansion_cache = NULL;
1089	data->base_object_map = NULL;
1090	data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (`10`);
1091	data->iv_common_cands.create (nelems: `20`);
1092	decl_rtl_to_reset.create (nelems: `20`);
1093	gcc_obstack_init (&data->iv_obstack);
1094	}
1095
1096	/ walk_tree callback for determine_base_object. /
1097
1098	static tree
1099	determine_base_object_1 (tree tp, int* walk_subtrees, void* *wdata)
1100	{
1101	tree_code code = TREE_CODE (*tp);
1102	tree obj = NULL_TREE;
1103	if (code == ADDR_EXPR)
1104	{
1105	tree base = get_base_address (TREE_OPERAND (*tp, `0`));
1106	if (!base)
1107	obj = *tp;
1108	else if (TREE_CODE (base) != MEM_REF)
1109	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110	}
1111	else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1112	obj = fold_convert (ptr_type_node, *tp);
1113
1114	if (!obj)
1115	{
1116	if (!EXPR_P (*tp))
1117	*walk_subtrees = `0`;
1118
1119	return NULL_TREE;
1120	}
1121	/ Record special node for multiple base objects and stop. /
1122	if (*static_cast<tree *> (wdata))
1123	{
1124	*static_cast<tree *> (wdata) = integer_zero_node;
1125	return integer_zero_node;
1126	}
1127	/ Record the base object and continue looking. /
1128	*static_cast<tree *> (wdata) = obj;
1129	return NULL_TREE;
1130	}
1131
1132	/ Returns a memory object to that EXPR points with caching. Return NULL if we*
1133	are able to determine that it does not point to any such object; specially
1134	return integer_zero_node if EXPR contains multiple base objects. /*
1135
1136	static tree
1137	determine_base_object (struct ivopts_data *data, tree expr)
1138	{
1139	tree *slot, obj = NULL_TREE;
1140	if (data->base_object_map)
1141	{
1142	if ((slot = data->base_object_map->get(k: expr)) != NULL)
1143	return *slot;
1144	}
1145	else
1146	data->base_object_map = new hash_map<tree, tree>;
1147
1148	(void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1149	data->base_object_map->put (k: expr, v: obj);
1150	return obj;
1151	}
1152
1153	/ Allocates an induction variable with given initial value BASE and step STEP*
1154	for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. /*
1155
1156	static struct iv *
1157	alloc_iv (struct ivopts_data *data, tree base, tree step,
1158	bool no_overflow = false)
1159	{
1160	tree expr = base;
1161	struct iv iv = (struct* iv*) obstack_alloc (&data->iv_obstack,
1162	sizeof (struct iv));
1163	gcc_assert (step != NULL_TREE);
1164
1165	/ Canonicalize the address expression in base if it were an unsigned*
1166	computation. That leads to more equalities being detected and results in:
1167
1168	1) More accurate cost can be computed for address expressions;
1169	2) Duplicate candidates won't be created for bases in different
1170	forms, like &a[0] and &a.
1171	3) Duplicate candidates won't be created for IV expressions that differ
1172	only in their sign. /*
1173	aff_tree comb;
1174	STRIP_NOPS (expr);
1175	expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
1176	tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1177	base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178
1179	iv->base = base;
1180	iv->base_object = determine_base_object (data, expr: base);
1181	iv->step = step;
1182	iv->biv_p = false;
1183	iv->nonlin_use = NULL;
1184	iv->ssa_name = NULL_TREE;
1185	if (!no_overflow
1186	&& !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1187	base, step))
1188	no_overflow = true;
1189	iv->no_overflow = no_overflow;
1190	iv->have_address_use = false;
1191
1192	return iv;
1193	}
1194
1195	/ Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV*
1196	doesn't overflow. /*
1197
1198	static void
1199	set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1200	bool no_overflow)
1201	{
1202	struct version_info *info = name_info (data, name: iv);
1203
1204	gcc_assert (!info->iv);
1205
1206	bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1207	info->iv = alloc_iv (data, base, step, no_overflow);
1208	info->iv->ssa_name = iv;
1209	}
1210
1211	/ Finds induction variable declaration for VAR. /
1212
1213	static struct iv *
1214	get_iv (struct ivopts_data *data, tree var)
1215	{
1216	basic_block bb;
1217	tree type = TREE_TYPE (var);
1218
1219	if (!POINTER_TYPE_P (type)
1220	&& !INTEGRAL_TYPE_P (type))
1221	return NULL;
1222
1223	if (!name_info (data, name: var)->iv)
1224	{
1225	bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1226
1227	if (!bb
1228	\|\| !flow_bb_inside_loop_p (data->current_loop, bb))
1229	{
1230	if (POINTER_TYPE_P (type))
1231	type = sizetype;
1232	set_iv (data, iv: var, base: var, step: build_int_cst (type, `0`), no_overflow: true);
1233	}
1234	}
1235
1236	return name_info (data, name: var)->iv;
1237	}
1238
1239	/ Return the first non-invariant ssa var found in EXPR. /
1240
1241	static tree
1242	extract_single_var_from_expr (tree expr)
1243	{
1244	int i, n;
1245	tree tmp;
1246	enum tree_code code;
1247
1248	if (!expr \|\| is_gimple_min_invariant (expr))
1249	return NULL;
1250
1251	code = TREE_CODE (expr);
1252	if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1253	{
1254	n = TREE_OPERAND_LENGTH (expr);
1255	for (i = `0`; i < n; i++)
1256	{
1257	tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1258
1259	if (tmp)
1260	return tmp;
1261	}
1262	}
1263	return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1264	}
1265
1266	/ Finds basic ivs. /
1267
1268	static bool
1269	find_bivs (struct ivopts_data *data)
1270	{
1271	gphi *phi;
1272	affine_iv iv;
1273	tree step, type, base, stop;
1274	bool found = false;
1275	class loop *loop = data->current_loop;
1276	gphi_iterator psi;
1277
1278	for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1279	{
1280	phi = psi.phi ();
1281
1282	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1283	continue;
1284
1285	if (virtual_operand_p (PHI_RESULT (phi)))
1286	continue;
1287
1288	if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1289	continue;
1290
1291	if (integer_zerop (iv.step))
1292	continue;
1293
1294	step = iv.step;
1295	base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1296	/ Stop expanding iv base at the first ssa var referred by iv step.*
1297	Ideally we should stop at any ssa var, because that's expensive
1298	and unusual to happen, we just do it on the first one.
1299
1300	See PR64705 for the rationale. /*
1301	stop = extract_single_var_from_expr (expr: step);
1302	base = expand_simple_operations (base, stop);
1303	if (contains_abnormal_ssa_name_p (expr: base)
1304	\|\| contains_abnormal_ssa_name_p (expr: step))
1305	continue;
1306
1307	type = TREE_TYPE (PHI_RESULT (phi));
1308	base = fold_convert (type, base);
1309	if (step)
1310	{
1311	if (POINTER_TYPE_P (type))
1312	step = convert_to_ptrofftype (step);
1313	else
1314	step = fold_convert (type, step);
1315	}
1316
1317	set_iv (data, PHI_RESULT (phi), base, step, no_overflow: iv.no_overflow);
1318	found = true;
1319	}
1320
1321	return found;
1322	}
1323
1324	/ Marks basic ivs. /
1325
1326	static void
1327	mark_bivs (struct ivopts_data *data)
1328	{
1329	gphi *phi;
1330	gimple *def;
1331	tree var;
1332	struct iv iv, incr_iv;
1333	class loop *loop = data->current_loop;
1334	basic_block incr_bb;
1335	gphi_iterator psi;
1336
1337	data->bivs_not_used_in_addr = `0`;
1338	for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
1339	{
1340	phi = psi.phi ();
1341
1342	iv = get_iv (data, PHI_RESULT (phi));
1343	if (!iv)
1344	continue;
1345
1346	var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1347	def = SSA_NAME_DEF_STMT (var);
1348	/ Don't mark iv peeled from other one as biv. /
1349	if (def
1350	&& gimple_code (g: def) == GIMPLE_PHI
1351	&& gimple_bb (g: def) == loop->header)
1352	continue;
1353
1354	incr_iv = get_iv (data, var);
1355	if (!incr_iv)
1356	continue;
1357
1358	/ If the increment is in the subloop, ignore it. /
1359	incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1360	if (incr_bb->loop_father != data->current_loop
1361	\|\| (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1362	continue;
1363
1364	iv->biv_p = true;
1365	incr_iv->biv_p = true;
1366	if (iv->no_overflow)
1367	data->bivs_not_used_in_addr++;
1368	if (incr_iv->no_overflow)
1369	data->bivs_not_used_in_addr++;
1370	}
1371	}
1372
1373	/ Checks whether STMT defines a linear induction variable and stores its*
1374	parameters to IV. /*
1375
1376	static bool
1377	find_givs_in_stmt_scev (struct ivopts_data data, gimple stmt, affine_iv *iv)
1378	{
1379	tree lhs, stop;
1380	class loop *loop = data->current_loop;
1381
1382	iv->base = NULL_TREE;
1383	iv->step = NULL_TREE;
1384
1385	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1386	return false;
1387
1388	lhs = gimple_assign_lhs (gs: stmt);
1389	if (TREE_CODE (lhs) != SSA_NAME)
1390	return false;
1391
1392	if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1393	return false;
1394
1395	/ Stop expanding iv base at the first ssa var referred by iv step.*
1396	Ideally we should stop at any ssa var, because that's expensive
1397	and unusual to happen, we just do it on the first one.
1398
1399	See PR64705 for the rationale. /*
1400	stop = extract_single_var_from_expr (expr: iv->step);
1401	iv->base = expand_simple_operations (iv->base, stop);
1402	if (contains_abnormal_ssa_name_p (expr: iv->base)
1403	\|\| contains_abnormal_ssa_name_p (expr: iv->step))
1404	return false;
1405
1406	/ If STMT could throw, then do not consider STMT as defining a GIV.*
1407	While this will suppress optimizations, we cannot safely delete this
1408	GIV and associated statements, even if it appears it is not used. /*
1409	if (stmt_could_throw_p (cfun, stmt))
1410	return false;
1411
1412	return true;
1413	}
1414
1415	/ Finds general ivs in statement STMT. /
1416
1417	static void
1418	find_givs_in_stmt (struct ivopts_data data, gimple stmt)
1419	{
1420	affine_iv iv;
1421
1422	if (!find_givs_in_stmt_scev (data, stmt, iv: &iv))
1423	return;
1424
1425	set_iv (data, iv: gimple_assign_lhs (gs: stmt), base: iv.base, step: iv.step, no_overflow: iv.no_overflow);
1426	}
1427
1428	/ Finds general ivs in basic block BB. /
1429
1430	static void
1431	find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1432	{
1433	gimple_stmt_iterator bsi;
1434
1435	for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
1436	if (!is_gimple_debug (gs: gsi_stmt (i: bsi)))
1437	find_givs_in_stmt (data, stmt: gsi_stmt (i: bsi));
1438	}
1439
1440	/ Finds general ivs. /
1441
1442	static void
1443	find_givs (struct ivopts_data data, basic_block body)
1444	{
1445	class loop *loop = data->current_loop;
1446	unsigned i;
1447
1448	for (i = `0`; i < loop->num_nodes; i++)
1449	find_givs_in_bb (data, bb: body[i]);
1450	}
1451
1452	/ For each ssa name defined in LOOP determines whether it is an induction*
1453	variable and if so, its initial value and step. /*
1454
1455	static bool
1456	find_induction_variables (struct ivopts_data data, basic_block body)
1457	{
1458	unsigned i;
1459	bitmap_iterator bi;
1460
1461	if (!find_bivs (data))
1462	return false;
1463
1464	find_givs (data, body);
1465	mark_bivs (data);
1466
1467	if (dump_file && (dump_flags & TDF_DETAILS))
1468	{
1469	class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1470
1471	if (niter)
1472	{
1473	fprintf (stream: dump_file, format: " number of iterations ");
1474	print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1475	if (!integer_zerop (niter->may_be_zero))
1476	{
1477	fprintf (stream: dump_file, format: "; zero if ");
1478	print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1479	}
1480	fprintf (stream: dump_file, format: "\n");
1481	};
1482
1483	fprintf (stream: dump_file, format: "\n<Induction Vars>:\n");
1484	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
1485	{
1486	struct version_info *info = ver_info (data, ver: i);
1487	if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1488	dump_iv (file: dump_file, iv: ver_info (data, ver: i)->iv, dump_name: true, indent_level: `0`);
1489	}
1490	}
1491
1492	return true;
1493	}
1494
1495	/ Records a use of TYPE at USE_P in STMT whose value is IV in GROUP.
1496	For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1497	is the const offset stripped from IV base and MEM_TYPE is the type
1498	of the memory being addressed. For uses of other types, ADDR_BASE
1499	and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. /*
1500
1501	static struct iv_use *
1502	record_use (struct iv_group group, tree use_p, struct iv *iv,
1503	gimple stmt, enum* use_type type, tree mem_type,
1504	tree addr_base, poly_uint64 addr_offset)
1505	{
1506	struct iv_use use = XCNEW (struct* iv_use);
1507
1508	use->id = group->vuses.length ();
1509	use->group_id = group->id;
1510	use->type = type;
1511	use->mem_type = mem_type;
1512	use->iv = iv;
1513	use->stmt = stmt;
1514	use->op_p = use_p;
1515	use->addr_base = addr_base;
1516	use->addr_offset = addr_offset;
1517
1518	group->vuses.safe_push (obj: use);
1519	return use;
1520	}
1521
1522	/ Checks whether OP is a loop-level invariant and if so, records it.*
1523	NONLINEAR_USE is true if the invariant is used in a way we do not
1524	handle specially. /*
1525
1526	static void
1527	record_invariant (struct ivopts_data data, tree op, bool* nonlinear_use)
1528	{
1529	basic_block bb;
1530	struct version_info *info;
1531
1532	if (TREE_CODE (op) != SSA_NAME
1533	\|\| virtual_operand_p (op))
1534	return;
1535
1536	bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1537	if (bb
1538	&& flow_bb_inside_loop_p (data->current_loop, bb))
1539	return;
1540
1541	info = name_info (data, name: op);
1542	info->name = op;
1543	info->has_nonlin_use \|= nonlinear_use;
1544	if (!info->inv_id)
1545	info->inv_id = ++data->max_inv_var_id;
1546	bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1547	}
1548
1549	/ Record a group of TYPE. /
1550
1551	static struct iv_group *
1552	record_group (struct ivopts_data data, enum* use_type type)
1553	{
1554	struct iv_group group = XCNEW (struct* iv_group);
1555
1556	group->id = data->vgroups.length ();
1557	group->type = type;
1558	group->related_cands = BITMAP_ALLOC (NULL);
1559	group->vuses.create (nelems: `1`);
1560	group->doloop_p = false;
1561
1562	data->vgroups.safe_push (obj: group);
1563	return group;
1564	}
1565
1566	/ Record a use of TYPE at USE_P in STMT whose value is IV in a group.
1567	New group will be created if there is no existing group for the use.
1568	MEM_TYPE is the type of memory being addressed, or NULL if this
1569	isn't an address reference. /*
1570
1571	static struct iv_use *
1572	record_group_use (struct ivopts_data data, tree use_p,
1573	struct iv iv, gimple stmt, enum use_type type,
1574	tree mem_type)
1575	{
1576	tree addr_base = NULL;
1577	struct iv_group *group = NULL;
1578	poly_uint64 addr_offset = `0`;
1579
1580	/ Record non address type use in a new group. /
1581	if (address_p (type))
1582	{
1583	unsigned int i;
1584
1585	gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1586	tree addr_toffset;
1587	split_constant_offset (iv->base, &addr_base, &addr_toffset);
1588	addr_offset = int_cst_value (addr_toffset);
1589	for (i = `0`; i < data->vgroups.length (); i++)
1590	{
1591	struct iv_use *use;
1592
1593	group = data->vgroups [i];
1594	use = group->vuses [`0`];
1595	if (!address_p (type: use->type))
1596	continue;
1597
1598	/ Check if it has the same stripped base and step. /
1599	if (operand_equal_p (iv->base_object, use->iv->base_object, flags: `0`)
1600	&& operand_equal_p (iv->step, use->iv->step, flags: OEP_ASSUME_WRAPV)
1601	&& operand_equal_p (addr_base, use->addr_base, flags: OEP_ASSUME_WRAPV))
1602	break;
1603	}
1604	if (i == data->vgroups.length ())
1605	group = NULL;
1606	}
1607
1608	if (!group)
1609	group = record_group (data, type);
1610
1611	return record_use (group, use_p, iv, stmt, type, mem_type,
1612	addr_base, addr_offset);
1613	}
1614
1615	/ Checks whether the use OP is interesting and if so, records it. /
1616
1617	static struct iv_use *
1618	find_interesting_uses_op (struct ivopts_data *data, tree op)
1619	{
1620	struct iv *iv;
1621	gimple *stmt;
1622	struct iv_use *use;
1623
1624	if (TREE_CODE (op) != SSA_NAME)
1625	return NULL;
1626
1627	iv = get_iv (data, var: op);
1628	if (!iv)
1629	return NULL;
1630
1631	if (iv->nonlin_use)
1632	{
1633	gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1634	return iv->nonlin_use;
1635	}
1636
1637	if (integer_zerop (iv->step))
1638	{
1639	record_invariant (data, op, nonlinear_use: true);
1640	return NULL;
1641	}
1642
1643	stmt = SSA_NAME_DEF_STMT (op);
1644	gcc_assert (gimple_code (stmt) == GIMPLE_PHI \|\| is_gimple_assign (stmt));
1645
1646	use = record_group_use (data, NULL, iv, stmt, type: USE_NONLINEAR_EXPR, NULL_TREE);
1647	iv->nonlin_use = use;
1648	return use;
1649	}
1650
/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* No rewrite of the comparison applies.  */
  COMP_IV_NA,

  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,

  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,

  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1664
1665	/ Given a condition in statement STMT, checks whether it is a compare*
1666	of an induction variable and an invariant. If this is the case,
1667	CONTROL_VAR is set to location of the iv, BOUND to the location of
1668	the invariant, IV_VAR and IV_BOUND are set to the corresponding
1669	induction variable descriptions, and true is returned. If this is not
1670	the case, CONTROL_VAR and BOUND are set to the arguments of the
1671	condition and false is returned. /*
1672
1673	static enum comp_iv_rewrite
1674	extract_cond_operands (struct ivopts_data data, gimple stmt,
1675	tree control_var, tree bound,
1676	struct iv iv_var, struct iv iv_bound)
1677	{
1678	/ The objects returned when COND has constant operands. /
1679	static struct iv const_iv;
1680	static tree zero;
1681	tree op0 = &zero, op1 = &zero;
1682	struct iv iv0 = &const_iv, iv1 = &const_iv;
1683	enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1684
1685	if (gimple_code (g: stmt) == GIMPLE_COND)
1686	{
1687	gcond cond_stmt = as_a <gcond > (p: stmt);
1688	op0 = gimple_cond_lhs_ptr (gs: cond_stmt);
1689	op1 = gimple_cond_rhs_ptr (gs: cond_stmt);
1690	}
1691	else
1692	{
1693	op0 = gimple_assign_rhs1_ptr (gs: stmt);
1694	op1 = gimple_assign_rhs2_ptr (gs: stmt);
1695	}
1696
1697	zero = integer_zero_node;
1698	const_iv.step = integer_zero_node;
1699
1700	if (TREE_CODE (*op0) == SSA_NAME)
1701	iv0 = get_iv (data, var: *op0);
1702	if (TREE_CODE (*op1) == SSA_NAME)
1703	iv1 = get_iv (data, var: *op1);
1704
1705	/ If both sides of comparison are IVs. We can express ivs on both end. /
1706	if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1707	{
1708	rewrite_type = COMP_IV_EXPR_2;
1709	goto end;
1710	}
1711
1712	/ If none side of comparison is IV. /
1713	if ((!iv0 \|\| integer_zerop (iv0->step))
1714	&& (!iv1 \|\| integer_zerop (iv1->step)))
1715	goto end;
1716
1717	/ Control variable may be on the other side. /
1718	if (!iv0 \|\| integer_zerop (iv0->step))
1719	{
1720	std::swap (a&: op0, b&: op1);
1721	std::swap (a&: iv0, b&: iv1);
1722	}
1723	/ If one side is IV and the other side isn't loop invariant. /
1724	if (!iv1)
1725	rewrite_type = COMP_IV_EXPR;
1726	/ If one side is IV and the other side is loop invariant. /
1727	else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1728	rewrite_type = COMP_IV_ELIM;
1729
1730	end:
1731	if (control_var)
1732	*control_var = op0;
1733	if (iv_var)
1734	*iv_var = iv0;
1735	if (bound)
1736	*bound = op1;
1737	if (iv_bound)
1738	*iv_bound = iv1;
1739
1740	return rewrite_type;
1741	}
1742
1743	/ Checks whether the condition in STMT is interesting and if so,*
1744	records it. /*
1745
1746	static void
1747	find_interesting_uses_cond (struct ivopts_data data, gimple stmt)
1748	{
1749	tree var_p, bound_p;
1750	struct iv var_iv, bound_iv;
1751	enum comp_iv_rewrite ret;
1752
1753	ret = extract_cond_operands (data, stmt,
1754	control_var: &var_p, bound: &bound_p, iv_var: &var_iv, iv_bound: &bound_iv);
1755	if (ret == COMP_IV_NA)
1756	{
1757	find_interesting_uses_op (data, op: *var_p);
1758	find_interesting_uses_op (data, op: *bound_p);
1759	return;
1760	}
1761
1762	record_group_use (data, use_p: var_p, iv: var_iv, stmt, type: USE_COMPARE, NULL_TREE);
1763	/ Record compare type iv_use for iv on the other side of comparison. /
1764	if (ret == COMP_IV_EXPR_2)
1765	record_group_use (data, use_p: bound_p, iv: bound_iv, stmt, type: USE_COMPARE, NULL_TREE);
1766	}
1767
1768	/ Returns the outermost loop EXPR is obviously invariant in*
1769	relative to the loop LOOP, i.e. if all its operands are defined
1770	outside of the returned loop. Returns NULL if EXPR is not
1771	even obviously invariant in LOOP. /*
1772
1773	class loop *
1774	outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1775	{
1776	basic_block def_bb;
1777	unsigned i, len;
1778
1779	if (is_gimple_min_invariant (expr))
1780	return current_loops->tree_root;
1781
1782	if (TREE_CODE (expr) == SSA_NAME)
1783	{
1784	def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1785	if (def_bb)
1786	{
1787	if (flow_bb_inside_loop_p (loop, def_bb))
1788	return NULL;
1789	return superloop_at_depth (loop,
1790	loop_depth (loop: def_bb->loop_father) + `1`);
1791	}
1792
1793	return current_loops->tree_root;
1794	}
1795
1796	if (!EXPR_P (expr))
1797	return NULL;
1798
1799	unsigned maxdepth = `0`;
1800	len = TREE_OPERAND_LENGTH (expr);
1801	for (i = `0`; i < len; i++)
1802	{
1803	class loop *ivloop;
1804	if (!TREE_OPERAND (expr, i))
1805	continue;
1806
1807	ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1808	if (!ivloop)
1809	return NULL;
1810	maxdepth = MAX (maxdepth, loop_depth (ivloop));
1811	}
1812
1813	return superloop_at_depth (loop, maxdepth);
1814	}
1815
1816	/ Returns true if expression EXPR is obviously invariant in LOOP,*
1817	i.e. if all its operands are defined outside of the LOOP. LOOP
1818	should not be the function body. /*
1819
1820	bool
1821	expr_invariant_in_loop_p (class loop *loop, tree expr)
1822	{
1823	basic_block def_bb;
1824	unsigned i, len;
1825
1826	gcc_assert (loop_depth (loop) > `0`);
1827
1828	if (is_gimple_min_invariant (expr))
1829	return true;
1830
1831	if (TREE_CODE (expr) == SSA_NAME)
1832	{
1833	def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1834	if (def_bb
1835	&& flow_bb_inside_loop_p (loop, def_bb))
1836	return false;
1837
1838	return true;
1839	}
1840
1841	if (!EXPR_P (expr))
1842	return false;
1843
1844	len = TREE_OPERAND_LENGTH (expr);
1845	for (i = `0`; i < len; i++)
1846	if (TREE_OPERAND (expr, i)
1847	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1848	return false;
1849
1850	return true;
1851	}
1852
1853	/ Given expression EXPR which computes inductive values with respect*
1854	to loop recorded in DATA, this function returns biv from which EXPR
1855	is derived by tracing definition chains of ssa variables in EXPR. /*
1856
1857	static struct iv*
1858	find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1859	{
1860	struct iv *iv;
1861	unsigned i, n;
1862	tree e2, e1;
1863	enum tree_code code;
1864	gimple *stmt;
1865
1866	if (expr == NULL_TREE)
1867	return NULL;
1868
1869	if (is_gimple_min_invariant (expr))
1870	return NULL;
1871
1872	code = TREE_CODE (expr);
1873	if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1874	{
1875	n = TREE_OPERAND_LENGTH (expr);
1876	for (i = `0`; i < n; i++)
1877	{
1878	iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1879	if (iv)
1880	return iv;
1881	}
1882	}
1883
1884	/ Stop if it's not ssa name. /
1885	if (code != SSA_NAME)
1886	return NULL;
1887
1888	iv = get_iv (data, var: expr);
1889	if (!iv \|\| integer_zerop (iv->step))
1890	return NULL;
1891	else if (iv->biv_p)
1892	return iv;
1893
1894	stmt = SSA_NAME_DEF_STMT (expr);
1895	if (gphi phi = dyn_cast <gphi > (p: stmt))
1896	{
1897	ssa_op_iter iter;
1898	use_operand_p use_p;
1899	basic_block phi_bb = gimple_bb (g: phi);
1900
1901	/ Skip loop header PHI that doesn't define biv. /
1902	if (phi_bb->loop_father == data->current_loop)
1903	return NULL;
1904
1905	if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
1906	return NULL;
1907
1908	FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1909	{
1910	tree use = USE_FROM_PTR (use_p);
1911	iv = find_deriving_biv_for_expr (data, expr: use);
1912	if (iv)
1913	return iv;
1914	}
1915	return NULL;
1916	}
1917	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
1918	return NULL;
1919
1920	e1 = gimple_assign_rhs1 (gs: stmt);
1921	code = gimple_assign_rhs_code (gs: stmt);
1922	if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1923	return find_deriving_biv_for_expr (data, expr: e1);
1924
1925	switch (code)
1926	{
1927	case MULT_EXPR:
1928	case PLUS_EXPR:
1929	case MINUS_EXPR:
1930	case POINTER_PLUS_EXPR:
1931	/ Increments, decrements and multiplications by a constant*
1932	are simple. /*
1933	e2 = gimple_assign_rhs2 (gs: stmt);
1934	iv = find_deriving_biv_for_expr (data, expr: e2);
1935	if (iv)
1936	return iv;
1937	gcc_fallthrough ();
1938
1939	CASE_CONVERT:
1940	/ Casts are simple. /
1941	return find_deriving_biv_for_expr (data, expr: e1);
1942
1943	default:
1944	break;
1945	}
1946
1947	return NULL;
1948	}
1949
1950	/ Record BIV, its predecessor and successor that they are used in*
1951	address type uses. /*
1952
1953	static void
1954	record_biv_for_address_use (struct ivopts_data data, struct* iv *biv)
1955	{
1956	unsigned i;
1957	tree type, base_1, base_2;
1958	bitmap_iterator bi;
1959
1960	if (!biv \|\| !biv->biv_p \|\| integer_zerop (biv->step)
1961	\|\| biv->have_address_use \|\| !biv->no_overflow)
1962	return;
1963
1964	type = TREE_TYPE (biv->base);
1965	if (!INTEGRAL_TYPE_P (type))
1966	return;
1967
1968	biv->have_address_use = true;
1969	data->bivs_not_used_in_addr--;
1970	base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1971	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
1972	{
1973	struct iv *iv = ver_info (data, ver: i)->iv;
1974
1975	if (!iv \|\| !iv->biv_p \|\| integer_zerop (iv->step)
1976	\|\| iv->have_address_use \|\| !iv->no_overflow)
1977	continue;
1978
1979	if (type != TREE_TYPE (iv->base)
1980	\|\| !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1981	continue;
1982
1983	if (!operand_equal_p (biv->step, iv->step, flags: `0`))
1984	continue;
1985
1986	base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1987	if (operand_equal_p (base_1, iv->base, flags: `0`)
1988	\|\| operand_equal_p (base_2, biv->base, flags: `0`))
1989	{
1990	iv->have_address_use = true;
1991	data->bivs_not_used_in_addr--;
1992	}
1993	}
1994	}
1995
1996	/ Cumulates the steps of indices into DATA and replaces their values with the*
1997	initial ones. Returns false when the value of the index cannot be determined.
1998	Callback for for_each_index. /*
1999
2000	struct ifs_ivopts_data
2001	{
2002	struct ivopts_data *ivopts_data;
2003	gimple *stmt;
2004	tree step;
2005	};
2006
2007	static bool
2008	idx_find_step (tree base, tree idx, void* *data)
2009	{
2010	struct ifs_ivopts_data dta = (struct* ifs_ivopts_data *) data;
2011	struct iv *iv;
2012	bool use_overflow_semantics = false;
2013	tree step, iv_base, iv_step, lbound, off;
2014	class loop *loop = dta->ivopts_data->current_loop;
2015
2016	/ If base is a component ref, require that the offset of the reference*
2017	be invariant. /*
2018	if (TREE_CODE (base) == COMPONENT_REF)
2019	{
2020	off = component_ref_field_offset (base);
2021	return expr_invariant_in_loop_p (loop, expr: off);
2022	}
2023
2024	/ If base is array, first check whether we will be able to move the*
2025	reference out of the loop (in order to take its address in strength
2026	reduction). In order for this to work we need both lower bound
2027	and step to be loop invariants. /*
2028	if (TREE_CODE (base) == ARRAY_REF \|\| TREE_CODE (base) == ARRAY_RANGE_REF)
2029	{
2030	/ Moreover, for a range, the size needs to be invariant as well. /
2031	if (TREE_CODE (base) == ARRAY_RANGE_REF
2032	&& !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2033	return false;
2034
2035	step = array_ref_element_size (base);
2036	lbound = array_ref_low_bound (base);
2037
2038	if (!expr_invariant_in_loop_p (loop, expr: step)
2039	\|\| !expr_invariant_in_loop_p (loop, expr: lbound))
2040	return false;
2041	}
2042
2043	if (TREE_CODE (*idx) != SSA_NAME)
2044	return true;
2045
2046	iv = get_iv (data: dta->ivopts_data, var: *idx);
2047	if (!iv)
2048	return false;
2049
2050	/ XXX We produce for a base of D42 with iv->base being &x[0]
2051	*&x[0], which is not folded and does not trigger the
2052	ARRAY_REF path below. /*
2053	*idx = iv->base;
2054
2055	if (integer_zerop (iv->step))
2056	return true;
2057
2058	if (TREE_CODE (base) == ARRAY_REF \|\| TREE_CODE (base) == ARRAY_RANGE_REF)
2059	{
2060	step = array_ref_element_size (base);
2061
2062	/ We only handle addresses whose step is an integer constant. /
2063	if (TREE_CODE (step) != INTEGER_CST)
2064	return false;
2065	}
2066	else
2067	/ The step for pointer arithmetics already is 1 byte. /
2068	step = size_one_node;
2069
2070	iv_base = iv->base;
2071	iv_step = iv->step;
2072	if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2073	use_overflow_semantics = true;
2074
2075	if (!convert_affine_scev (dta->ivopts_data->current_loop,
2076	sizetype, &iv_base, &iv_step, dta->stmt,
2077	use_overflow_semantics))
2078	{
2079	/ The index might wrap. /
2080	return false;
2081	}
2082
2083	step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2084	dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2085
2086	if (dta->ivopts_data->bivs_not_used_in_addr)
2087	{
2088	if (!iv->biv_p)
2089	iv = find_deriving_biv_for_expr (data: dta->ivopts_data, expr: iv->ssa_name);
2090
2091	record_biv_for_address_use (data: dta->ivopts_data, biv: iv);
2092	}
2093	return true;
2094	}
2095
2096	/ Records use in index IDX. Callback for for_each_index. Ivopts data*
2097	object is passed to it in DATA. /*
2098
2099	static bool
2100	idx_record_use (tree base, tree *idx,
2101	void *vdata)
2102	{
2103	struct ivopts_data data = (struct* ivopts_data *) vdata;
2104	find_interesting_uses_op (data, op: *idx);
2105	if (TREE_CODE (base) == ARRAY_REF \|\| TREE_CODE (base) == ARRAY_RANGE_REF)
2106	{
2107	if (TREE_OPERAND (base, `2`))
2108	find_interesting_uses_op (data, TREE_OPERAND (base, `2`));
2109	if (TREE_OPERAND (base, `3`))
2110	find_interesting_uses_op (data, TREE_OPERAND (base, `3`));
2111	}
2112	return true;
2113	}
2114
2115	/ If we can prove that TOP = cst * BOT for some constant cst,*
2116	store cst to MUL and return true. Otherwise return false.
2117	The returned value is always sign-extended, regardless of the
2118	signedness of TOP and BOT. /*
2119
2120	static bool
2121	constant_multiple_of (tree top, tree bot, widest_int *mul,
2122	struct ivopts_data *data)
2123	{
2124	aff_tree aff_top, aff_bot;
2125	tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top,
2126	&data->name_expansion_cache);
2127	tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot,
2128	&data->name_expansion_cache);
2129
2130	poly_widest_int poly_mul;
2131	if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2132	&& poly_mul.is_constant (const_value: mul))
2133	return true;
2134
2135	return false;
2136	}
2137
2138	/ Return true if memory reference REF with step STEP may be unaligned. /
2139
2140	static bool
2141	may_be_unaligned_p (tree ref, tree step)
2142	{
2143	/ TARGET_MEM_REFs are translated directly to valid MEMs on the target,*
2144	thus they are not misaligned. /*
2145	if (TREE_CODE (ref) == TARGET_MEM_REF)
2146	return false;
2147
2148	unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2149	if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2150	align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2151
2152	unsigned HOST_WIDE_INT bitpos;
2153	unsigned int ref_align;
2154	get_object_alignment_1 (ref, &ref_align, &bitpos);
2155	if (ref_align < align
2156	\|\| (bitpos % align) != `0`
2157	\|\| (bitpos % BITS_PER_UNIT) != `0`)
2158	return true;
2159
2160	unsigned int trailing_zeros = tree_ctz (step);
2161	if (trailing_zeros < HOST_BITS_PER_INT
2162	&& (`1U` << trailing_zeros) * BITS_PER_UNIT < align)
2163	return true;
2164
2165	return false;
2166	}
2167
2168	/ Return true if EXPR may be non-addressable. /
2169
2170	bool
2171	may_be_nonaddressable_p (tree expr)
2172	{
2173	switch (TREE_CODE (expr))
2174	{
2175	case VAR_DECL:
2176	/ Check if it's a register variable. /
2177	return DECL_HARD_REGISTER (expr);
2178
2179	case TARGET_MEM_REF:
2180	/ TARGET_MEM_REFs are translated directly to valid MEMs on the*
2181	target, thus they are always addressable. /*
2182	return false;
2183
2184	case MEM_REF:
2185	/ Likewise for MEM_REFs, modulo the storage order. /
2186	return REF_REVERSE_STORAGE_ORDER (expr);
2187
2188	case BIT_FIELD_REF:
2189	if (REF_REVERSE_STORAGE_ORDER (expr))
2190	return true;
2191	return may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2192
2193	case COMPONENT_REF:
2194	if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, `0`))))
2195	return true;
2196	return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, `1`))
2197	\|\| may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2198
2199	case ARRAY_REF:
2200	case ARRAY_RANGE_REF:
2201	if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, `0`))))
2202	return true;
2203	return may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2204
2205	case VIEW_CONVERT_EXPR:
2206	/ This kind of view-conversions may wrap non-addressable objects*
2207	and make them look addressable. After some processing the
2208	non-addressability may be uncovered again, causing ADDR_EXPRs
2209	of inappropriate objects to be built. /*
2210	if (is_gimple_reg (TREE_OPERAND (expr, `0`))
2211	\|\| !is_gimple_addressable (TREE_OPERAND (expr, `0`)))
2212	return true;
2213	return may_be_nonaddressable_p (TREE_OPERAND (expr, `0`));
2214
2215	CASE_CONVERT:
2216	return true;
2217
2218	default:
2219	break;
2220	}
2221
2222	return false;
2223	}
2224
2225	/ Finds addresses in OP_P inside STMT. /*
2226
2227	static void
2228	find_interesting_uses_address (struct ivopts_data data, gimple stmt,
2229	tree *op_p)
2230	{
2231	tree base = *op_p, step = size_zero_node;
2232	struct iv *civ;
2233	struct ifs_ivopts_data ifs_ivopts_data;
2234
2235	/ Do not play with volatile memory references. A bit too conservative,*
2236	perhaps, but safe. /*
2237	if (gimple_has_volatile_ops (stmt))
2238	goto fail;
2239
2240	/ Ignore bitfields for now. Not really something terribly complicated*
2241	to handle. TODO. /*
2242	if (TREE_CODE (base) == BIT_FIELD_REF)
2243	goto fail;
2244
2245	base = unshare_expr (base);
2246
2247	if (TREE_CODE (base) == TARGET_MEM_REF)
2248	{
2249	tree type = build_pointer_type (TREE_TYPE (base));
2250	tree astep;
2251
2252	if (TMR_BASE (base)
2253	&& TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2254	{
2255	civ = get_iv (data, TMR_BASE (base));
2256	if (!civ)
2257	goto fail;
2258
2259	TMR_BASE (base) = civ->base;
2260	step = civ->step;
2261	}
2262	if (TMR_INDEX2 (base)
2263	&& TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2264	{
2265	civ = get_iv (data, TMR_INDEX2 (base));
2266	if (!civ)
2267	goto fail;
2268
2269	TMR_INDEX2 (base) = civ->base;
2270	step = civ->step;
2271	}
2272	if (TMR_INDEX (base)
2273	&& TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2274	{
2275	civ = get_iv (data, TMR_INDEX (base));
2276	if (!civ)
2277	goto fail;
2278
2279	TMR_INDEX (base) = civ->base;
2280	astep = civ->step;
2281
2282	if (astep)
2283	{
2284	if (TMR_STEP (base))
2285	astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2286
2287	step = fold_build2 (PLUS_EXPR, type, step, astep);
2288	}
2289	}
2290
2291	if (integer_zerop (step))
2292	goto fail;
2293	base = tree_mem_ref_addr (type, base);
2294	}
2295	else
2296	{
2297	ifs_ivopts_data.ivopts_data = data;
2298	ifs_ivopts_data.stmt = stmt;
2299	ifs_ivopts_data.step = size_zero_node;
2300	if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2301	\|\| integer_zerop (ifs_ivopts_data.step))
2302	goto fail;
2303	step = ifs_ivopts_data.step;
2304
2305	/ Check that the base expression is addressable. This needs*
2306	to be done after substituting bases of IVs into it. /*
2307	if (may_be_nonaddressable_p (expr: base))
2308	goto fail;
2309
2310	/ Moreover, on strict alignment platforms, check that it is*
2311	sufficiently aligned. /*
2312	if (STRICT_ALIGNMENT && may_be_unaligned_p (ref: base, step))
2313	goto fail;
2314
2315	base = build_fold_addr_expr (base);
2316
2317	/ Substituting bases of IVs into the base expression might*
2318	have caused folding opportunities. /*
2319	if (TREE_CODE (base) == ADDR_EXPR)
2320	{
2321	tree *ref = &TREE_OPERAND (base, `0`);
2322	while (handled_component_p (t: *ref))
2323	ref = &TREE_OPERAND (*ref, `0`);
2324	if (TREE_CODE (*ref) == MEM_REF)
2325	{
2326	tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2327	TREE_OPERAND (*ref, `0`),
2328	TREE_OPERAND (*ref, `1`));
2329	if (tem)
2330	*ref = tem;
2331	}
2332	}
2333	}
2334
2335	civ = alloc_iv (data, base, step);
2336	/ Fail if base object of this memory reference is unknown. /
2337	if (civ->base_object == NULL_TREE)
2338	goto fail;
2339
2340	record_group_use (data, use_p: op_p, iv: civ, stmt, type: USE_REF_ADDRESS, TREE_TYPE (*op_p));
2341	return;
2342
2343	fail:
2344	for_each_index (op_p, idx_record_use, data);
2345	}
2346
2347	/ Finds and records invariants used in STMT. /
2348
2349	static void
2350	find_invariants_stmt (struct ivopts_data data, gimple stmt)
2351	{
2352	ssa_op_iter iter;
2353	use_operand_p use_p;
2354	tree op;
2355
2356	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2357	{
2358	op = USE_FROM_PTR (use_p);
2359	record_invariant (data, op, nonlinear_use: false);
2360	}
2361	}
2362
2363	/ CALL calls an internal function. If operand OP_P will become an
2364	address when the call is expanded, return the type of the memory
2365	being addressed, otherwise return null. /*
2366
2367	static tree
2368	get_mem_type_for_internal_fn (gcall call, tree op_p)
2369	{
2370	switch (gimple_call_internal_fn (gs: call))
2371	{
2372	case IFN_MASK_LOAD:
2373	case IFN_MASK_LOAD_LANES:
2374	case IFN_MASK_LEN_LOAD_LANES:
2375	case IFN_LEN_LOAD:
2376	case IFN_MASK_LEN_LOAD:
2377	if (op_p == gimple_call_arg_ptr (gs: call, index: `0`))
2378	return TREE_TYPE (gimple_call_lhs (call));
2379	return NULL_TREE;
2380
2381	case IFN_MASK_STORE:
2382	case IFN_MASK_STORE_LANES:
2383	case IFN_MASK_LEN_STORE_LANES:
2384	case IFN_LEN_STORE:
2385	case IFN_MASK_LEN_STORE:
2386	{
2387	if (op_p == gimple_call_arg_ptr (gs: call, index: `0`))
2388	{
2389	internal_fn ifn = gimple_call_internal_fn (gs: call);
2390	int index = internal_fn_stored_value_index (ifn);
2391	return TREE_TYPE (gimple_call_arg (call, index));
2392	}
2393	return NULL_TREE;
2394	}
2395
2396	default:
2397	return NULL_TREE;
2398	}
2399	}
2400
2401	/ IV is a (non-address) iv that describes operand OP_P of STMT.
2402	Return true if the operand will become an address when STMT
2403	is expanded and record the associated address use if so. /*
2404
2405	static bool
2406	find_address_like_use (struct ivopts_data data, gimple stmt, tree *op_p,
2407	struct iv *iv)
2408	{
2409	/ Fail if base object of this memory reference is unknown. /
2410	if (iv->base_object == NULL_TREE)
2411	return false;
2412
2413	tree mem_type = NULL_TREE;
2414	if (gcall call = dyn_cast <gcall > (p: stmt))
2415	if (gimple_call_internal_p (gs: call))
2416	mem_type = get_mem_type_for_internal_fn (call, op_p);
2417	if (mem_type)
2418	{
2419	iv = alloc_iv (data, base: iv->base, step: iv->step);
2420	record_group_use (data, use_p: op_p, iv, stmt, type: USE_PTR_ADDRESS, mem_type);
2421	return true;
2422	}
2423	return false;
2424	}
2425
2426	/ Finds interesting uses of induction variables in the statement STMT. /
2427
2428	static void
2429	find_interesting_uses_stmt (struct ivopts_data data, gimple stmt)
2430	{
2431	struct iv *iv;
2432	tree op, lhs, rhs;
2433	ssa_op_iter iter;
2434	use_operand_p use_p;
2435	enum tree_code code;
2436
2437	find_invariants_stmt (data, stmt);
2438
2439	if (gimple_code (g: stmt) == GIMPLE_COND)
2440	{
2441	find_interesting_uses_cond (data, stmt);
2442	return;
2443	}
2444
2445	if (is_gimple_assign (gs: stmt))
2446	{
2447	lhs = gimple_assign_lhs_ptr (gs: stmt);
2448	rhs = gimple_assign_rhs1_ptr (gs: stmt);
2449
2450	if (TREE_CODE (*lhs) == SSA_NAME)
2451	{
2452	/ If the statement defines an induction variable, the uses are not*
2453	interesting by themselves. /*
2454
2455	iv = get_iv (data, var: *lhs);
2456
2457	if (iv && !integer_zerop (iv->step))
2458	return;
2459	}
2460
2461	code = gimple_assign_rhs_code (gs: stmt);
2462	if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2463	&& (REFERENCE_CLASS_P (*rhs)
2464	\|\| is_gimple_val (*rhs)))
2465	{
2466	if (REFERENCE_CLASS_P (*rhs))
2467	find_interesting_uses_address (data, stmt, op_p: rhs);
2468	else
2469	find_interesting_uses_op (data, op: *rhs);
2470
2471	if (REFERENCE_CLASS_P (*lhs))
2472	find_interesting_uses_address (data, stmt, op_p: lhs);
2473	return;
2474	}
2475	else if (TREE_CODE_CLASS (code) == tcc_comparison)
2476	{
2477	find_interesting_uses_cond (data, stmt);
2478	return;
2479	}
2480
2481	/ TODO -- we should also handle address uses of type*
2482
2483	memory = call (whatever);
2484
2485	and
2486
2487	call (memory). /*
2488	}
2489
2490	if (gimple_code (g: stmt) == GIMPLE_PHI
2491	&& gimple_bb (g: stmt) == data->current_loop->header)
2492	{
2493	iv = get_iv (data, PHI_RESULT (stmt));
2494
2495	if (iv && !integer_zerop (iv->step))
2496	return;
2497	}
2498
2499	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2500	{
2501	op = USE_FROM_PTR (use_p);
2502
2503	if (TREE_CODE (op) != SSA_NAME)
2504	continue;
2505
2506	iv = get_iv (data, var: op);
2507	if (!iv)
2508	continue;
2509
2510	if (!find_address_like_use (data, stmt, op_p: use_p->use, iv))
2511	find_interesting_uses_op (data, op);
2512	}
2513	}
2514
2515	/ Finds interesting uses of induction variables outside of loops*
2516	on loop exit edge EXIT. /*
2517
2518	static void
2519	find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2520	{
2521	gphi *phi;
2522	gphi_iterator psi;
2523	tree def;
2524
2525	for (psi = gsi_start_phis (exit->dest); !gsi_end_p (i: psi); gsi_next (i: &psi))
2526	{
2527	phi = psi.phi ();
2528	def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2529	if (!virtual_operand_p (op: def))
2530	find_interesting_uses_op (data, op: def);
2531	}
2532	}
2533
2534	/ Return TRUE if OFFSET is within the range of [base + offset] addressing*
2535	mode for memory reference represented by USE. /*
2536
2537	static GTY (()) vec<rtx, va_gc> *addr_list;
2538
2539	static bool
2540	addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2541	{
2542	rtx reg, addr;
2543	unsigned list_index;
2544	addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2545	machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2546
2547	list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2548	if (list_index >= vec_safe_length (v: addr_list))
2549	vec_safe_grow_cleared (v&: addr_list, len: list_index + MAX_MACHINE_MODE, exact: true);
2550
2551	addr = (*addr_list)[list_index];
2552	if (!addr)
2553	{
2554	addr_mode = targetm.addr_space.address_mode (as);
2555	reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + `1`);
2556	addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2557	(*addr_list)[list_index] = addr;
2558	}
2559	else
2560	addr_mode = GET_MODE (addr);
2561
2562	XEXP (addr, `1`) = gen_int_mode (offset, addr_mode);
2563	return (memory_address_addr_space_p (mem_mode, addr, as));
2564	}
2565
2566	/ Comparison function to sort group in ascending order of addr_offset. /
2567
2568	static int
2569	group_compare_offset (const void a, const* void *b)
2570	{
2571	const struct iv_use *const u1 = (const* struct iv_use *const *) a;
2572	const struct iv_use *const u2 = (const* struct iv_use *const *) b;
2573
2574	return compare_sizes_for_sort (a: (u1)->addr_offset, b: (u2)->addr_offset);
2575	}
2576
2577	/ Check if small groups should be split. Return true if no group*
2578	contains more than two uses with distinct addr_offsets. Return
2579	false otherwise. We want to split such groups because:
2580
2581	1) Small groups don't have much benefit and may interfer with
2582	general candidate selection.
2583	2) Size for problem with only small groups is usually small and
2584	general algorithm can handle it well.
2585
2586	TODO -- Above claim may not hold when we want to merge memory
2587	accesses with conseuctive addresses. /*
2588
2589	static bool
2590	split_small_address_groups_p (struct ivopts_data *data)
2591	{
2592	unsigned int i, j, distinct = `1`;
2593	struct iv_use *pre;
2594	struct iv_group *group;
2595
2596	for (i = `0`; i < data->vgroups.length (); i++)
2597	{
2598	group = data->vgroups [i];
2599	if (group->vuses.length () == `1`)
2600	continue;
2601
2602	gcc_assert (address_p (group->type));
2603	if (group->vuses.length () == `2`)
2604	{
2605	if (compare_sizes_for_sort (a: group->vuses [`0`]->addr_offset,
2606	b: group->vuses [`1`]->addr_offset) > `0`)
2607	std::swap (a&: group->vuses [`0`], b&: group->vuses [`1`]);
2608	}
2609	else
2610	group->vuses.qsort (group_compare_offset);
2611
2612	if (distinct > `2`)
2613	continue;
2614
2615	distinct = `1`;
2616	for (pre = group->vuses [`0`], j = `1`; j < group->vuses.length (); j++)
2617	{
2618	if (maybe_ne (a: group->vuses [j]->addr_offset, b: pre->addr_offset))
2619	{
2620	pre = group->vuses [j];
2621	distinct++;
2622	}
2623
2624	if (distinct > `2`)
2625	break;
2626	}
2627	}
2628
2629	return (distinct <= `2`);
2630	}
2631
2632	/ For each group of address type uses, this function further groups*
2633	these uses according to the maximum offset supported by target's
2634	[base + offset] addressing mode. /*
2635
2636	static void
2637	split_address_groups (struct ivopts_data *data)
2638	{
2639	unsigned int i, j;
2640	/ Always split group. /
2641	bool split_p = split_small_address_groups_p (data);
2642
2643	for (i = `0`; i < data->vgroups.length (); i++)
2644	{
2645	struct iv_group *new_group = NULL;
2646	struct iv_group *group = data->vgroups [i];
2647	struct iv_use *use = group->vuses [`0`];
2648
2649	use->id = `0`;
2650	use->group_id = group->id;
2651	if (group->vuses.length () == `1`)
2652	continue;
2653
2654	gcc_assert (address_p (use->type));
2655
2656	for (j = `1`; j < group->vuses.length ();)
2657	{
2658	struct iv_use *next = group->vuses [j];
2659	poly_int64 offset = next->addr_offset - use->addr_offset;
2660
2661	/ Split group if aksed to, or the offset against the first*
2662	use can't fit in offset part of addressing mode. IV uses
2663	having the same offset are still kept in one group. /*
2664	if (maybe_ne (a: offset, b: `0`)
2665	&& (split_p \|\| !addr_offset_valid_p (use, offset)))
2666	{
2667	if (!new_group)
2668	new_group = record_group (data, type: group->type);
2669	group->vuses.ordered_remove (ix: j);
2670	new_group->vuses.safe_push (obj: next);
2671	continue;
2672	}
2673
2674	next->id = j;
2675	next->group_id = group->id;
2676	j++;
2677	}
2678	}
2679	}
2680
2681	/ Finds uses of the induction variables that are interesting. /
2682
2683	static void
2684	find_interesting_uses (struct ivopts_data data, basic_block body)
2685	{
2686	basic_block bb;
2687	gimple_stmt_iterator bsi;
2688	unsigned i;
2689	edge e;
2690
2691	for (i = `0`; i < data->current_loop->num_nodes; i++)
2692	{
2693	edge_iterator ei;
2694	bb = body[i];
2695
2696	FOR_EACH_EDGE (e, ei, bb->succs)
2697	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2698	&& !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699	find_interesting_uses_outside (data, exit: e);
2700
2701	for (bsi = gsi_start_phis (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2702	find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2703	for (bsi = gsi_start_bb (bb); !gsi_end_p (i: bsi); gsi_next (i: &bsi))
2704	if (!is_gimple_debug (gs: gsi_stmt (i: bsi)))
2705	find_interesting_uses_stmt (data, stmt: gsi_stmt (i: bsi));
2706	}
2707
2708	split_address_groups (data);
2709
2710	if (dump_file && (dump_flags & TDF_DETAILS))
2711	{
2712	fprintf (stream: dump_file, format: "\n<IV Groups>:\n");
2713	dump_groups (file: dump_file, data);
2714	fprintf (stream: dump_file, format: "\n");
2715	}
2716	}
2717
2718	/ Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR*
2719	is true, assume we are inside an address. If TOP_COMPREF is true, assume
2720	we are at the top-level of the processed address. /*
2721
2722	static tree
2723	strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2724	poly_int64 *offset)
2725	{
2726	tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2727	enum tree_code code;
2728	tree type, orig_type = TREE_TYPE (expr);
2729	poly_int64 off0, off1;
2730	HOST_WIDE_INT st;
2731	tree orig_expr = expr;
2732
2733	STRIP_NOPS (expr);
2734
2735	type = TREE_TYPE (expr);
2736	code = TREE_CODE (expr);
2737	*offset = `0`;
2738
2739	switch (code)
2740	{
2741	case POINTER_PLUS_EXPR:
2742	case PLUS_EXPR:
2743	case MINUS_EXPR:
2744	op0 = TREE_OPERAND (expr, `0`);
2745	op1 = TREE_OPERAND (expr, `1`);
2746
2747	op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2748	op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2749
2750	*offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2751	if (op0 == TREE_OPERAND (expr, `0`)
2752	&& op1 == TREE_OPERAND (expr, `1`))
2753	return orig_expr;
2754
2755	if (integer_zerop (op1))
2756	expr = op0;
2757	else if (integer_zerop (op0))
2758	{
2759	if (code == MINUS_EXPR)
2760	{
2761	if (TYPE_OVERFLOW_UNDEFINED (type))
2762	{
2763	type = unsigned_type_for (type);
2764	op1 = fold_convert (type, op1);
2765	}
2766	expr = fold_build1 (NEGATE_EXPR, type, op1);
2767	}
2768	else
2769	expr = op1;
2770	}
2771	else
2772	{
2773	if (TYPE_OVERFLOW_UNDEFINED (type))
2774	{
2775	type = unsigned_type_for (type);
2776	if (code == POINTER_PLUS_EXPR)
2777	code = PLUS_EXPR;
2778	op0 = fold_convert (type, op0);
2779	op1 = fold_convert (type, op1);
2780	}
2781	expr = fold_build2 (code, type, op0, op1);
2782	}
2783
2784	return fold_convert (orig_type, expr);
2785
2786	case MULT_EXPR:
2787	op1 = TREE_OPERAND (expr, `1`);
2788	if (!cst_and_fits_in_hwi (op1))
2789	return orig_expr;
2790
2791	op0 = TREE_OPERAND (expr, `0`);
2792	op0 = strip_offset_1 (expr: op0, inside_addr: false, top_compref: false, offset: &off0);
2793	if (op0 == TREE_OPERAND (expr, `0`))
2794	return orig_expr;
2795
2796	offset = off0 int_cst_value (op1);
2797	if (integer_zerop (op0))
2798	expr = op0;
2799	else
2800	{
2801	if (TYPE_OVERFLOW_UNDEFINED (type))
2802	{
2803	type = unsigned_type_for (type);
2804	op0 = fold_convert (type, op0);
2805	op1 = fold_convert (type, op1);
2806	}
2807	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2808	}
2809
2810	return fold_convert (orig_type, expr);
2811
2812	case ARRAY_REF:
2813	case ARRAY_RANGE_REF:
2814	if (!inside_addr)
2815	return orig_expr;
2816
2817	step = array_ref_element_size (expr);
2818	if (!cst_and_fits_in_hwi (step))
2819	break;
2820
2821	st = int_cst_value (step);
2822	op1 = TREE_OPERAND (expr, `1`);
2823	op1 = strip_offset_1 (expr: op1, inside_addr: false, top_compref: false, offset: &off1);
2824	offset = off1 st;
2825
2826	if (top_compref
2827	&& integer_zerop (op1))
2828	{
2829	/ Strip the component reference completely. /
2830	op0 = TREE_OPERAND (expr, `0`);
2831	op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2832	*offset += off0;
2833	return op0;
2834	}
2835	break;
2836
2837	case COMPONENT_REF:
2838	{
2839	tree field;
2840
2841	if (!inside_addr)
2842	return orig_expr;
2843
2844	tmp = component_ref_field_offset (expr);
2845	field = TREE_OPERAND (expr, `1`);
2846	if (top_compref
2847	&& cst_and_fits_in_hwi (tmp)
2848	&& cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2849	{
2850	HOST_WIDE_INT boffset, abs_off;
2851
2852	/ Strip the component reference completely. /
2853	op0 = TREE_OPERAND (expr, `0`);
2854	op0 = strip_offset_1 (expr: op0, inside_addr, top_compref, offset: &off0);
2855	boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2856	abs_off = abs_hwi (x: boffset) / BITS_PER_UNIT;
2857	if (boffset < `0`)
2858	abs_off = -abs_off;
2859
2860	*offset = off0 + int_cst_value (tmp) + abs_off;
2861	return op0;
2862	}
2863	}
2864	break;
2865
2866	case ADDR_EXPR:
2867	op0 = TREE_OPERAND (expr, `0`);
2868	op0 = strip_offset_1 (expr: op0, inside_addr: true, top_compref: true, offset: &off0);
2869	*offset += off0;
2870
2871	if (op0 == TREE_OPERAND (expr, `0`))
2872	return orig_expr;
2873
2874	expr = build_fold_addr_expr (op0);
2875	return fold_convert (orig_type, expr);
2876
2877	case MEM_REF:
2878	/ ??? Offset operand? /
2879	inside_addr = false;
2880	break;
2881
2882	default:
2883	if (ptrdiff_tree_p (expr, offset) && maybe_ne (a: *offset, b: `0`))
2884	return build_int_cst (orig_type, `0`);
2885	return orig_expr;
2886	}
2887
2888	/ Default handling of expressions for that we want to recurse into*
2889	the first operand. /*
2890	op0 = TREE_OPERAND (expr, `0`);
2891	op0 = strip_offset_1 (expr: op0, inside_addr, top_compref: false, offset: &off0);
2892	*offset += off0;
2893
2894	if (op0 == TREE_OPERAND (expr, `0`)
2895	&& (!op1 \|\| op1 == TREE_OPERAND (expr, `1`)))
2896	return orig_expr;
2897
2898	expr = copy_node (expr);
2899	TREE_OPERAND (expr, `0`) = op0;
2900	if (op1)
2901	TREE_OPERAND (expr, `1`) = op1;
2902
2903	/ Inside address, we might strip the top level component references,*
2904	thus changing type of the expression. Handling of ADDR_EXPR
2905	will fix that. /*
2906	expr = fold_convert (orig_type, expr);
2907
2908	return expr;
2909	}
2910
2911	/ Strips constant offsets from EXPR and stores them to OFFSET. /
2912
2913	static tree
2914	strip_offset (tree expr, poly_uint64 *offset)
2915	{
2916	poly_int64 off;
2917	tree core = strip_offset_1 (expr, inside_addr: false, top_compref: false, offset: &off);
2918	*offset = off;
2919	return core;
2920	}
2921
2922	/ Returns variant of TYPE that can be used as base for different uses.*
2923	We return unsigned type with the same precision, which avoids problems
2924	with overflows. /*
2925
2926	static tree
2927	generic_type_for (tree type)
2928	{
2929	if (POINTER_TYPE_P (type))
2930	return unsigned_type_for (type);
2931
2932	if (TYPE_UNSIGNED (type))
2933	return type;
2934
2935	return unsigned_type_for (type);
2936	}
2937
2938	/ Private data for walk_tree. /
2939
2940	struct walk_tree_data
2941	{
2942	bitmap *inv_vars;
2943	struct ivopts_data *idata;
2944	};
2945
2946	/ Callback function for walk_tree, it records invariants and symbol*
2947	reference in EXPR_P. DATA is the structure storing result info. /
2948
2949	static tree
2950	find_inv_vars_cb (tree expr_p, int* ws ATTRIBUTE_UNUSED, void* *data)
2951	{
2952	tree op = *expr_p;
2953	struct version_info *info;
2954	struct walk_tree_data wdata = (struct* walk_tree_data*) data;
2955
2956	if (TREE_CODE (op) != SSA_NAME)
2957	return NULL_TREE;
2958
2959	info = name_info (data: wdata->idata, name: op);
2960	/ Because we expand simple operations when finding IVs, loop invariant*
2961	variable that isn't referred by the original loop could be used now.
2962	Record such invariant variables here. /*
2963	if (!info->iv)
2964	{
2965	struct ivopts_data *idata = wdata->idata;
2966	basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2967
2968	if (!bb \|\| !flow_bb_inside_loop_p (idata->current_loop, bb))
2969	{
2970	tree steptype = TREE_TYPE (op);
2971	if (POINTER_TYPE_P (steptype))
2972	steptype = sizetype;
2973	set_iv (data: idata, iv: op, base: op, step: build_int_cst (steptype, `0`), no_overflow: true);
2974	record_invariant (data: idata, op, nonlinear_use: false);
2975	}
2976	}
2977	if (!info->inv_id \|\| info->has_nonlin_use)
2978	return NULL_TREE;
2979
2980	if (!*wdata->inv_vars)
2981	*wdata->inv_vars = BITMAP_ALLOC (NULL);
2982	bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2983
2984	return NULL_TREE;
2985	}
2986
2987	/ Records invariants in EXPR_P. INV_VARS is the bitmap to that we should
2988	store it. /*
2989
2990	static inline void
2991	find_inv_vars (struct ivopts_data data, tree expr_p, bitmap *inv_vars)
2992	{
2993	struct walk_tree_data wdata;
2994
2995	if (!inv_vars)
2996	return;
2997
2998	wdata.idata = data;
2999	wdata.inv_vars = inv_vars;
3000	walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3001	}
3002
3003	/ Get entry from invariant expr hash table for INV_EXPR. New entry*
3004	will be recorded if it doesn't exist yet. Given below two exprs:
3005	inv_expr + cst1, inv_expr + cst2
3006	It's hard to make decision whether constant part should be stripped
3007	or not. We choose to not strip based on below facts:
3008	1) We need to count ADD cost for constant part if it's stripped,
3009	which isn't always trivial where this functions is called.
3010	2) Stripping constant away may be conflict with following loop
3011	invariant hoisting pass.
3012	3) Not stripping constant away results in more invariant exprs,
3013	which usually leads to decision preferring lower reg pressure. /*
3014
3015	static iv_inv_expr_ent *
3016	get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3017	{
3018	STRIP_NOPS (inv_expr);
3019
3020	if (poly_int_tree_p (t: inv_expr)
3021	\|\| TREE_CODE (inv_expr) == SSA_NAME)
3022	return NULL;
3023
3024	/ Don't strip constant part away as we used to. /
3025
3026	/ Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. /
3027	struct iv_inv_expr_ent ent;
3028	ent.expr = inv_expr;
3029	ent.hash = iterative_hash_expr (tree: inv_expr, seed: `0`);
3030	struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (value: &ent, insert: INSERT);
3031
3032	if (!*slot)
3033	{
3034	slot = XNEW (struct* iv_inv_expr_ent);
3035	(*slot)->expr = inv_expr;
3036	(*slot)->hash = ent.hash;
3037	(*slot)->id = ++data->max_inv_expr_id;
3038	}
3039
3040	return *slot;
3041	}
3042
3043
3044	/ Return TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3045	unsuitable as ivopts candidates for potentially involving undefined
3046	behavior. /*
3047
3048	static tree
3049	find_ssa_undef (tree tp, int* walk_subtrees, void* *bb_)
3050	{
3051	basic_block bb = (basic_block) bb_;
3052	if (TREE_CODE (*tp) == SSA_NAME
3053	&& ssa_name_maybe_undef_p (var: *tp)
3054	&& !ssa_name_any_use_dominates_bb_p (var: *tp, bb))
3055	return *tp;
3056	if (!EXPR_P (*tp))
3057	*walk_subtrees = `0`;
3058	return NULL;
3059	}
3060
3061	/ Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and*
3062	position to POS. If USE is not NULL, the candidate is set as related to
3063	it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3064	replacement of the final value of the iv by a direct computation. /*
3065
3066	static struct iv_cand *
3067	add_candidate_1 (struct ivopts_data data, tree base, tree step, bool* important,
3068	enum iv_position pos, struct iv_use *use,
3069	gimple incremented_at, struct* iv *orig_iv = NULL,
3070	bool doloop = false)
3071	{
3072	unsigned i;
3073	struct iv_cand *cand = NULL;
3074	tree type, orig_type;
3075
3076	gcc_assert (base && step);
3077
3078	/ -fkeep-gc-roots-live means that we have to keep a real pointer*
3079	live, but the ivopts code may replace a real pointer with one
3080	pointing before or after the memory block that is then adjusted
3081	into the memory block during the loop. FIXME: It would likely be
3082	better to actually force the pointer live and still use ivopts;
3083	for example, it would be enough to write the pointer into memory
3084	and keep it there until after the loop. /*
3085	if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3086	return NULL;
3087
3088	/ If BASE contains undefined SSA names make sure we only record*
3089	the original IV. /*
3090	bool involves_undefs = false;
3091	if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3092	{
3093	if (pos != IP_ORIGINAL)
3094	return NULL;
3095	important = false;
3096	involves_undefs = true;
3097	}
3098
3099	/ For non-original variables, make sure their values are computed in a type*
3100	that does not invoke undefined behavior on overflows (since in general,
3101	we cannot prove that these induction variables are non-wrapping). /*
3102	if (pos != IP_ORIGINAL)
3103	{
3104	orig_type = TREE_TYPE (base);
3105	type = generic_type_for (type: orig_type);
3106	if (type != orig_type)
3107	{
3108	base = fold_convert (type, base);
3109	step = fold_convert (type, step);
3110	}
3111	}
3112
3113	for (i = `0`; i < data->vcands.length (); i++)
3114	{
3115	cand = data->vcands [i];
3116
3117	if (cand->pos != pos)
3118	continue;
3119
3120	if (cand->incremented_at != incremented_at
3121	\|\| ((pos == IP_AFTER_USE \|\| pos == IP_BEFORE_USE)
3122	&& cand->ainc_use != use))
3123	continue;
3124
3125	if (operand_equal_p (base, cand->iv->base, flags: `0`)
3126	&& operand_equal_p (step, cand->iv->step, flags: `0`)
3127	&& (TYPE_PRECISION (TREE_TYPE (base))
3128	== TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3129	break;
3130	}
3131
3132	if (i == data->vcands.length ())
3133	{
3134	cand = XCNEW (struct iv_cand);
3135	cand->id = i;
3136	cand->iv = alloc_iv (data, base, step);
3137	cand->pos = pos;
3138	if (pos != IP_ORIGINAL)
3139	{
3140	if (doloop)
3141	cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3142	else
3143	cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3144	cand->var_after = cand->var_before;
3145	}
3146	cand->important = important;
3147	cand->involves_undefs = involves_undefs;
3148	cand->incremented_at = incremented_at;
3149	cand->doloop_p = doloop;
3150	data->vcands.safe_push (obj: cand);
3151
3152	if (!poly_int_tree_p (t: step))
3153	{
3154	find_inv_vars (data, expr_p: &step, inv_vars: &cand->inv_vars);
3155
3156	iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, inv_expr: step);
3157	/ Share bitmap between inv_vars and inv_exprs for cand. /
3158	if (inv_expr != NULL)
3159	{
3160	cand->inv_exprs = cand->inv_vars;
3161	cand->inv_vars = NULL;
3162	if (cand->inv_exprs)
3163	bitmap_clear (cand->inv_exprs);
3164	else
3165	cand->inv_exprs = BITMAP_ALLOC (NULL);
3166
3167	bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168	}
3169	}
3170
3171	if (pos == IP_AFTER_USE \|\| pos == IP_BEFORE_USE)
3172	cand->ainc_use = use;
3173	else
3174	cand->ainc_use = NULL;
3175
3176	cand->orig_iv = orig_iv;
3177	if (dump_file && (dump_flags & TDF_DETAILS))
3178	dump_cand (file: dump_file, cand);
3179	}
3180
3181	cand->important \|= important;
3182	cand->doloop_p \|= doloop;
3183
3184	/ Relate candidate to the group for which it is added. /
3185	if (use)
3186	bitmap_set_bit (data->vgroups [use->group_id]->related_cands, i);
3187
3188	return cand;
3189	}
3190
3191	/ Returns true if incrementing the induction variable at the end of the LOOP*
3192	is allowed.
3193
3194	The purpose is to avoid splitting latch edge with a biv increment, thus
3195	creating a jump, possibly confusing other optimization passes and leaving
3196	less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3197	available (so we do not have a better alternative), or if the latch edge
3198	is already nonempty. /*
3199
3200	static bool
3201	allow_ip_end_pos_p (class loop *loop)
3202	{
3203	/ Do not allow IP_END when creating the IV would need to split the*
3204	latch edge as that makes all IP_NORMAL invalid. /*
3205	auto pos = gsi_last_bb (bb: ip_end_pos (loop));
3206	if (!gsi_end_p (i: pos) && stmt_ends_bb_p (*pos))
3207	return false;
3208
3209	if (!ip_normal_pos (loop))
3210	return true;
3211
3212	if (!empty_block_p (ip_end_pos (loop)))
3213	return true;
3214
3215	return false;
3216	}
3217
3218	/ If possible, adds autoincrement candidates BASE + STEP * i based on use USE.*
3219	Important field is set to IMPORTANT. /*
3220
3221	static void
3222	add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3223	bool important, struct iv_use *use)
3224	{
3225	basic_block use_bb = gimple_bb (g: use->stmt);
3226	machine_mode mem_mode;
3227	unsigned HOST_WIDE_INT cstepi;
3228
3229	/ If we insert the increment in any position other than the standard*
3230	ones, we must ensure that it is incremented once per iteration.
3231	It must not be in an inner nested loop, or one side of an if
3232	statement. /*
3233	if (use_bb->loop_father != data->current_loop
3234	\|\| !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3235	\|\| stmt_can_throw_internal (cfun, use->stmt)
3236	\|\| !cst_and_fits_in_hwi (step))
3237	return;
3238
3239	cstepi = int_cst_value (step);
3240
3241	mem_mode = TYPE_MODE (use->mem_type);
3242	if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3243	\|\| USE_STORE_PRE_INCREMENT (mem_mode))
3244	&& known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3245	\|\| ((USE_LOAD_PRE_DECREMENT (mem_mode)
3246	\|\| USE_STORE_PRE_DECREMENT (mem_mode))
3247	&& known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3248	{
3249	enum tree_code code = MINUS_EXPR;
3250	tree new_base;
3251	tree new_step = step;
3252
3253	if (POINTER_TYPE_P (TREE_TYPE (base)))
3254	{
3255	new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3256	code = POINTER_PLUS_EXPR;
3257	}
3258	else
3259	new_step = fold_convert (TREE_TYPE (base), new_step);
3260	new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3261	add_candidate_1 (data, base: new_base, step, important, pos: IP_BEFORE_USE, use,
3262	incremented_at: use->stmt);
3263	}
3264	if (((USE_LOAD_POST_INCREMENT (mem_mode)
3265	\|\| USE_STORE_POST_INCREMENT (mem_mode))
3266	&& known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3267	\|\| ((USE_LOAD_POST_DECREMENT (mem_mode)
3268	\|\| USE_STORE_POST_DECREMENT (mem_mode))
3269	&& known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3270	{
3271	add_candidate_1 (data, base, step, important, pos: IP_AFTER_USE, use,
3272	incremented_at: use->stmt);
3273	}
3274	}
3275
3276	/ Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and*
3277	position to POS. If USE is not NULL, the candidate is set as related to
3278	it. The candidate computation is scheduled before exit condition and at
3279	the end of loop. /*
3280
3281	static void
3282	add_candidate (struct ivopts_data data, tree base, tree step, bool* important,
3283	struct iv_use use, struct* iv *orig_iv = NULL,
3284	bool doloop = false)
3285	{
3286	if (ip_normal_pos (data->current_loop))
3287	add_candidate_1 (data, base, step, important, pos: IP_NORMAL, use, NULL, orig_iv,
3288	doloop);
3289	/ Exclude doloop candidate here since it requires decrement then comparison*
3290	and jump, the IP_END position doesn't match. /*
3291	if (!doloop && ip_end_pos (data->current_loop)
3292	&& allow_ip_end_pos_p (loop: data->current_loop))
3293	add_candidate_1 (data, base, step, important, pos: IP_END, use, NULL, orig_iv);
3294	}
3295
3296	/ Adds standard iv candidates. /
3297
3298	static void
3299	add_standard_iv_candidates (struct ivopts_data *data)
3300	{
3301	add_candidate (data, integer_zero_node, integer_one_node, important: true, NULL);
3302
3303	/ The same for a double-integer type if it is still fast enough. /
3304	if (TYPE_PRECISION
3305	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3306	&& TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3307	add_candidate (data, base: build_int_cst (long_integer_type_node, `0`),
3308	step: build_int_cst (long_integer_type_node, `1`), important: true, NULL);
3309
3310	/ The same for a double-integer type if it is still fast enough. /
3311	if (TYPE_PRECISION
3312	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3313	&& TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3314	add_candidate (data, base: build_int_cst (long_long_integer_type_node, `0`),
3315	step: build_int_cst (long_long_integer_type_node, `1`), important: true, NULL);
3316	}
3317
3318
3319	/ Adds candidates bases on the old induction variable IV. /
3320
3321	static void
3322	add_iv_candidate_for_biv (struct ivopts_data data, struct* iv *iv)
3323	{
3324	gimple *phi;
3325	tree def;
3326	struct iv_cand *cand;
3327
3328	/ Check if this biv is used in address type use. /
3329	if (iv->no_overflow && iv->have_address_use
3330	&& INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3331	&& TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3332	{
3333	tree base = fold_convert (sizetype, iv->base);
3334	tree step = fold_convert (sizetype, iv->step);
3335
3336	/ Add iv cand of same precision as index part in TARGET_MEM_REF. /
3337	add_candidate (data, base, step, important: true, NULL, orig_iv: iv);
3338	/ Add iv cand of the original type only if it has nonlinear use. /
3339	if (iv->nonlin_use)
3340	add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3341	}
3342	else
3343	add_candidate (data, base: iv->base, step: iv->step, important: true, NULL);
3344
3345	/ The same, but with initial value zero. /
3346	if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3347	add_candidate (data, size_int (`0`), step: iv->step, important: true, NULL);
3348	else
3349	add_candidate (data, base: build_int_cst (TREE_TYPE (iv->base), `0`),
3350	step: iv->step, important: true, NULL);
3351
3352	phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3353	if (gimple_code (g: phi) == GIMPLE_PHI)
3354	{
3355	/ Additionally record the possibility of leaving the original iv*
3356	untouched. /*
3357	def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3358	/ Don't add candidate if it's from another PHI node because*
3359	it's an affine iv appearing in the form of PEELED_CHREC. /*
3360	phi = SSA_NAME_DEF_STMT (def);
3361	if (gimple_code (g: phi) != GIMPLE_PHI)
3362	{
3363	cand = add_candidate_1 (data,
3364	base: iv->base, step: iv->step, important: true, pos: IP_ORIGINAL, NULL,
3365	SSA_NAME_DEF_STMT (def));
3366	if (cand)
3367	{
3368	cand->var_before = iv->ssa_name;
3369	cand->var_after = def;
3370	}
3371	}
3372	else
3373	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3374	}
3375	}
3376
3377	/ Adds candidates based on the old induction variables. /
3378
3379	static void
3380	add_iv_candidate_for_bivs (struct ivopts_data *data)
3381	{
3382	unsigned i;
3383	struct iv *iv;
3384	bitmap_iterator bi;
3385
3386	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
3387	{
3388	iv = ver_info (data, ver: i)->iv;
3389	if (iv && iv->biv_p && !integer_zerop (iv->step))
3390	add_iv_candidate_for_biv (data, iv);
3391	}
3392	}
3393
3394	/ Record common candidate {BASE, STEP} derived from USE in hashtable. /
3395
3396	static void
3397	record_common_cand (struct ivopts_data *data, tree base,
3398	tree step, struct iv_use *use)
3399	{
3400	class iv_common_cand ent;
3401	class iv_common_cand **slot;
3402
3403	ent.base = base;
3404	ent.step = step;
3405	ent.hash = iterative_hash_expr (tree: base, seed: `0`);
3406	ent.hash = iterative_hash_expr (tree: step, seed: ent.hash);
3407
3408	slot = data->iv_common_cand_tab->find_slot (value: &ent, insert: INSERT);
3409	if (*slot == NULL)
3410	{
3411	slot = new* iv_common_cand ();
3412	(*slot)->base = base;
3413	(*slot)->step = step;
3414	(*slot)->uses.create (nelems: `8`);
3415	(*slot)->hash = ent.hash;
3416	data->iv_common_cands.safe_push (obj: (*slot));
3417	}
3418
3419	gcc_assert (use != NULL);
3420	(*slot)->uses.safe_push (obj: use);
3421	return;
3422	}
3423
3424	/ Comparison function used to sort common candidates. /
3425
3426	static int
3427	common_cand_cmp (const void p1, const* void *p2)
3428	{
3429	unsigned n1, n2;
3430	const class iv_common_cand *const *const ccand1
3431	= (const class iv_common_cand *const *)p1;
3432	const class iv_common_cand *const *const ccand2
3433	= (const class iv_common_cand *const *)p2;
3434
3435	n1 = (*ccand1)->uses.length ();
3436	n2 = (*ccand2)->uses.length ();
3437	return n2 - n1;
3438	}
3439
3440	/ Adds IV candidates based on common candidated recorded. /
3441
3442	static void
3443	add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3444	{
3445	unsigned i, j;
3446	struct iv_cand cand_1, cand_2;
3447
3448	data->iv_common_cands.qsort (common_cand_cmp);
3449	for (i = `0`; i < data->iv_common_cands.length (); i++)
3450	{
3451	class iv_common_cand *ptr = data->iv_common_cands [i];
3452
3453	/ Only add IV candidate if it's derived from multiple uses. /
3454	if (ptr->uses.length () <= `1`)
3455	break;
3456
3457	cand_1 = NULL;
3458	cand_2 = NULL;
3459	if (ip_normal_pos (data->current_loop))
3460	cand_1 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3461	important: false, pos: IP_NORMAL, NULL, NULL);
3462
3463	if (ip_end_pos (data->current_loop)
3464	&& allow_ip_end_pos_p (loop: data->current_loop))
3465	cand_2 = add_candidate_1 (data, base: ptr->base, step: ptr->step,
3466	important: false, pos: IP_END, NULL, NULL);
3467
3468	/ Bind deriving uses and the new candidates. /
3469	for (j = `0`; j < ptr->uses.length (); j++)
3470	{
3471	struct iv_group *group = data->vgroups [ptr->uses [j]->group_id];
3472	if (cand_1)
3473	bitmap_set_bit (group->related_cands, cand_1->id);
3474	if (cand_2)
3475	bitmap_set_bit (group->related_cands, cand_2->id);
3476	}
3477	}
3478
3479	/ Release data since it is useless from this point. /
3480	data->iv_common_cand_tab->empty ();
3481	data->iv_common_cands.truncate (size: `0`);
3482	}
3483
3484	/ Adds candidates based on the value of USE's iv. /
3485
3486	static void
3487	add_iv_candidate_for_use (struct ivopts_data data, struct* iv_use *use)
3488	{
3489	poly_uint64 offset;
3490	tree base;
3491	struct iv *iv = use->iv;
3492	tree basetype = TREE_TYPE (iv->base);
3493
3494	/ Don't add candidate for iv_use with non integer, pointer or non-mode*
3495	precision types, instead, add candidate for the corresponding scev in
3496	unsigned type with the same precision. See PR93674 for more info. /*
3497	if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3498	\|\| !type_has_mode_precision_p (t: basetype))
3499	{
3500	basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3501	TYPE_UNSIGNED (basetype));
3502	add_candidate (data, fold_convert (basetype, iv->base),
3503	fold_convert (basetype, iv->step), important: false, NULL);
3504	return;
3505	}
3506
3507	add_candidate (data, base: iv->base, step: iv->step, important: false, use);
3508
3509	/ Record common candidate for use in case it can be shared by others. /
3510	record_common_cand (data, base: iv->base, step: iv->step, use);
3511
3512	/ Record common candidate with initial value zero. /
3513	basetype = TREE_TYPE (iv->base);
3514	if (POINTER_TYPE_P (basetype))
3515	basetype = sizetype;
3516	record_common_cand (data, base: build_int_cst (basetype, `0`), step: iv->step, use);
3517
3518	/ Compare the cost of an address with an unscaled index with the cost of*
3519	an address with a scaled index and add candidate if useful. /*
3520	poly_int64 step;
3521	if (use != NULL
3522	&& poly_int_tree_p (t: iv->step, value: &step)
3523	&& address_p (type: use->type))
3524	{
3525	poly_int64 new_step;
3526	unsigned int fact = preferred_mem_scale_factor
3527	(base: use->iv->base,
3528	TYPE_MODE (use->mem_type),
3529	speed: optimize_loop_for_speed_p (data->current_loop));
3530
3531	if (fact != `1`
3532	&& multiple_p (a: step, b: fact, multiple: &new_step))
3533	add_candidate (data, size_int (`0`),
3534	step: wide_int_to_tree (sizetype, cst: new_step),
3535	important: true, NULL);
3536	}
3537
3538	/ Record common candidate with constant offset stripped in base.*
3539	Like the use itself, we also add candidate directly for it. /*
3540	base = strip_offset (expr: iv->base, offset: &offset);
3541	if (maybe_ne (a: offset, b: `0U`) \|\| base != iv->base)
3542	{
3543	record_common_cand (data, base, step: iv->step, use);
3544	add_candidate (data, base, step: iv->step, important: false, use);
3545	}
3546
3547	/ Record common candidate with base_object removed in base. /
3548	base = iv->base;
3549	STRIP_NOPS (base);
3550	if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3551	{
3552	tree step = iv->step;
3553
3554	STRIP_NOPS (step);
3555	base = TREE_OPERAND (base, `1`);
3556	step = fold_convert (sizetype, step);
3557	record_common_cand (data, base, step, use);
3558	/ Also record common candidate with offset stripped. /
3559	tree alt_base, alt_offset;
3560	split_constant_offset (base, &alt_base, &alt_offset);
3561	if (!integer_zerop (alt_offset))
3562	record_common_cand (data, base: alt_base, step, use);
3563	}
3564
3565	/ At last, add auto-incremental candidates. Make such variables*
3566	important since other iv uses with same base object may be based
3567	on it. /*
3568	if (use != NULL && address_p (type: use->type))
3569	add_autoinc_candidates (data, base: iv->base, step: iv->step, important: true, use);
3570	}
3571
3572	/ Adds candidates based on the uses. /
3573
3574	static void
3575	add_iv_candidate_for_groups (struct ivopts_data *data)
3576	{
3577	unsigned i;
3578
3579	/ Only add candidate for the first use in group. /
3580	for (i = `0`; i < data->vgroups.length (); i++)
3581	{
3582	struct iv_group *group = data->vgroups [i];
3583
3584	gcc_assert (group->vuses[`0`] != NULL);
3585	add_iv_candidate_for_use (data, use: group->vuses [`0`]);
3586	}
3587	add_iv_candidate_derived_from_uses (data);
3588	}
3589
3590	/ Record important candidates and add them to related_cands bitmaps. /
3591
3592	static void
3593	record_important_candidates (struct ivopts_data *data)
3594	{
3595	unsigned i;
3596	struct iv_group *group;
3597
3598	for (i = `0`; i < data->vcands.length (); i++)
3599	{
3600	struct iv_cand *cand = data->vcands [i];
3601
3602	if (cand->important)
3603	bitmap_set_bit (data->important_candidates, i);
3604	}
3605
3606	data->consider_all_candidates = (data->vcands.length ()
3607	<= CONSIDER_ALL_CANDIDATES_BOUND);
3608
3609	/ Add important candidates to groups' related_cands bitmaps. /
3610	for (i = `0`; i < data->vgroups.length (); i++)
3611	{
3612	group = data->vgroups [i];
3613	bitmap_ior_into (group->related_cands, data->important_candidates);
3614	}
3615	}
3616
3617	/ Allocates the data structure mapping the (use, candidate) pairs to costs.*
3618	If consider_all_candidates is true, we use a two-dimensional array, otherwise
3619	we allocate a simple list to every use. /*
3620
3621	static void
3622	alloc_use_cost_map (struct ivopts_data *data)
3623	{
3624	unsigned i, size, s;
3625
3626	for (i = `0`; i < data->vgroups.length (); i++)
3627	{
3628	struct iv_group *group = data->vgroups [i];
3629
3630	if (data->consider_all_candidates)
3631	size = data->vcands.length ();
3632	else
3633	{
3634	s = bitmap_count_bits (group->related_cands);
3635
3636	/ Round up to the power of two, so that moduling by it is fast. /
3637	size = s ? (`1` << ceil_log2 (x: s)) : `1`;
3638	}
3639
3640	group->n_map_members = size;
3641	group->cost_map = XCNEWVEC (class cost_pair, size);
3642	}
3643	}
3644
3645	/ Sets cost of (GROUP, CAND) pair to COST and record that it depends*
3646	on invariants INV_VARS and that the value used in expressing it is
3647	VALUE, and in case of iv elimination the comparison operator is COMP. /*
3648
3649	static void
3650	set_group_iv_cost (struct ivopts_data *data,
3651	struct iv_group group, struct* iv_cand *cand,
3652	comp_cost cost, bitmap inv_vars, tree value,
3653	enum tree_code comp, bitmap inv_exprs)
3654	{
3655	unsigned i, s;
3656
3657	if (cost.infinite_cost_p ())
3658	{
3659	BITMAP_FREE (inv_vars);
3660	BITMAP_FREE (inv_exprs);
3661	return;
3662	}
3663
3664	if (data->consider_all_candidates)
3665	{
3666	group->cost_map[cand->id].cand = cand;
3667	group->cost_map[cand->id].cost = cost;
3668	group->cost_map[cand->id].inv_vars = inv_vars;
3669	group->cost_map[cand->id].inv_exprs = inv_exprs;
3670	group->cost_map[cand->id].value = value;
3671	group->cost_map[cand->id].comp = comp;
3672	return;
3673	}
3674
3675	/ n_map_members is a power of two, so this computes modulo. /
3676	s = cand->id & (group->n_map_members - `1`);
3677	for (i = s; i < group->n_map_members; i++)
3678	if (!group->cost_map[i].cand)
3679	goto found;
3680	for (i = `0`; i < s; i++)
3681	if (!group->cost_map[i].cand)
3682	goto found;
3683
3684	gcc_unreachable ();
3685
3686	found:
3687	group->cost_map[i].cand = cand;
3688	group->cost_map[i].cost = cost;
3689	group->cost_map[i].inv_vars = inv_vars;
3690	group->cost_map[i].inv_exprs = inv_exprs;
3691	group->cost_map[i].value = value;
3692	group->cost_map[i].comp = comp;
3693	}
3694
3695	/ Gets cost of (GROUP, CAND) pair. /
3696
3697	static class cost_pair *
3698	get_group_iv_cost (struct ivopts_data data, struct* iv_group *group,
3699	struct iv_cand *cand)
3700	{
3701	unsigned i, s;
3702	class cost_pair *ret;
3703
3704	if (!cand)
3705	return NULL;
3706
3707	if (data->consider_all_candidates)
3708	{
3709	ret = group->cost_map + cand->id;
3710	if (!ret->cand)
3711	return NULL;
3712
3713	return ret;
3714	}
3715
3716	/ n_map_members is a power of two, so this computes modulo. /
3717	s = cand->id & (group->n_map_members - `1`);
3718	for (i = s; i < group->n_map_members; i++)
3719	if (group->cost_map[i].cand == cand)
3720	return group->cost_map + i;
3721	else if (group->cost_map[i].cand == NULL)
3722	return NULL;
3723	for (i = `0`; i < s; i++)
3724	if (group->cost_map[i].cand == cand)
3725	return group->cost_map + i;
3726	else if (group->cost_map[i].cand == NULL)
3727	return NULL;
3728
3729	return NULL;
3730	}
3731
3732	/ Produce DECL_RTL for object obj so it looks like it is stored in memory. /
3733	static rtx
3734	produce_memory_decl_rtl (tree obj, int *regno)
3735	{
3736	addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3737	machine_mode address_mode = targetm.addr_space.address_mode (as);
3738	rtx x;
3739
3740	gcc_assert (obj);
3741	if (TREE_STATIC (obj) \|\| DECL_EXTERNAL (obj))
3742	{
3743	const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3744	x = gen_rtx_SYMBOL_REF (address_mode, name);
3745	SET_SYMBOL_REF_DECL (x, obj);
3746	x = gen_rtx_MEM (DECL_MODE (obj), x);
3747	set_mem_addr_space (x, as);
3748	targetm.encode_section_info (obj, x, true);
3749	}
3750	else
3751	{
3752	x = gen_raw_REG (address_mode, (*regno)++);
3753	x = gen_rtx_MEM (DECL_MODE (obj), x);
3754	set_mem_addr_space (x, as);
3755	}
3756
3757	return x;
3758	}
3759
3760	/ Prepares decl_rtl for variables referred in EXPR_P. Callback for
3761	walk_tree. DATA contains the actual fake register number. /*
3762
3763	static tree
3764	prepare_decl_rtl (tree expr_p, int* ws, void* *data)
3765	{
3766	tree obj = NULL_TREE;
3767	rtx x = NULL_RTX;
3768	int regno = (int* *) data;
3769
3770	switch (TREE_CODE (*expr_p))
3771	{
3772	case ADDR_EXPR:
3773	for (expr_p = &TREE_OPERAND (*expr_p, `0`);
3774	handled_component_p (t: *expr_p);
3775	expr_p = &TREE_OPERAND (*expr_p, `0`))
3776	continue;
3777	obj = *expr_p;
3778	if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3779	x = produce_memory_decl_rtl (obj, regno);
3780	break;
3781
3782	case SSA_NAME:
3783	*ws = `0`;
3784	obj = SSA_NAME_VAR (*expr_p);
3785	/ Defer handling of anonymous SSA_NAMEs to the expander. /
3786	if (!obj)
3787	return NULL_TREE;
3788	if (!DECL_RTL_SET_P (obj))
3789	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3790	break;
3791
3792	case VAR_DECL:
3793	case PARM_DECL:
3794	case RESULT_DECL:
3795	*ws = `0`;
3796	obj = *expr_p;
3797
3798	if (DECL_RTL_SET_P (obj))
3799	break;
3800
3801	if (DECL_MODE (obj) == BLKmode)
3802	x = produce_memory_decl_rtl (obj, regno);
3803	else
3804	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3805
3806	break;
3807
3808	default:
3809	break;
3810	}
3811
3812	if (x)
3813	{
3814	decl_rtl_to_reset.safe_push (obj);
3815	SET_DECL_RTL (obj, x);
3816	}
3817
3818	return NULL_TREE;
3819	}
3820
3821	/ Predict whether the given loop will be transformed in the RTL*
3822	doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3823	This is only for target independent checks, see targetm.predict_doloop_p
3824	for the target dependent ones.
3825
3826	Note that according to some initial investigation, some checks like costly
3827	niter check and invalid stmt scanning don't have much gains among general
3828	cases, so keep this as simple as possible first.
3829
3830	Some RTL specific checks seems unable to be checked in gimple, if any new
3831	checks or easy checks _are_ missing here, please add them. /*
3832
3833	static bool
3834	generic_predict_doloop_p (struct ivopts_data *data)
3835	{
3836	class loop *loop = data->current_loop;
3837
3838	/ Call target hook for target dependent checks. /
3839	if (!targetm.predict_doloop_p (loop))
3840	{
3841	if (dump_file && (dump_flags & TDF_DETAILS))
3842	fprintf (stream: dump_file, format: "Predict doloop failure due to"
3843	" target specific checks.\n");
3844	return false;
3845	}
3846
3847	/ Similar to doloop_optimize, check iteration description to know it's*
3848	suitable or not. Keep it as simple as possible, feel free to extend it
3849	if you find any multiple exits cases matter. /*
3850	edge exit = single_dom_exit (loop);
3851	class tree_niter_desc *niter_desc;
3852	if (!exit \|\| !(niter_desc = niter_for_exit (data, exit)))
3853	{
3854	if (dump_file && (dump_flags & TDF_DETAILS))
3855	fprintf (stream: dump_file, format: "Predict doloop failure due to"
3856	" unexpected niters.\n");
3857	return false;
3858	}
3859
3860	/ Similar to doloop_optimize, check whether iteration count too small*
3861	and not profitable. /*
3862	HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3863	if (est_niter == -`1`)
3864	est_niter = get_likely_max_loop_iterations_int (loop);
3865	if (est_niter >= `0` && est_niter < `3`)
3866	{
3867	if (dump_file && (dump_flags & TDF_DETAILS))
3868	fprintf (stream: dump_file,
3869	format: "Predict doloop failure due to"
3870	" too few iterations (%u).\n",
3871	(unsigned int) est_niter);
3872	return false;
3873	}
3874
3875	return true;
3876	}
3877
3878	/ Determines cost of the computation of EXPR. /
3879
3880	static unsigned
3881	computation_cost (tree expr, bool speed)
3882	{
3883	rtx_insn *seq;
3884	rtx rslt;
3885	tree type = TREE_TYPE (expr);
3886	unsigned cost;
3887	/ Avoid using hard regs in ways which may be unsupported. /
3888	int regno = LAST_VIRTUAL_REGISTER + `1`;
3889	struct cgraph_node *node = cgraph_node::get (decl: current_function_decl);
3890	enum node_frequency real_frequency = node->frequency;
3891
3892	node->frequency = NODE_FREQUENCY_NORMAL;
3893	crtl->maybe_hot_insn_p = speed;
3894	walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3895	start_sequence ();
3896	rslt = expand_expr (exp: expr, NULL_RTX, TYPE_MODE (type), modifier: EXPAND_NORMAL);
3897	seq = end_sequence ();
3898	default_rtl_profile ();
3899	node->frequency = real_frequency;
3900
3901	cost = seq_cost (seq, speed);
3902	if (MEM_P (rslt))
3903	cost += address_cost (XEXP (rslt, `0`), TYPE_MODE (type),
3904	TYPE_ADDR_SPACE (type), speed);
3905	else if (!REG_P (rslt))
3906	cost += set_src_cost (x: rslt, TYPE_MODE (type), speed_p: speed);
3907
3908	return cost;
3909	}
3910
3911	/ Returns variable containing the value of candidate CAND at statement AT. /
3912
3913	static tree
3914	var_at_stmt (class loop loop, struct* iv_cand cand, gimple stmt)
3915	{
3916	if (stmt_after_increment (loop, cand, stmt))
3917	return cand->var_after;
3918	else
3919	return cand->var_before;
3920	}
3921
3922	/ If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the*
3923	same precision that is at least as wide as the precision of TYPE, stores
3924	BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3925	type of A and B. /*
3926
3927	static tree
3928	determine_common_wider_type (tree a, tree b)
3929	{
3930	tree wider_type = NULL;
3931	tree suba, subb;
3932	tree atype = TREE_TYPE (*a);
3933
3934	if (CONVERT_EXPR_P (*a))
3935	{
3936	suba = TREE_OPERAND (*a, `0`);
3937	wider_type = TREE_TYPE (suba);
3938	if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3939	return atype;
3940	}
3941	else
3942	return atype;
3943
3944	if (CONVERT_EXPR_P (*b))
3945	{
3946	subb = TREE_OPERAND (*b, `0`);
3947	if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3948	return atype;
3949	}
3950	else
3951	return atype;
3952
3953	*a = suba;
3954	*b = subb;
3955	return wider_type;
3956	}
3957
3958	/ Determines the expression by that USE is expressed from induction variable*
3959	CAND at statement AT in DATA's current loop. The expression is stored in
3960	two parts in a decomposed form. The invariant part is stored in AFF_INV;
3961	while variant part in AFF_VAR. Store ratio of CAND.step over USE.step in
3962	PRAT if it's non-null. Returns false if USE cannot be expressed using
3963	CAND. /*
3964
3965	static bool
3966	get_computation_aff_1 (struct ivopts_data data, gimple at, struct iv_use *use,
3967	struct iv_cand cand, class* aff_tree *aff_inv,
3968	class aff_tree aff_var, widest_int prat = NULL)
3969	{
3970	tree ubase = use->iv->base, ustep = use->iv->step;
3971	tree cbase = cand->iv->base, cstep = cand->iv->step;
3972	tree common_type, uutype, var, cstep_common;
3973	tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3974	aff_tree aff_cbase;
3975	widest_int rat;
3976
3977	/ We must have a precision to express the values of use. /
3978	if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3979	return false;
3980
3981	var = var_at_stmt (loop: data->current_loop, cand, stmt: at);
3982	uutype = unsigned_type_for (utype);
3983
3984	/ If the conversion is not noop, perform it. /
3985	if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3986	{
3987	if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3988	&& (CONVERT_EXPR_P (cstep) \|\| poly_int_tree_p (t: cstep)))
3989	{
3990	tree inner_base, inner_step, inner_type;
3991	inner_base = TREE_OPERAND (cbase, `0`);
3992	if (CONVERT_EXPR_P (cstep))
3993	inner_step = TREE_OPERAND (cstep, `0`);
3994	else
3995	inner_step = cstep;
3996
3997	inner_type = TREE_TYPE (inner_base);
3998	/ If candidate is added from a biv whose type is smaller than*
3999	ctype, we know both candidate and the biv won't overflow.
4000	In this case, it's safe to skip the convertion in candidate.
4001	As an example, (unsigned short)((unsigned long)A) equals to
4002	(unsigned short)A, if A has a type no larger than short. /*
4003	if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4004	{
4005	cbase = inner_base;
4006	cstep = inner_step;
4007	}
4008	}
4009	cbase = fold_convert (uutype, cbase);
4010	cstep = fold_convert (uutype, cstep);
4011	var = fold_convert (uutype, var);
4012	}
4013
4014	/ Ratio is 1 when computing the value of biv cand by itself.*
4015	We can't rely on constant_multiple_of in this case because the
4016	use is created after the original biv is selected. The call
4017	could fail because of inconsistent fold behavior. See PR68021
4018	for more information. /*
4019	if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4020	{
4021	gcc_assert (is_gimple_assign (use->stmt));
4022	gcc_assert (use->iv->ssa_name == cand->var_after);
4023	gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4024	rat = `1`;
4025	}
4026	else if (!constant_multiple_of (top: ustep, bot: cstep, mul: &rat, data))
4027	return false;
4028
4029	if (prat)
4030	*prat = rat;
4031
4032	/ In case both UBASE and CBASE are shortened to UUTYPE from some common*
4033	type, we achieve better folding by computing their difference in this
4034	wider type, and cast the result to UUTYPE. We do not need to worry about
4035	overflows, as all the arithmetics will in the end be performed in UUTYPE
4036	anyway. /*
4037	common_type = determine_common_wider_type (a: &ubase, b: &cbase);
4038
4039	/ use = ubase - ratio * cbase + ratio * var. /
4040	tree_to_aff_combination (ubase, common_type, aff_inv);
4041	tree_to_aff_combination (cbase, common_type, &aff_cbase);
4042	tree_to_aff_combination (var, uutype, aff_var);
4043
4044	/ We need to shift the value if we are after the increment. /
4045	if (stmt_after_increment (loop: data->current_loop, cand, stmt: at))
4046	{
4047	aff_tree cstep_aff;
4048
4049	if (common_type != uutype)
4050	cstep_common = fold_convert (common_type, cstep);
4051	else
4052	cstep_common = cstep;
4053
4054	tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4055	aff_combination_add (&aff_cbase, &cstep_aff);
4056	}
4057
4058	aff_combination_scale (&aff_cbase, -rat);
4059	aff_combination_add (aff_inv, &aff_cbase);
4060	if (common_type != uutype)
4061	aff_combination_convert (aff_inv, uutype);
4062
4063	aff_combination_scale (aff_var, rat);
4064	return true;
4065	}
4066
4067	/ Determines the expression by that USE is expressed from induction variable*
4068	CAND at statement AT in DATA's current loop. The expression is stored in a
4069	decomposed form into AFF. Returns false if USE cannot be expressed using
4070	CAND. /*
4071
4072	static bool
4073	get_computation_aff (struct ivopts_data data, gimple at, struct iv_use *use,
4074	struct iv_cand cand, class* aff_tree *aff)
4075	{
4076	aff_tree aff_var;
4077
4078	if (!get_computation_aff_1 (data, at, use, cand, aff_inv: aff, aff_var: &aff_var))
4079	return false;
4080
4081	aff_combination_add (aff, &aff_var);
4082	return true;
4083	}
4084
4085	/ Return the type of USE. /
4086
4087	static tree
4088	get_use_type (struct iv_use *use)
4089	{
4090	tree base_type = TREE_TYPE (use->iv->base);
4091	tree type;
4092
4093	if (use->type == USE_REF_ADDRESS)
4094	{
4095	/ The base_type may be a void pointer. Create a pointer type based on*
4096	the mem_ref instead. /*
4097	type = build_pointer_type (TREE_TYPE (*use->op_p));
4098	gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4099	== TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4100	}
4101	else
4102	type = base_type;
4103
4104	return type;
4105	}
4106
4107	/ Determines the expression by that USE is expressed from induction variable*
4108	CAND at statement AT in DATA's current loop. The computation is
4109	unshared. /*
4110
4111	static tree
4112	get_computation_at (struct ivopts_data data, gimple at,
4113	struct iv_use use, struct* iv_cand *cand)
4114	{
4115	aff_tree aff;
4116	tree type = get_use_type (use);
4117
4118	if (!get_computation_aff (data, at, use, cand, aff: &aff))
4119	return NULL_TREE;
4120	unshare_aff_combination (&aff);
4121	return fold_convert (type, aff_combination_to_tree (&aff));
4122	}
4123
4124	/ Like get_computation_at, but try harder, even if the computation*
4125	is more expensive. Intended for debug stmts. /*
4126
4127	static tree
4128	get_debug_computation_at (struct ivopts_data data, gimple at,
4129	struct iv_use use, struct* iv_cand *cand)
4130	{
4131	if (tree ret = get_computation_at (data, at, use, cand))
4132	return ret;
4133
4134	tree ubase = use->iv->base, ustep = use->iv->step;
4135	tree cbase = cand->iv->base, cstep = cand->iv->step;
4136	tree var;
4137	tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4138	widest_int rat;
4139
4140	/ We must have a precision to express the values of use. /
4141	if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4142	return NULL_TREE;
4143
4144	/ Try to handle the case that get_computation_at doesn't,*
4145	try to express
4146	use = ubase + (var - cbase) / ratio. /*
4147	if (!constant_multiple_of (top: cstep, fold_convert (TREE_TYPE (cstep), ustep),
4148	mul: &rat, data))
4149	return NULL_TREE;
4150
4151	bool neg_p = false;
4152	if (wi::neg_p (x: rat))
4153	{
4154	if (TYPE_UNSIGNED (ctype))
4155	return NULL_TREE;
4156	neg_p = true;
4157	rat = wi::neg (x: rat);
4158	}
4159
4160	/ If both IVs can wrap around and CAND doesn't have a power of two step,*
4161	it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4162	the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4163	uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4164	... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. /*
4165	if (!use->iv->no_overflow
4166	&& !cand->iv->no_overflow
4167	&& !integer_pow2p (cstep))
4168	return NULL_TREE;
4169
4170	int bits = wi::exact_log2 (rat);
4171	if (bits == -`1`)
4172	bits = wi::floor_log2 (rat) + `1`;
4173	if (!cand->iv->no_overflow
4174	&& TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4175	return NULL_TREE;
4176
4177	var = var_at_stmt (loop: data->current_loop, cand, stmt: at);
4178
4179	if (POINTER_TYPE_P (ctype))
4180	{
4181	ctype = unsigned_type_for (ctype);
4182	cbase = fold_convert (ctype, cbase);
4183	cstep = fold_convert (ctype, cstep);
4184	var = fold_convert (ctype, var);
4185	}
4186
4187	if (stmt_after_increment (loop: data->current_loop, cand, stmt: at))
4188	var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4189	unshare_expr (cstep));
4190
4191	var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4192	var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4193	wide_int_to_tree (TREE_TYPE (var), rat));
4194	if (POINTER_TYPE_P (utype))
4195	{
4196	var = fold_convert (sizetype, var);
4197	if (neg_p)
4198	var = fold_build1 (NEGATE_EXPR, sizetype, var);
4199	var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4200	}
4201	else
4202	{
4203	var = fold_convert (utype, var);
4204	var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4205	ubase, var);
4206	}
4207	return var;
4208	}
4209
4210	/ Adjust the cost COST for being in loop setup rather than loop body.*
4211	If we're optimizing for space, the loop setup overhead is constant;
4212	if we're optimizing for speed, amortize it over the per-iteration cost.
4213	If ROUND_UP_P is true, the result is round up rather than to zero when
4214	optimizing for speed. /*
4215	static int64_t
4216	adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4217	bool round_up_p = false)
4218	{
4219	if (cost == INFTY)
4220	return cost;
4221	else if (optimize_loop_for_speed_p (data->current_loop))
4222	{
4223	uint64_t niters = avg_loop_niter (loop: data->current_loop);
4224	if (niters > (uint64_t) cost)
4225	return (round_up_p && cost != `0`) ? `1` : `0`;
4226	return (cost + (round_up_p ? niters - `1` : `0`)) / niters;
4227	}
4228	else
4229	return cost;
4230	}
4231
4232	/ Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the*
4233	EXPR operand holding the shift. COST0 and COST1 are the costs for
4234	calculating the operands of EXPR. Returns true if successful, and returns
4235	the cost in COST. /*
4236
4237	static bool
4238	get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4239	comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4240	{
4241	comp_cost res;
4242	tree op1 = TREE_OPERAND (expr, `1`);
4243	tree cst = TREE_OPERAND (mult, `1`);
4244	tree multop = TREE_OPERAND (mult, `0`);
4245	int m = exact_log2 (x: int_cst_value (cst));
4246	int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4247	int as_cost, sa_cost;
4248	bool mult_in_op1;
4249
4250	if (!(m >= `0` && m < maxm))
4251	return false;
4252
4253	STRIP_NOPS (op1);
4254	mult_in_op1 = operand_equal_p (op1, mult, flags: `0`);
4255
4256	as_cost = add_cost (speed, mode) + shift_cost (speed, mode, bits: m);
4257
4258	/ If the target has a cheap shift-and-add or shift-and-sub instruction,*
4259	use that in preference to a shift insn followed by an add insn. /*
4260	sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4261	? shiftadd_cost (speed, mode, bits: m)
4262	: (mult_in_op1
4263	? shiftsub1_cost (speed, mode, bits: m)
4264	: shiftsub0_cost (speed, mode, bits: m)));
4265
4266	res = comp_cost (MIN (as_cost, sa_cost), `0`);
4267	res += (mult_in_op1 ? cost0 : cost1);
4268
4269	STRIP_NOPS (multop);
4270	if (!is_gimple_val (multop))
4271	res += force_expr_to_var_cost (multop, speed);
4272
4273	*cost = res;
4274	return true;
4275	}
4276
4277	/ Estimates cost of forcing expression EXPR into a variable. /
4278
4279	static comp_cost
4280	force_expr_to_var_cost (tree expr, bool speed)
4281	{
4282	static bool costs_initialized = false;
4283	static unsigned integer_cost [`2`];
4284	static unsigned symbol_cost [`2`];
4285	static unsigned address_cost [`2`];
4286	tree op0, op1;
4287	comp_cost cost0, cost1, cost;
4288	machine_mode mode;
4289	scalar_int_mode int_mode;
4290
4291	if (!costs_initialized)
4292	{
4293	tree type = build_pointer_type (integer_type_node);
4294	tree var, addr;
4295	rtx x;
4296	int i;
4297
4298	var = create_tmp_var_raw (integer_type_node, "test_var");
4299	TREE_STATIC (var) = `1`;
4300	x = produce_memory_decl_rtl (obj: var, NULL);
4301	SET_DECL_RTL (var, x);
4302
4303	addr = build1 (ADDR_EXPR, type, var);
4304
4305
4306	for (i = `0`; i < `2`; i++)
4307	{
4308	integer_cost[i] = computation_cost (expr: build_int_cst (integer_type_node,
4309	`2000`), speed: i);
4310
4311	symbol_cost[i] = computation_cost (expr: addr, speed: i) + `1`;
4312
4313	address_cost[i]
4314	= computation_cost (fold_build_pointer_plus_hwi (addr, `2000`), speed: i) + `1`;
4315	if (dump_file && (dump_flags & TDF_DETAILS))
4316	{
4317	fprintf (stream: dump_file, format: "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4318	fprintf (stream: dump_file, format: " integer %d\n", (int) integer_cost[i]);
4319	fprintf (stream: dump_file, format: " symbol %d\n", (int) symbol_cost[i]);
4320	fprintf (stream: dump_file, format: " address %d\n", (int) address_cost[i]);
4321	fprintf (stream: dump_file, format: " other %d\n", (int) target_spill_cost[i]);
4322	fprintf (stream: dump_file, format: "\n");
4323	}
4324	}
4325
4326	costs_initialized = true;
4327	}
4328
4329	STRIP_NOPS (expr);
4330
4331	if (SSA_VAR_P (expr))
4332	return no_cost;
4333
4334	if (is_gimple_min_invariant (expr))
4335	{
4336	if (poly_int_tree_p (t: expr))
4337	return comp_cost (integer_cost [speed], `0`);
4338
4339	if (TREE_CODE (expr) == ADDR_EXPR)
4340	{
4341	tree obj = TREE_OPERAND (expr, `0`);
4342
4343	if (VAR_P (obj)
4344	\|\| TREE_CODE (obj) == PARM_DECL
4345	\|\| TREE_CODE (obj) == RESULT_DECL)
4346	return comp_cost (symbol_cost [speed], `0`);
4347	}
4348
4349	return comp_cost (address_cost [speed], `0`);
4350	}
4351
4352	switch (TREE_CODE (expr))
4353	{
4354	case POINTER_PLUS_EXPR:
4355	case PLUS_EXPR:
4356	case MINUS_EXPR:
4357	case MULT_EXPR:
4358	case EXACT_DIV_EXPR:
4359	case TRUNC_DIV_EXPR:
4360	case BIT_AND_EXPR:
4361	case BIT_IOR_EXPR:
4362	case LSHIFT_EXPR:
4363	case RSHIFT_EXPR:
4364	op0 = TREE_OPERAND (expr, `0`);
4365	op1 = TREE_OPERAND (expr, `1`);
4366	STRIP_NOPS (op0);
4367	STRIP_NOPS (op1);
4368	break;
4369
4370	CASE_CONVERT:
4371	case NEGATE_EXPR:
4372	case BIT_NOT_EXPR:
4373	op0 = TREE_OPERAND (expr, `0`);
4374	STRIP_NOPS (op0);
4375	op1 = NULL_TREE;
4376	break;
4377	/ See add_iv_candidate_for_doloop, for doloop may_be_zero case, we*
4378	introduce COND_EXPR for IV base, need to support better cost estimation
4379	for this COND_EXPR and tcc_comparison. /*
4380	case COND_EXPR:
4381	op0 = TREE_OPERAND (expr, `1`);
4382	STRIP_NOPS (op0);
4383	op1 = TREE_OPERAND (expr, `2`);
4384	STRIP_NOPS (op1);
4385	break;
4386	case LT_EXPR:
4387	case LE_EXPR:
4388	case GT_EXPR:
4389	case GE_EXPR:
4390	case EQ_EXPR:
4391	case NE_EXPR:
4392	case UNORDERED_EXPR:
4393	case ORDERED_EXPR:
4394	case UNLT_EXPR:
4395	case UNLE_EXPR:
4396	case UNGT_EXPR:
4397	case UNGE_EXPR:
4398	case UNEQ_EXPR:
4399	case LTGT_EXPR:
4400	case MAX_EXPR:
4401	case MIN_EXPR:
4402	op0 = TREE_OPERAND (expr, `0`);
4403	STRIP_NOPS (op0);
4404	op1 = TREE_OPERAND (expr, `1`);
4405	STRIP_NOPS (op1);
4406	break;
4407
4408	default:
4409	/ Just an arbitrary value, FIXME. /
4410	return comp_cost (target_spill_cost[speed], `0`);
4411	}
4412
4413	if (op0 == NULL_TREE
4414	\|\| TREE_CODE (op0) == SSA_NAME \|\| CONSTANT_CLASS_P (op0))
4415	cost0 = no_cost;
4416	else
4417	cost0 = force_expr_to_var_cost (expr: op0, speed);
4418
4419	if (op1 == NULL_TREE
4420	\|\| TREE_CODE (op1) == SSA_NAME \|\| CONSTANT_CLASS_P (op1))
4421	cost1 = no_cost;
4422	else
4423	cost1 = force_expr_to_var_cost (expr: op1, speed);
4424
4425	mode = TYPE_MODE (TREE_TYPE (expr));
4426	switch (TREE_CODE (expr))
4427	{
4428	case POINTER_PLUS_EXPR:
4429	case PLUS_EXPR:
4430	case MINUS_EXPR:
4431	case NEGATE_EXPR:
4432	cost = comp_cost (add_cost (speed, mode), `0`);
4433	if (TREE_CODE (expr) != NEGATE_EXPR)
4434	{
4435	tree mult = NULL_TREE;
4436	comp_cost sa_cost;
4437	if (TREE_CODE (op1) == MULT_EXPR)
4438	mult = op1;
4439	else if (TREE_CODE (op0) == MULT_EXPR)
4440	mult = op0;
4441
4442	if (mult != NULL_TREE
4443	&& is_a <scalar_int_mode> (m: mode, result: &int_mode)
4444	&& cst_and_fits_in_hwi (TREE_OPERAND (mult, `1`))
4445	&& get_shiftadd_cost (expr, mode: int_mode, cost0, cost1, mult,
4446	speed, cost: &sa_cost))
4447	return sa_cost;
4448	}
4449	break;
4450
4451	CASE_CONVERT:
4452	{
4453	tree inner_mode, outer_mode;
4454	outer_mode = TREE_TYPE (expr);
4455	inner_mode = TREE_TYPE (op0);
4456	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4457	TYPE_MODE (inner_mode), speed), `0`);
4458	}
4459	break;
4460
4461	case MULT_EXPR:
4462	if (cst_and_fits_in_hwi (op0))
4463	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4464	mode, speed), `0`);
4465	else if (cst_and_fits_in_hwi (op1))
4466	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4467	mode, speed), `0`);
4468	else
4469	return comp_cost (target_spill_cost [speed], `0`);
4470	break;
4471
4472	case EXACT_DIV_EXPR:
4473	case TRUNC_DIV_EXPR:
4474	/ Division by power of two is usually cheap, so we allow it. Forbid*
4475	anything else. /*
4476	if (integer_pow2p (TREE_OPERAND (expr, `1`)))
4477	cost = comp_cost (add_cost (speed, mode), `0`);
4478	else
4479	cost = comp_cost (target_spill_cost[speed], `0`);
4480	break;
4481
4482	case BIT_AND_EXPR:
4483	case BIT_IOR_EXPR:
4484	case BIT_NOT_EXPR:
4485	case LSHIFT_EXPR:
4486	case RSHIFT_EXPR:
4487	cost = comp_cost (add_cost (speed, mode), `0`);
4488	break;
4489	case COND_EXPR:
4490	op0 = TREE_OPERAND (expr, `0`);
4491	STRIP_NOPS (op0);
4492	if (op0 == NULL_TREE \|\| TREE_CODE (op0) == SSA_NAME
4493	\|\| CONSTANT_CLASS_P (op0))
4494	cost = no_cost;
4495	else
4496	cost = force_expr_to_var_cost (expr: op0, speed);
4497	break;
4498	case LT_EXPR:
4499	case LE_EXPR:
4500	case GT_EXPR:
4501	case GE_EXPR:
4502	case EQ_EXPR:
4503	case NE_EXPR:
4504	case UNORDERED_EXPR:
4505	case ORDERED_EXPR:
4506	case UNLT_EXPR:
4507	case UNLE_EXPR:
4508	case UNGT_EXPR:
4509	case UNGE_EXPR:
4510	case UNEQ_EXPR:
4511	case LTGT_EXPR:
4512	case MAX_EXPR:
4513	case MIN_EXPR:
4514	/ Simply use add cost for now, FIXME if there is some more accurate cost*
4515	evaluation way. /*
4516	cost = comp_cost (add_cost (speed, mode), `0`);
4517	break;
4518
4519	default:
4520	gcc_unreachable ();
4521	}
4522
4523	cost += cost0;
4524	cost += cost1;
4525	return cost;
4526	}
4527
4528	/ Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the*
4529	invariants the computation depends on. /*
4530
4531	static comp_cost
4532	force_var_cost (struct ivopts_data data, tree expr, bitmap inv_vars)
4533	{
4534	if (!expr)
4535	return no_cost;
4536
4537	find_inv_vars (data, expr_p: &expr, inv_vars);
4538	return force_expr_to_var_cost (expr, speed: data->speed);
4539	}
4540
/* Returns cost of auto-modifying address expression in shape base + offset.
   AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
   address expression.  The address expression has ADDR_MODE in addr space
   AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
   speed or size.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

/* One cost per auto increment type, indexed by enum ainc_type.  */
struct ainc_cost_data
{
  int64_t costs[AINC_NONE];
};
4560
4561	static comp_cost
4562	get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4563	machine_mode addr_mode, machine_mode mem_mode,
4564	addr_space_t as, bool speed)
4565	{
4566	if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4567	&& !USE_STORE_PRE_DECREMENT (mem_mode)
4568	&& !USE_LOAD_POST_DECREMENT (mem_mode)
4569	&& !USE_STORE_POST_DECREMENT (mem_mode)
4570	&& !USE_LOAD_PRE_INCREMENT (mem_mode)
4571	&& !USE_STORE_PRE_INCREMENT (mem_mode)
4572	&& !USE_LOAD_POST_INCREMENT (mem_mode)
4573	&& !USE_STORE_POST_INCREMENT (mem_mode))
4574	return infinite_cost;
4575
4576	static vec<ainc_cost_data *> ainc_cost_data_list;
4577	unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4578	if (idx >= ainc_cost_data_list.length ())
4579	{
4580	unsigned nsize = ((unsigned) as + `1`) *MAX_MACHINE_MODE;
4581
4582	gcc_assert (nsize > idx);
4583	ainc_cost_data_list.safe_grow_cleared (len: nsize, exact: true);
4584	}
4585
4586	ainc_cost_data *data = ainc_cost_data_list [idx];
4587	if (data == NULL)
4588	{
4589	rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + `1`);
4590
4591	data = (ainc_cost_data ) xcalloc (`1`, sizeof* (*data));
4592	data->costs[AINC_PRE_DEC] = INFTY;
4593	data->costs[AINC_POST_DEC] = INFTY;
4594	data->costs[AINC_PRE_INC] = INFTY;
4595	data->costs[AINC_POST_INC] = INFTY;
4596	if (USE_LOAD_PRE_DECREMENT (mem_mode)
4597	\|\| USE_STORE_PRE_DECREMENT (mem_mode))
4598	{
4599	rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4600
4601	if (memory_address_addr_space_p (mem_mode, addr, as))
4602	data->costs[AINC_PRE_DEC]
4603	= address_cost (addr, mem_mode, as, speed);
4604	}
4605	if (USE_LOAD_POST_DECREMENT (mem_mode)
4606	\|\| USE_STORE_POST_DECREMENT (mem_mode))
4607	{
4608	rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4609
4610	if (memory_address_addr_space_p (mem_mode, addr, as))
4611	data->costs[AINC_POST_DEC]
4612	= address_cost (addr, mem_mode, as, speed);
4613	}
4614	if (USE_LOAD_PRE_INCREMENT (mem_mode)
4615	\|\| USE_STORE_PRE_INCREMENT (mem_mode))
4616	{
4617	rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4618
4619	if (memory_address_addr_space_p (mem_mode, addr, as))
4620	data->costs[AINC_PRE_INC]
4621	= address_cost (addr, mem_mode, as, speed);
4622	}
4623	if (USE_LOAD_POST_INCREMENT (mem_mode)
4624	\|\| USE_STORE_POST_INCREMENT (mem_mode))
4625	{
4626	rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4627
4628	if (memory_address_addr_space_p (mem_mode, addr, as))
4629	data->costs[AINC_POST_INC]
4630	= address_cost (addr, mem_mode, as, speed);
4631	}
4632	ainc_cost_data_list [idx] = data;
4633	}
4634
4635	poly_int64 msize = GET_MODE_SIZE (mode: mem_mode);
4636	if (known_eq (ainc_offset, `0`) && known_eq (msize, ainc_step))
4637	return comp_cost (data->costs[AINC_POST_INC], `0`);
4638	if (known_eq (ainc_offset, `0`) && known_eq (msize, -ainc_step))
4639	return comp_cost (data->costs[AINC_POST_DEC], `0`);
4640	if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4641	return comp_cost (data->costs[AINC_PRE_INC], `0`);
4642	if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4643	return comp_cost (data->costs[AINC_PRE_DEC], `0`);
4644
4645	return infinite_cost;
4646	}
4647
4648	/ Return cost of computing USE's address expression by using CAND.*
4649	AFF_INV and AFF_VAR represent invariant and variant parts of the
4650	address expression, respectively. If AFF_INV is simple, store
4651	the loop invariant variables which are depended by it in INV_VARS;
4652	if AFF_INV is complicated, handle it as a new invariant expression
4653	and record it in INV_EXPR. RATIO indicates multiple times between
4654	steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4655	value to it indicating if this is an auto-increment address. /*
4656
4657	static comp_cost
4658	get_address_cost (struct ivopts_data data, struct* iv_use *use,
4659	struct iv_cand cand, aff_tree aff_inv,
4660	aff_tree *aff_var, HOST_WIDE_INT ratio,
4661	bitmap inv_vars, iv_inv_expr_ent *inv_expr,
4662	bool can_autoinc, bool* speed)
4663	{
4664	rtx addr;
4665	bool simple_inv = true;
4666	tree comp_inv = NULL_TREE, type = aff_var->type;
4667	comp_cost var_cost = no_cost, cost = no_cost;
4668	struct mem_address parts = {NULL_TREE, integer_one_node,
4669	NULL_TREE, NULL_TREE, NULL_TREE};
4670	machine_mode addr_mode = TYPE_MODE (type);
4671	machine_mode mem_mode = TYPE_MODE (use->mem_type);
4672	addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4673	/ Only true if ratio != 1. /
4674	bool ok_with_ratio_p = false;
4675	bool ok_without_ratio_p = false;
4676	code_helper code = ERROR_MARK;
4677
4678	if (use->type == USE_PTR_ADDRESS)
4679	{
4680	gcall call = as_a<gcall > (p: use->stmt);
4681	gcc_assert (gimple_call_internal_p (call));
4682	code = gimple_call_internal_fn (gs: call);
4683	}
4684
4685	if (!aff_combination_const_p (aff: aff_inv))
4686	{
4687	parts.index = integer_one_node;
4688	/ Addressing mode "base + index". /
4689	ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4690	if (ratio != `1`)
4691	{
4692	parts.step = wide_int_to_tree (type, cst: ratio);
4693	/ Addressing mode "base + index << scale". /
4694	ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4695	if (!ok_with_ratio_p)
4696	parts.step = NULL_TREE;
4697	}
4698	if (ok_with_ratio_p \|\| ok_without_ratio_p)
4699	{
4700	if (maybe_ne (a: aff_inv->offset, b: `0`))
4701	{
4702	parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4703	/ Addressing mode "base + index [<< scale] + offset". /
4704	if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4705	parts.offset = NULL_TREE;
4706	else
4707	aff_inv->offset = `0`;
4708	}
4709
4710	move_fixed_address_to_symbol (&parts, aff_inv);
4711	/ Base is fixed address and is moved to symbol part. /
4712	if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff: aff_inv))
4713	parts.base = NULL_TREE;
4714
4715	/ Addressing mode "symbol + base + index [<< scale] [+ offset]". /
4716	if (parts.symbol != NULL_TREE
4717	&& !valid_mem_ref_p (mem_mode, as, &parts, code))
4718	{
4719	aff_combination_add_elt (aff_inv, parts.symbol, `1`);
4720	parts.symbol = NULL_TREE;
4721	/ Reset SIMPLE_INV since symbol address needs to be computed*
4722	outside of address expression in this case. /*
4723	simple_inv = false;
4724	/ Symbol part is moved back to base part, it can't be NULL. /
4725	parts.base = integer_one_node;
4726	}
4727	}
4728	else
4729	parts.index = NULL_TREE;
4730	}
4731	else
4732	{
4733	poly_int64 ainc_step;
4734	if (can_autoinc
4735	&& ratio == `1`
4736	&& ptrdiff_tree_p (cand->iv->step, &ainc_step))
4737	{
4738	poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4739
4740	if (stmt_after_increment (loop: data->current_loop, cand, stmt: use->stmt))
4741	ainc_offset += ainc_step;
4742	cost = get_address_cost_ainc (ainc_step, ainc_offset,
4743	addr_mode, mem_mode, as, speed);
4744	if (!cost.infinite_cost_p ())
4745	{
4746	can_autoinc = true*;
4747	return cost;
4748	}
4749	cost = no_cost;
4750	}
4751	if (!aff_combination_zero_p (aff: aff_inv))
4752	{
4753	parts.offset = wide_int_to_tree (sizetype, cst: aff_inv->offset);
4754	/ Addressing mode "base + offset". /
4755	if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4756	parts.offset = NULL_TREE;
4757	else
4758	aff_inv->offset = `0`;
4759	}
4760	}
4761
4762	if (simple_inv)
4763	simple_inv = (aff_inv == NULL
4764	\|\| aff_combination_const_p (aff: aff_inv)
4765	\|\| aff_combination_singleton_var_p (aff: aff_inv));
4766	if (!aff_combination_zero_p (aff: aff_inv))
4767	comp_inv = aff_combination_to_tree (aff_inv);
4768	if (comp_inv != NULL_TREE)
4769	cost = force_var_cost (data, expr: comp_inv, inv_vars);
4770	if (ratio != `1` && parts.step == NULL_TREE)
4771	var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4772	if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4773	var_cost += add_cost (speed, mode: addr_mode);
4774
4775	if (comp_inv && inv_expr && !simple_inv)
4776	{
4777	*inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4778	/ Clear depends on. /
4779	if (inv_expr != NULL && inv_vars && inv_vars)
4780	bitmap_clear (*inv_vars);
4781
4782	/ Cost of small invariant expression adjusted against loop niters*
4783	is usually zero, which makes it difficult to be differentiated
4784	from candidate based on loop invariant variables. Secondly, the
4785	generated invariant expression may not be hoisted out of loop by
4786	following pass. We penalize the cost by rounding up in order to
4787	neutralize such effects. /*
4788	cost.cost = adjust_setup_cost (data, cost: cost.cost, round_up_p: true);
4789	cost.scratch = cost.cost;
4790	}
4791
4792	cost += var_cost;
4793	addr = addr_for_mem_ref (&parts, as, false);
4794	gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4795	cost += address_cost (addr, mem_mode, as, speed);
4796
4797	if (parts.symbol != NULL_TREE)
4798	cost.complexity += `1`;
4799	/ Don't increase the complexity of adding a scaled index if it's*
4800	the only kind of index that the target allows. /*
4801	if (parts.step != NULL_TREE && ok_without_ratio_p)
4802	cost.complexity += `1`;
4803	if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4804	cost.complexity += `1`;
4805	if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4806	cost.complexity += `1`;
4807
4808	return cost;
4809	}
4810
4811	/ Scale (multiply) the computed COST (except scratch part that should be*
4812	hoisted out a loop) by header->frequency / AT->frequency, which makes
4813	expected cost more accurate. /*
4814
4815	static comp_cost
4816	get_scaled_computation_cost_at (ivopts_data data, gimple at, comp_cost cost)
4817	{
4818	if (data->speed
4819	&& data->current_loop->header->count.to_frequency (cfun) > `0`)
4820	{
4821	basic_block bb = gimple_bb (g: at);
4822	gcc_assert (cost.scratch <= cost.cost);
4823	int scale_factor = (int)(intptr_t) bb->aux;
4824	if (scale_factor == `1`)
4825	return cost;
4826
4827	int64_t scaled_cost
4828	= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4829
4830	if (dump_file && (dump_flags & TDF_DETAILS))
4831	fprintf (stream: dump_file, format: "Scaling cost based on bb prob by %2.2f: "
4832	"%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4833	`1.0f` * scale_factor, cost.cost, cost.scratch, scaled_cost);
4834
4835	cost.cost = scaled_cost;
4836	}
4837
4838	return cost;
4839	}
4840
4841	/ Determines the cost of the computation by that USE is expressed*
4842	from induction variable CAND. If ADDRESS_P is true, we just need
4843	to create an address from it, otherwise we want to get it into
4844	register. A set of invariants we depend on is stored in INV_VARS.
4845	If CAN_AUTOINC is nonnull, use it to record whether autoinc
4846	addressing is likely. If INV_EXPR is nonnull, record invariant
4847	expr entry in it. /*
4848
4849	static comp_cost
4850	get_computation_cost (struct ivopts_data data, struct* iv_use *use,
4851	struct iv_cand cand, bool* address_p, bitmap *inv_vars,
4852	bool can_autoinc, iv_inv_expr_ent *inv_expr)
4853	{
4854	gimple *at = use->stmt;
4855	tree ubase = use->iv->base, cbase = cand->iv->base;
4856	tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4857	tree comp_inv = NULL_TREE;
4858	HOST_WIDE_INT ratio, aratio;
4859	comp_cost cost;
4860	widest_int rat;
4861	aff_tree aff_inv, aff_var;
4862	bool speed = optimize_bb_for_speed_p (gimple_bb (g: at));
4863
4864	if (inv_vars)
4865	*inv_vars = NULL;
4866	if (can_autoinc)
4867	can_autoinc = false*;
4868	if (inv_expr)
4869	*inv_expr = NULL;
4870
4871	/ Check if we have enough precision to express the values of use. /
4872	if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4873	return infinite_cost;
4874
4875	if (address_p
4876	\|\| (use->iv->base_object
4877	&& cand->iv->base_object
4878	&& POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4879	&& POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4880	{
4881	/ Do not try to express address of an object with computation based*
4882	on address of a different object. This may cause problems in rtl
4883	level alias analysis (that does not expect this to be happening,
4884	as this is illegal in C), and would be unlikely to be useful
4885	anyway. /*
4886	if (use->iv->base_object
4887	&& cand->iv->base_object
4888	&& !operand_equal_p (use->iv->base_object, cand->iv->base_object, flags: `0`))
4889	return infinite_cost;
4890	}
4891
4892	if (!get_computation_aff_1 (data, at, use, cand, aff_inv: &aff_inv, aff_var: &aff_var, prat: &rat)
4893	\|\| !wi::fits_shwi_p (x: rat))
4894	return infinite_cost;
4895
4896	ratio = rat.to_shwi ();
4897	if (address_p)
4898	{
4899	cost = get_address_cost (data, use, cand, aff_inv: &aff_inv, aff_var: &aff_var, ratio,
4900	inv_vars, inv_expr, can_autoinc, speed);
4901	cost = get_scaled_computation_cost_at (data, at, cost);
4902	/ For doloop IV cand, add on the extra cost. /
4903	cost += cand->doloop_p ? targetm.doloop_cost_for_address : `0`;
4904	return cost;
4905	}
4906
4907	bool simple_inv = (aff_combination_const_p (aff: &aff_inv)
4908	\|\| aff_combination_singleton_var_p (aff: &aff_inv));
4909	tree signed_type = signed_type_for (aff_combination_type (aff: &aff_inv));
4910	aff_combination_convert (&aff_inv, signed_type);
4911	if (!aff_combination_zero_p (aff: &aff_inv))
4912	comp_inv = aff_combination_to_tree (&aff_inv);
4913
4914	cost = force_var_cost (data, expr: comp_inv, inv_vars);
4915	if (comp_inv && inv_expr && !simple_inv)
4916	{
4917	*inv_expr = get_loop_invariant_expr (data, inv_expr: comp_inv);
4918	/ Clear depends on. /
4919	if (inv_expr != NULL && inv_vars && inv_vars)
4920	bitmap_clear (*inv_vars);
4921
4922	cost.cost = adjust_setup_cost (data, cost: cost.cost);
4923	/ Record setup cost in scratch field. /
4924	cost.scratch = cost.cost;
4925	}
4926	/ Cost of constant integer can be covered when adding invariant part to*
4927	variant part. /*
4928	else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4929	cost = no_cost;
4930
4931	/ Need type narrowing to represent use with cand. /
4932	if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4933	{
4934	machine_mode outer_mode = TYPE_MODE (utype);
4935	machine_mode inner_mode = TYPE_MODE (ctype);
4936	cost += comp_cost (convert_cost (to_mode: outer_mode, from_mode: inner_mode, speed), `0`);
4937	}
4938
4939	/ Turn a + i * (-c) into a - i * c. /
4940	if (ratio < `0` && comp_inv && !integer_zerop (comp_inv))
4941	aratio = -ratio;
4942	else
4943	aratio = ratio;
4944
4945	if (ratio != `1`)
4946	cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4947
4948	/ TODO: We may also need to check if we can compute a + i * 4 in one*
4949	instruction. /*
4950	/ Need to add up the invariant and variant parts. /
4951	if (comp_inv && !integer_zerop (comp_inv))
4952	cost += add_cost (speed, TYPE_MODE (utype));
4953
4954	cost = get_scaled_computation_cost_at (data, at, cost);
4955
4956	/ For doloop IV cand, add on the extra cost. /
4957	if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4958	cost += targetm.doloop_cost_for_generic;
4959
4960	return cost;
4961	}
4962
4963	/ Determines cost of computing the use in GROUP with CAND in a generic*
4964	expression. /*
4965
4966	static bool
4967	determine_group_iv_cost_generic (struct ivopts_data *data,
4968	struct iv_group group, struct* iv_cand *cand)
4969	{
4970	comp_cost cost;
4971	iv_inv_expr_ent *inv_expr = NULL;
4972	bitmap inv_vars = NULL, inv_exprs = NULL;
4973	struct iv_use *use = group->vuses [`0`];
4974
4975	/ The simple case first -- if we need to express value of the preserved*
4976	original biv, the cost is 0. This also prevents us from counting the
4977	cost of increment twice -- once at this use and once in the cost of
4978	the candidate. /*
4979	if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4980	cost = no_cost;
4981	/ If the IV candidate involves undefined SSA values and is not the*
4982	same IV as on the USE avoid using that candidate here. /*
4983	else if (cand->involves_undefs
4984	&& (!use->iv \|\| !operand_equal_p (cand->iv->base, use->iv->base, flags: `0`)))
4985	return false;
4986	else
4987	cost = get_computation_cost (data, use, cand, address_p: false,
4988	inv_vars: &inv_vars, NULL, inv_expr: &inv_expr);
4989
4990	if (inv_expr)
4991	{
4992	inv_exprs = BITMAP_ALLOC (NULL);
4993	bitmap_set_bit (inv_exprs, inv_expr->id);
4994	}
4995	set_group_iv_cost (data, group, cand, cost, inv_vars,
4996	NULL_TREE, comp: ERROR_MARK, inv_exprs);
4997	return !cost.infinite_cost_p ();
4998	}
4999
5000	/ Determines cost of computing uses in GROUP with CAND in addresses. /
5001
5002	static bool
5003	determine_group_iv_cost_address (struct ivopts_data *data,
5004	struct iv_group group, struct* iv_cand *cand)
5005	{
5006	unsigned i;
5007	bitmap inv_vars = NULL, inv_exprs = NULL;
5008	bool can_autoinc;
5009	iv_inv_expr_ent *inv_expr = NULL;
5010	struct iv_use *use = group->vuses [`0`];
5011	comp_cost sum_cost = no_cost, cost;
5012
5013	cost = get_computation_cost (data, use, cand, address_p: true,
5014	inv_vars: &inv_vars, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5015
5016	if (inv_expr)
5017	{
5018	inv_exprs = BITMAP_ALLOC (NULL);
5019	bitmap_set_bit (inv_exprs, inv_expr->id);
5020	}
5021	sum_cost = cost;
5022	if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5023	{
5024	if (can_autoinc)
5025	sum_cost -= cand->cost_step;
5026	/ If we generated the candidate solely for exploiting autoincrement*
5027	opportunities, and it turns out it can't be used, set the cost to
5028	infinity to make sure we ignore it. /*
5029	else if (cand->pos == IP_AFTER_USE \|\| cand->pos == IP_BEFORE_USE)
5030	sum_cost = infinite_cost;
5031	}
5032
5033	/ Compute and add costs for rest uses of this group. /
5034	for (i = `1`; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5035	{
5036	struct iv_use *next = group->vuses [i];
5037
5038	/ TODO: We could skip computing cost for sub iv_use when it has the*
5039	same cost as the first iv_use, but the cost really depends on the
5040	offset and where the iv_use is. /*
5041	cost = get_computation_cost (data, use: next, cand, address_p: true,
5042	NULL, can_autoinc: &can_autoinc, inv_expr: &inv_expr);
5043	if (inv_expr)
5044	{
5045	if (!inv_exprs)
5046	inv_exprs = BITMAP_ALLOC (NULL);
5047
5048	/ Uses in a group can share setup code,*
5049	so only add setup cost once. /*
5050	if (bitmap_bit_p (inv_exprs, inv_expr->id))
5051	cost -= cost.scratch;
5052	else
5053	bitmap_set_bit (inv_exprs, inv_expr->id);
5054	}
5055	sum_cost += cost;
5056	}
5057	set_group_iv_cost (data, group, cand, cost: sum_cost, inv_vars,
5058	NULL_TREE, comp: ERROR_MARK, inv_exprs);
5059
5060	return !sum_cost.infinite_cost_p ();
5061	}
5062
5063	/ Computes value of candidate CAND at position AT in iteration DESC->NITER,*
5064	and stores it to VAL. /*
5065
5066	static void
5067	cand_value_at (class loop loop, struct* iv_cand cand, gimple at,
5068	class tree_niter_desc desc, aff_tree val)
5069	{
5070	aff_tree step, delta, nit;
5071	struct iv *iv = cand->iv;
5072	tree type = TREE_TYPE (iv->base);
5073	tree niter = desc->niter;
5074	bool after_adjust = stmt_after_increment (loop, cand, stmt: at);
5075	tree steptype;
5076
5077	if (POINTER_TYPE_P (type))
5078	steptype = sizetype;
5079	else
5080	steptype = unsigned_type_for (type);
5081
5082	/ If AFTER_ADJUST is required, the code below generates the equivalent*
5083	of BASE + NITER STEP + STEP, when ideally we'd prefer the expression*
5084	BASE + (NITER + 1) STEP, especially when NITER is often of the form*
5085	SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5086	doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5087	class for common idioms that we know are safe. /*
5088	if (after_adjust
5089	&& desc->control.no_overflow
5090	&& integer_onep (desc->control.step)
5091	&& (desc->cmp == LT_EXPR
5092	\|\| desc->cmp == NE_EXPR)
5093	&& TREE_CODE (desc->bound) == SSA_NAME)
5094	{
5095	if (integer_onep (desc->control.base))
5096	{
5097	niter = desc->bound;
5098	after_adjust = false;
5099	}
5100	else if (TREE_CODE (niter) == MINUS_EXPR
5101	&& integer_onep (TREE_OPERAND (niter, `1`)))
5102	{
5103	niter = TREE_OPERAND (niter, `0`);
5104	after_adjust = false;
5105	}
5106	}
5107
5108	tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5109	aff_combination_convert (&step, steptype);
5110	tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5111	aff_combination_convert (&nit, steptype);
5112	aff_combination_mult (&nit, &step, &delta);
5113	if (after_adjust)
5114	aff_combination_add (&delta, &step);
5115
5116	tree_to_aff_combination (iv->base, type, val);
5117	if (!POINTER_TYPE_P (type))
5118	aff_combination_convert (val, steptype);
5119	aff_combination_add (val, &delta);
5120	}
5121
5122	/ Returns period of induction variable iv. /
5123
5124	static tree
5125	iv_period (struct iv *iv)
5126	{
5127	tree step = iv->step, period, type;
5128	tree pow2div;
5129
5130	gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131
5132	type = unsigned_type_for (TREE_TYPE (step));
5133	/ Period of the iv is lcm (step, type_range)/step -1,*
5134	i.e., Ntype_range/step - 1. Since type range is power*
5135	of two, N == (step >> num_of_ending_zeros_binary (step),
5136	so the final result is
5137
5138	(type_range >> num_of_ending_zeros_binary (step)) - 1
5139
5140	*/
5141	pow2div = num_ending_zeros (step);
5142
5143	period = build_low_bits_mask (type,
5144	(TYPE_PRECISION (type)
5145	- tree_to_uhwi (pow2div)));
5146
5147	return period;
5148	}
5149
5150	/ Returns the comparison operator used when eliminating the iv USE. /
5151
5152	static enum tree_code
5153	iv_elimination_compare (struct ivopts_data data, struct* iv_use *use)
5154	{
5155	class loop *loop = data->current_loop;
5156	basic_block ex_bb;
5157	edge exit;
5158
5159	ex_bb = gimple_bb (g: use->stmt);
5160	exit = EDGE_SUCC (ex_bb, `0`);
5161	if (flow_bb_inside_loop_p (loop, exit->dest))
5162	exit = EDGE_SUCC (ex_bb, `1`);
5163
5164	return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5165	}
5166
5167	/ Returns true if we can prove that BASE - OFFSET does not overflow. For now,*
5168	we only detect the situation that BASE = SOMETHING + OFFSET, where the
5169	calculation is performed in non-wrapping type.
5170
5171	TODO: More generally, we could test for the situation that
5172	BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5173	This would require knowing the sign of OFFSET. /*
5174
5175	static bool
5176	difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177	{
5178	enum tree_code code;
5179	tree e1, e2;
5180	aff_tree aff_e1, aff_e2, aff_offset;
5181
5182	if (!nowrap_type_p (TREE_TYPE (base)))
5183	return false;
5184
5185	base = expand_simple_operations (base);
5186
5187	if (TREE_CODE (base) == SSA_NAME)
5188	{
5189	gimple *stmt = SSA_NAME_DEF_STMT (base);
5190
5191	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
5192	return false;
5193
5194	code = gimple_assign_rhs_code (gs: stmt);
5195	if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5196	return false;
5197
5198	e1 = gimple_assign_rhs1 (gs: stmt);
5199	e2 = gimple_assign_rhs2 (gs: stmt);
5200	}
5201	else
5202	{
5203	code = TREE_CODE (base);
5204	if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205	return false;
5206	e1 = TREE_OPERAND (base, `0`);
5207	e2 = TREE_OPERAND (base, `1`);
5208	}
5209
5210	/ Use affine expansion as deeper inspection to prove the equality. /
5211	tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5212	&aff_e2, &data->name_expansion_cache);
5213	tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5214	&aff_offset, &data->name_expansion_cache);
5215	aff_combination_scale (&aff_offset, -`1`);
5216	switch (code)
5217	{
5218	case PLUS_EXPR:
5219	aff_combination_add (&aff_e2, &aff_offset);
5220	if (aff_combination_zero_p (aff: &aff_e2))
5221	return true;
5222
5223	tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5224	&aff_e1, &data->name_expansion_cache);
5225	aff_combination_add (&aff_e1, &aff_offset);
5226	return aff_combination_zero_p (aff: &aff_e1);
5227
5228	case POINTER_PLUS_EXPR:
5229	aff_combination_add (&aff_e2, &aff_offset);
5230	return aff_combination_zero_p (aff: &aff_e2);
5231
5232	default:
5233	return false;
5234	}
5235	}
5236
5237	/ Tries to replace loop exit by one formulated in terms of a LT_EXPR*
5238	comparison with CAND. NITER describes the number of iterations of
5239	the loops. If successful, the comparison in COMP_P is altered accordingly.
5240
5241	We aim to handle the following situation:
5242
5243	sometype base, p;
5244	int a, b, i;
5245
5246	i = a;
5247	p = p_0 = base + a;
5248
5249	do
5250	{
5251	bla (p);*
5252	p++;
5253	i++;
5254	}
5255	while (i < b);
5256
5257	Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5258	We aim to optimize this to
5259
5260	p = p_0 = base + a;
5261	do
5262	{
5263	bla (p);*
5264	p++;
5265	}
5266	while (p < p_0 - a + b);
5267
5268	This preserves the correctness, since the pointer arithmetics does not
5269	overflow. More precisely:
5270
5271	1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5272	overflow in computing it or the values of p.
5273	2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5274	overflow. To prove this, we use the fact that p_0 = base + a. /*
5275
5276	static bool
5277	iv_elimination_compare_lt (struct ivopts_data *data,
5278	struct iv_cand cand, enum* tree_code *comp_p,
5279	class tree_niter_desc *niter)
5280	{
5281	tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5282	class aff_tree nit, tmpa, tmpb;
5283	enum tree_code comp;
5284	HOST_WIDE_INT step;
5285
5286	/ We need to know that the candidate induction variable does not overflow.*
5287	While more complex analysis may be used to prove this, for now just
5288	check that the variable appears in the original program and that it
5289	is computed in a type that guarantees no overflows. /*
5290	cand_type = TREE_TYPE (cand->iv->base);
5291	if (cand->pos != IP_ORIGINAL \|\| !nowrap_type_p (cand_type))
5292	return false;
5293
5294	/ Make sure that the loop iterates till the loop bound is hit, as otherwise*
5295	the calculation of the BOUND could overflow, making the comparison
5296	invalid. /*
5297	if (!data->loop_single_exit_p)
5298	return false;
5299
5300	/ We need to be able to decide whether candidate is increasing or decreasing*
5301	in order to choose the right comparison operator. /*
5302	if (!cst_and_fits_in_hwi (cand->iv->step))
5303	return false;
5304	step = int_cst_value (cand->iv->step);
5305
5306	/ Check that the number of iterations matches the expected pattern:*
5307	a + 1 > b ? 0 : b - a - 1. /*
5308	mbz = niter->may_be_zero;
5309	if (TREE_CODE (mbz) == GT_EXPR)
5310	{
5311	/ Handle a + 1 > b. /
5312	tree op0 = TREE_OPERAND (mbz, `0`);
5313	if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, `1`)))
5314	{
5315	a = TREE_OPERAND (op0, `0`);
5316	b = TREE_OPERAND (mbz, `1`);
5317	}
5318	else
5319	return false;
5320	}
5321	else if (TREE_CODE (mbz) == LT_EXPR)
5322	{
5323	tree op1 = TREE_OPERAND (mbz, `1`);
5324
5325	/ Handle b < a + 1. /
5326	if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, `1`)))
5327	{
5328	a = TREE_OPERAND (op1, `0`);
5329	b = TREE_OPERAND (mbz, `0`);
5330	}
5331	else
5332	return false;
5333	}
5334	else
5335	return false;
5336
5337	/ Expected number of iterations is B - A - 1. Check that it matches*
5338	the actual number, i.e., that B - A - NITER = 1. /*
5339	tree_to_aff_combination (niter->niter, nit_type, &nit);
5340	tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5341	tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5342	aff_combination_scale (&nit, -`1`);
5343	aff_combination_scale (&tmpa, -`1`);
5344	aff_combination_add (&tmpb, &tmpa);
5345	aff_combination_add (&tmpb, &nit);
5346	if (tmpb.n != `0` \|\| maybe_ne (a: tmpb.offset, b: `1`))
5347	return false;
5348
5349	/ Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not*
5350	overflow. /*
5351	offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5352	cand->iv->step,
5353	fold_convert (TREE_TYPE (cand->iv->step), a));
5354	if (!difference_cannot_overflow_p (data, base: cand->iv->base, offset))
5355	return false;
5356
5357	/ Determine the new comparison operator. /
5358	comp = step < `0` ? GT_EXPR : LT_EXPR;
5359	if (*comp_p == NE_EXPR)
5360	*comp_p = comp;
5361	else if (*comp_p == EQ_EXPR)
5362	comp_p = invert_tree_comparison (comp, false*);
5363	else
5364	gcc_unreachable ();
5365
5366	return true;
5367	}
5368
5369	/ Check whether it is possible to express the condition in USE by comparison*
5370	of candidate CAND. If so, store the value compared with to BOUND, and the
5371	comparison operator to COMP. /*
5372
5373	static bool
5374	may_eliminate_iv (struct ivopts_data *data,
5375	struct iv_use use, struct* iv_cand cand, tree bound,
5376	enum tree_code *comp)
5377	{
5378	basic_block ex_bb;
5379	edge exit;
5380	tree period;
5381	class loop *loop = data->current_loop;
5382	aff_tree bnd;
5383	class tree_niter_desc *desc = NULL;
5384
5385	if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5386	return false;
5387
5388	/ For now works only for exits that dominate the loop latch.*
5389	TODO: extend to other conditions inside loop body. /*
5390	ex_bb = gimple_bb (g: use->stmt);
5391	if (use->stmt != last_nondebug_stmt (ex_bb)
5392	\|\| gimple_code (g: use->stmt) != GIMPLE_COND
5393	\|\| !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5394	return false;
5395
5396	exit = EDGE_SUCC (ex_bb, `0`);
5397	if (flow_bb_inside_loop_p (loop, exit->dest))
5398	exit = EDGE_SUCC (ex_bb, `1`);
5399	if (flow_bb_inside_loop_p (loop, exit->dest))
5400	return false;
5401
5402	desc = niter_for_exit (data, exit);
5403	if (!desc)
5404	return false;
5405
5406	/ Determine whether we can use the variable to test the exit condition.*
5407	This is the case iff the period of the induction variable is greater
5408	than the number of iterations for which the exit condition is true. /*
5409	period = iv_period (iv: cand->iv);
5410
5411	/ If the number of iterations is constant, compare against it directly. /
5412	if (TREE_CODE (desc->niter) == INTEGER_CST)
5413	{
5414	/ See cand_value_at. /
5415	if (stmt_after_increment (loop, cand, stmt: use->stmt))
5416	{
5417	if (!tree_int_cst_lt (t1: desc->niter, t2: period))
5418	return false;
5419	}
5420	else
5421	{
5422	if (tree_int_cst_lt (t1: period, t2: desc->niter))
5423	return false;
5424	}
5425	}
5426
5427	/ If not, and if this is the only possible exit of the loop, see whether*
5428	we can get a conservative estimate on the number of iterations of the
5429	entire loop and compare against that instead. /*
5430	else
5431	{
5432	widest_int period_value, max_niter;
5433
5434	max_niter = desc->max;
5435	if (stmt_after_increment (loop, cand, stmt: use->stmt))
5436	max_niter += `1`;
5437	period_value = wi::to_widest (t: period);
5438	if (wi::gtu_p (x: max_niter, y: period_value))
5439	{
5440	/ See if we can take advantage of inferred loop bound*
5441	information. /*
5442	if (data->loop_single_exit_p)
5443	{
5444	if (!max_loop_iterations (loop, &max_niter))
5445	return false;
5446	/ The loop bound is already adjusted by adding 1. /
5447	if (wi::gtu_p (x: max_niter, y: period_value))
5448	return false;
5449	}
5450	else
5451	return false;
5452	}
5453	}
5454
5455	/ For doloop IV cand, the bound would be zero. It's safe whether*
5456	may_be_zero set or not. /*
5457	if (cand->doloop_p)
5458	{
5459	*bound = build_int_cst (TREE_TYPE (cand->iv->base), `0`);
5460	*comp = iv_elimination_compare (data, use);
5461	return true;
5462	}
5463
5464	cand_value_at (loop, cand, at: use->stmt, desc, val: &bnd);
5465
5466	*bound = fold_convert (TREE_TYPE (cand->iv->base),
5467	aff_combination_to_tree (&bnd));
5468	*comp = iv_elimination_compare (data, use);
5469
5470	/ It is unlikely that computing the number of iterations using division*
5471	would be more profitable than keeping the original induction variable. /*
5472	bool cond_overflow_p;
5473	if (expression_expensive_p (*bound, &cond_overflow_p))
5474	return false;
5475
5476	/ Sometimes, it is possible to handle the situation that the number of*
5477	iterations may be zero unless additional assumptions by using <
5478	instead of != in the exit condition.
5479
5480	TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5481	base the exit condition on it. However, that is often too
5482	expensive. /*
5483	if (!integer_zerop (desc->may_be_zero))
5484	return iv_elimination_compare_lt (data, cand, comp_p: comp, niter: desc);
5485
5486	return true;
5487	}
5488
5489	/ Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must*
5490	be copied, if it is used in the loop body and DATA->body_includes_call. /*
5491
5492	static int
5493	parm_decl_cost (struct ivopts_data *data, tree bound)
5494	{
5495	tree sbound = bound;
5496	STRIP_NOPS (sbound);
5497
5498	if (TREE_CODE (sbound) == SSA_NAME
5499	&& SSA_NAME_IS_DEFAULT_DEF (sbound)
5500	&& TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5501	&& data->body_includes_call)
5502	return COSTS_N_INSNS (`1`);
5503
5504	return `0`;
5505	}
5506
5507	/ Determines cost of computing the use in GROUP with CAND in a condition. /
5508
5509	static bool
5510	determine_group_iv_cost_cond (struct ivopts_data *data,
5511	struct iv_group group, struct* iv_cand *cand)
5512	{
5513	tree bound = NULL_TREE;
5514	struct iv *cmp_iv;
5515	bitmap inv_exprs = NULL;
5516	bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5517	comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5518	enum comp_iv_rewrite rewrite_type;
5519	iv_inv_expr_ent inv_expr_elim = NULL, inv_expr_express = NULL, *inv_expr;
5520	tree control_var, bound_cst;
5521	enum tree_code comp = ERROR_MARK;
5522	struct iv_use *use = group->vuses [`0`];
5523
5524	/ Extract condition operands. /
5525	rewrite_type = extract_cond_operands (data, stmt: use->stmt, control_var: &control_var,
5526	bound: &bound_cst, NULL, iv_bound: &cmp_iv);
5527	gcc_assert (rewrite_type != COMP_IV_NA);
5528
5529	/ Try iv elimination. /
5530	if (rewrite_type == COMP_IV_ELIM
5531	&& may_eliminate_iv (data, use, cand, bound: &bound, comp: &comp))
5532	{
5533	elim_cost = force_var_cost (data, expr: bound, inv_vars: &inv_vars_elim);
5534	if (elim_cost.cost == `0`)
5535	elim_cost.cost = parm_decl_cost (data, bound);
5536	else if (TREE_CODE (bound) == INTEGER_CST)
5537	elim_cost.cost = `0`;
5538	/ If we replace a loop condition 'i < n' with 'p < base + n',*
5539	inv_vars_elim will have 'base' and 'n' set, which implies that both
5540	'base' and 'n' will be live during the loop. More likely,
5541	'base + n' will be loop invariant, resulting in only one live value
5542	during the loop. So in that case we clear inv_vars_elim and set
5543	inv_expr_elim instead. /*
5544	if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > `1`)
5545	{
5546	inv_expr_elim = get_loop_invariant_expr (data, inv_expr: bound);
5547	bitmap_clear (inv_vars_elim);
5548	}
5549	/ The bound is a loop invariant, so it will be only computed*
5550	once. /*
5551	elim_cost.cost = adjust_setup_cost (data, cost: elim_cost.cost);
5552	}
5553
5554	/ When the condition is a comparison of the candidate IV against*
5555	zero, prefer this IV.
5556
5557	TODO: The constant that we're subtracting from the cost should
5558	be target-dependent. This information should be added to the
5559	target costs for each backend. /*
5560	if (!elim_cost.infinite_cost_p () / Do not try to decrease infinite! /
5561	&& integer_zerop (*bound_cst)
5562	&& (operand_equal_p (*control_var, cand->var_after, flags: `0`)
5563	\|\| operand_equal_p (*control_var, cand->var_before, flags: `0`)))
5564	elim_cost -= `1`;
5565
5566	express_cost = get_computation_cost (data, use, cand, address_p: false,
5567	inv_vars: &inv_vars_express, NULL,
5568	inv_expr: &inv_expr_express);
5569	if (cmp_iv != NULL)
5570	find_inv_vars (data, expr_p: &cmp_iv->base, inv_vars: &inv_vars_express);
5571
5572	/ Count the cost of the original bound as well. /
5573	bound_cost = force_var_cost (data, expr: *bound_cst, NULL);
5574	if (bound_cost.cost == `0`)
5575	bound_cost.cost = parm_decl_cost (data, bound: *bound_cst);
5576	else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5577	bound_cost.cost = `0`;
5578	express_cost += bound_cost;
5579
5580	/ Choose the better approach, preferring the eliminated IV. /
5581	if (elim_cost <= express_cost)
5582	{
5583	cost = elim_cost;
5584	inv_vars = inv_vars_elim;
5585	inv_vars_elim = NULL;
5586	inv_expr = inv_expr_elim;
5587	/ For doloop candidate/use pair, adjust to zero cost. /
5588	if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5589	cost = no_cost;
5590	}
5591	else
5592	{
5593	cost = express_cost;
5594	inv_vars = inv_vars_express;
5595	inv_vars_express = NULL;
5596	bound = NULL_TREE;
5597	comp = ERROR_MARK;
5598	inv_expr = inv_expr_express;
5599	}
5600
5601	if (inv_expr)
5602	{
5603	inv_exprs = BITMAP_ALLOC (NULL);
5604	bitmap_set_bit (inv_exprs, inv_expr->id);
5605	}
5606	set_group_iv_cost (data, group, cand, cost,
5607	inv_vars, value: bound, comp, inv_exprs);
5608
5609	if (inv_vars_elim)
5610	BITMAP_FREE (inv_vars_elim);
5611	if (inv_vars_express)
5612	BITMAP_FREE (inv_vars_express);
5613
5614	return !cost.infinite_cost_p ();
5615	}
5616
5617	/ Determines cost of computing uses in GROUP with CAND. Returns false*
5618	if USE cannot be represented with CAND. /*
5619
5620	static bool
5621	determine_group_iv_cost (struct ivopts_data *data,
5622	struct iv_group group, struct* iv_cand *cand)
5623	{
5624	switch (group->type)
5625	{
5626	case USE_NONLINEAR_EXPR:
5627	return determine_group_iv_cost_generic (data, group, cand);
5628
5629	case USE_REF_ADDRESS:
5630	case USE_PTR_ADDRESS:
5631	return determine_group_iv_cost_address (data, group, cand);
5632
5633	case USE_COMPARE:
5634	return determine_group_iv_cost_cond (data, group, cand);
5635
5636	default:
5637	gcc_unreachable ();
5638	}
5639	}
5640
5641	/ Return true if get_computation_cost indicates that autoincrement is*
5642	a possibility for the pair of USE and CAND, false otherwise. /*
5643
5644	static bool
5645	autoinc_possible_for_pair (struct ivopts_data data, struct* iv_use *use,
5646	struct iv_cand *cand)
5647	{
5648	if (!address_p (type: use->type))
5649	return false;
5650
5651	bool can_autoinc = false;
5652	get_computation_cost (data, use, cand, address_p: true, NULL, can_autoinc: &can_autoinc, NULL);
5653	return can_autoinc;
5654	}
5655
5656	/ Examine IP_ORIGINAL candidates to see if they are incremented next to a*
5657	use that allows autoincrement, and set their AINC_USE if possible. /*
5658
5659	static void
5660	set_autoinc_for_original_candidates (struct ivopts_data *data)
5661	{
5662	unsigned i, j;
5663
5664	for (i = `0`; i < data->vcands.length (); i++)
5665	{
5666	struct iv_cand *cand = data->vcands [i];
5667	struct iv_use *closest_before = NULL;
5668	struct iv_use *closest_after = NULL;
5669	if (cand->pos != IP_ORIGINAL)
5670	continue;
5671
5672	for (j = `0`; j < data->vgroups.length (); j++)
5673	{
5674	struct iv_group *group = data->vgroups [j];
5675	struct iv_use *use = group->vuses [`0`];
5676	unsigned uid = gimple_uid (g: use->stmt);
5677
5678	if (gimple_bb (g: use->stmt) != gimple_bb (g: cand->incremented_at))
5679	continue;
5680
5681	if (uid < gimple_uid (g: cand->incremented_at)
5682	&& (closest_before == NULL
5683	\|\| uid > gimple_uid (g: closest_before->stmt)))
5684	closest_before = use;
5685
5686	if (uid > gimple_uid (g: cand->incremented_at)
5687	&& (closest_after == NULL
5688	\|\| uid < gimple_uid (g: closest_after->stmt)))
5689	closest_after = use;
5690	}
5691
5692	if (closest_before != NULL
5693	&& autoinc_possible_for_pair (data, use: closest_before, cand))
5694	cand->ainc_use = closest_before;
5695	else if (closest_after != NULL
5696	&& autoinc_possible_for_pair (data, use: closest_after, cand))
5697	cand->ainc_use = closest_after;
5698	}
5699	}
5700
5701	/ Relate compare use with all candidates. /
5702
5703	static void
5704	relate_compare_use_with_all_cands (struct ivopts_data *data)
5705	{
5706	unsigned i, count = data->vcands.length ();
5707	for (i = `0`; i < data->vgroups.length (); i++)
5708	{
5709	struct iv_group *group = data->vgroups [i];
5710
5711	if (group->type == USE_COMPARE)
5712	bitmap_set_range (group->related_cands, `0`, count);
5713	}
5714	}
5715
5716	/ If PREFERRED_MODE is suitable and profitable, use the preferred*
5717	PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. /*
5718
5719	static tree
5720	compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5721	const widest_int &iterations_max)
5722	{
5723	tree ntype = TREE_TYPE (niter);
5724	tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, `1`);
5725	if (!pref_type)
5726	return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5727	build_int_cst (ntype, `1`));
5728
5729	gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5730
5731	int prec = TYPE_PRECISION (ntype);
5732	int pref_prec = TYPE_PRECISION (pref_type);
5733
5734	tree base;
5735
5736	/ Check if the PREFERRED_MODED is able to present niter. /
5737	if (pref_prec > prec
5738	\|\| wi::ltu_p (x: iterations_max,
5739	y: widest_int::from (x: wi::max_value (pref_prec, UNSIGNED),
5740	sgn: UNSIGNED)))
5741	{
5742	/ No wrap, it is safe to use preferred type after niter + 1. /
5743	if (wi::ltu_p (x: iterations_max,
5744	y: widest_int::from (x: wi::max_value (prec, UNSIGNED),
5745	sgn: UNSIGNED)))
5746	{
5747	/ This could help to optimize "-1 +1" pair when niter looks*
5748	like "n-1": n is in original mode. "base = (n - 1) + 1"
5749	in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. /*
5750	base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5751	build_int_cst (ntype, `1`));
5752	base = fold_convert (pref_type, base);
5753	}
5754
5755	/ To avoid wrap, convert niter to preferred type before plus 1. /
5756	else
5757	{
5758	niter = fold_convert (pref_type, niter);
5759	base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5760	build_int_cst (pref_type, `1`));
5761	}
5762	}
5763	else
5764	base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5765	build_int_cst (ntype, `1`));
5766	return base;
5767	}
5768
5769	/ Add one doloop dedicated IV candidate:*
5770	- Base is (may_be_zero ? 1 : (niter + 1)).
5771	- Step is -1. /*
5772
5773	static void
5774	add_iv_candidate_for_doloop (struct ivopts_data *data)
5775	{
5776	tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5777	gcc_assert (niter_desc && niter_desc->assumptions);
5778
5779	tree niter = niter_desc->niter;
5780	tree ntype = TREE_TYPE (niter);
5781	gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5782
5783	tree may_be_zero = niter_desc->may_be_zero;
5784	if (may_be_zero && integer_zerop (may_be_zero))
5785	may_be_zero = NULL_TREE;
5786	if (may_be_zero)
5787	{
5788	if (COMPARISON_CLASS_P (may_be_zero))
5789	{
5790	niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5791	build_int_cst (ntype, `0`),
5792	rewrite_to_non_trapping_overflow (niter));
5793	}
5794	/ Don't try to obtain the iteration count expression when may_be_zero is*
5795	integer_nonzerop (actually iteration count is one) or else. /*
5796	else
5797	return;
5798	}
5799
5800	machine_mode mode = TYPE_MODE (ntype);
5801	machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5802
5803	tree base;
5804	if (mode != pref_mode)
5805	{
5806	base = compute_doloop_base_on_mode (preferred_mode: pref_mode, niter, iterations_max: niter_desc->max);
5807	ntype = TREE_TYPE (base);
5808	}
5809	else
5810	base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5811	build_int_cst (ntype, `1`));
5812
5813
5814	add_candidate (data, base, step: build_int_cst (ntype, -`1`), important: true, NULL, NULL, doloop: true);
5815	}
5816
5817	/ Finds the candidates for the induction variables. /
5818
5819	static void
5820	find_iv_candidates (struct ivopts_data *data)
5821	{
5822	/ Add commonly used ivs. /
5823	add_standard_iv_candidates (data);
5824
5825	/ Add doloop dedicated ivs. /
5826	if (data->doloop_use_p)
5827	add_iv_candidate_for_doloop (data);
5828
5829	/ Add old induction variables. /
5830	add_iv_candidate_for_bivs (data);
5831
5832	/ Add induction variables derived from uses. /
5833	add_iv_candidate_for_groups (data);
5834
5835	set_autoinc_for_original_candidates (data);
5836
5837	/ Record the important candidates. /
5838	record_important_candidates (data);
5839
5840	/ Relate compare iv_use with all candidates. /
5841	if (!data->consider_all_candidates)
5842	relate_compare_use_with_all_cands (data);
5843
5844	if (dump_file && (dump_flags & TDF_DETAILS))
5845	{
5846	unsigned i;
5847
5848	fprintf (stream: dump_file, format: "\n<Important Candidates>:\t");
5849	for (i = `0`; i < data->vcands.length (); i++)
5850	if (data->vcands [i]->important)
5851	fprintf (stream: dump_file, format: " %d,", data->vcands [i]->id);
5852	fprintf (stream: dump_file, format: "\n");
5853
5854	fprintf (stream: dump_file, format: "\n<Group, Cand> Related:\n");
5855	for (i = `0`; i < data->vgroups.length (); i++)
5856	{
5857	struct iv_group *group = data->vgroups [i];
5858
5859	if (group->related_cands)
5860	{
5861	fprintf (stream: dump_file, format: " Group %d:\t", group->id);
5862	dump_bitmap (file: dump_file, map: group->related_cands);
5863	}
5864	}
5865	fprintf (stream: dump_file, format: "\n");
5866	}
5867	}
5868
5869	/ Determines costs of computing use of iv with an iv candidate. /
5870
5871	static void
5872	determine_group_iv_costs (struct ivopts_data *data)
5873	{
5874	unsigned i, j;
5875	struct iv_cand *cand;
5876	struct iv_group *group;
5877	bitmap to_clear = BITMAP_ALLOC (NULL);
5878
5879	alloc_use_cost_map (data);
5880
5881	for (i = `0`; i < data->vgroups.length (); i++)
5882	{
5883	group = data->vgroups [i];
5884
5885	if (data->consider_all_candidates)
5886	{
5887	for (j = `0`; j < data->vcands.length (); j++)
5888	{
5889	cand = data->vcands [j];
5890	determine_group_iv_cost (data, group, cand);
5891	}
5892	}
5893	else
5894	{
5895	bitmap_iterator bi;
5896
5897	EXECUTE_IF_SET_IN_BITMAP (group->related_cands, `0`, j, bi)
5898	{
5899	cand = data->vcands [j];
5900	if (!determine_group_iv_cost (data, group, cand))
5901	bitmap_set_bit (to_clear, j);
5902	}
5903
5904	/ Remove the candidates for that the cost is infinite from*
5905	the list of related candidates. /*
5906	bitmap_and_compl_into (group->related_cands, to_clear);
5907	bitmap_clear (to_clear);
5908	}
5909	}
5910
5911	BITMAP_FREE (to_clear);
5912
5913	if (dump_file && (dump_flags & TDF_DETAILS))
5914	{
5915	bitmap_iterator bi;
5916
5917	/ Dump invariant variables. /
5918	fprintf (stream: dump_file, format: "\n<Invariant Vars>:\n");
5919	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
5920	{
5921	struct version_info *info = ver_info (data, ver: i);
5922	if (info->inv_id)
5923	{
5924	fprintf (stream: dump_file, format: "Inv %d:\t", info->inv_id);
5925	print_generic_expr (dump_file, info->name, TDF_SLIM);
5926	fprintf (stream: dump_file, format: "%s\n",
5927	info->has_nonlin_use ? "" : "\t(eliminable)");
5928	}
5929	}
5930
5931	/ Dump invariant expressions. /
5932	fprintf (stream: dump_file, format: "\n<Invariant Expressions>:\n");
5933	auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5934
5935	for (hash_table<iv_inv_expr_hasher>::iterator it
5936	= data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5937	++it)
5938	list.safe_push (obj: *it);
5939
5940	list.qsort (sort_iv_inv_expr_ent);
5941
5942	for (i = `0`; i < list.length (); ++i)
5943	{
5944	fprintf (stream: dump_file, format: "inv_expr %d: \t", list [i]->id);
5945	print_generic_expr (dump_file, list [i]->expr, TDF_SLIM);
5946	fprintf (stream: dump_file, format: "\n");
5947	}
5948
5949	fprintf (stream: dump_file, format: "\n<Group-candidate Costs>:\n");
5950
5951	for (i = `0`; i < data->vgroups.length (); i++)
5952	{
5953	group = data->vgroups [i];
5954
5955	fprintf (stream: dump_file, format: "Group %d:\n", i);
5956	fprintf (stream: dump_file, format: " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5957	for (j = `0`; j < group->n_map_members; j++)
5958	{
5959	if (!group->cost_map[j].cand
5960	\|\| group->cost_map[j].cost.infinite_cost_p ())
5961	continue;
5962
5963	fprintf (stream: dump_file, format: " %d\t%" PRId64 "\t%d\t",
5964	group->cost_map[j].cand->id,
5965	group->cost_map[j].cost.cost,
5966	group->cost_map[j].cost.complexity);
5967	if (!group->cost_map[j].inv_exprs
5968	\|\| bitmap_empty_p (map: group->cost_map[j].inv_exprs))
5969	fprintf (stream: dump_file, format: "NIL;\t");
5970	else
5971	bitmap_print (dump_file,
5972	group->cost_map[j].inv_exprs, "", ";\t");
5973	if (!group->cost_map[j].inv_vars
5974	\|\| bitmap_empty_p (map: group->cost_map[j].inv_vars))
5975	fprintf (stream: dump_file, format: "NIL;\n");
5976	else
5977	bitmap_print (dump_file,
5978	group->cost_map[j].inv_vars, "", "\n");
5979	}
5980
5981	fprintf (stream: dump_file, format: "\n");
5982	}
5983	fprintf (stream: dump_file, format: "\n");
5984	}
5985	}
5986
5987	/ Determines cost of the candidate CAND. /
5988
5989	static void
5990	determine_iv_cost (struct ivopts_data data, struct* iv_cand *cand)
5991	{
5992	comp_cost cost_base;
5993	int64_t cost, cost_step;
5994	tree base;
5995
5996	gcc_assert (cand->iv != NULL);
5997
5998	/ There are two costs associated with the candidate -- its increment*
5999	and its initialization. The second is almost negligible for any loop
6000	that rolls enough, so we take it just very little into account. /*
6001
6002	base = cand->iv->base;
6003	cost_base = force_var_cost (data, expr: base, NULL);
6004	/ It will be exceptional that the iv register happens to be initialized with*
6005	the proper value at no cost. In general, there will at least be a regcopy
6006	or a const set. /*
6007	if (cost_base.cost == `0`)
6008	cost_base.cost = COSTS_N_INSNS (`1`);
6009	/ Doloop decrement should be considered as zero cost. /
6010	if (cand->doloop_p)
6011	cost_step = `0`;
6012	else
6013	cost_step = add_cost (speed: data->speed, TYPE_MODE (TREE_TYPE (base)));
6014	cost = cost_step + adjust_setup_cost (data, cost: cost_base.cost);
6015
6016	/ Prefer the original ivs unless we may gain something by replacing it.*
6017	The reason is to make debugging simpler; so this is not relevant for
6018	artificial ivs created by other optimization passes. /*
6019	if ((cand->pos != IP_ORIGINAL
6020	\|\| !SSA_NAME_VAR (cand->var_before)
6021	\|\| DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6022	/ Prefer doloop as well. /
6023	&& !cand->doloop_p)
6024	cost++;
6025
6026	/ Prefer not to insert statements into latch unless there are some*
6027	already (so that we do not create unnecessary jumps). /*
6028	if (cand->pos == IP_END
6029	&& empty_block_p (ip_end_pos (data->current_loop)))
6030	cost++;
6031
6032	cand->cost = cost;
6033	cand->cost_step = cost_step;
6034	}
6035
6036	/ Determines costs of computation of the candidates. /
6037
6038	static void
6039	determine_iv_costs (struct ivopts_data *data)
6040	{
6041	unsigned i;
6042
6043	if (dump_file && (dump_flags & TDF_DETAILS))
6044	{
6045	fprintf (stream: dump_file, format: "<Candidate Costs>:\n");
6046	fprintf (stream: dump_file, format: " cand\tcost\n");
6047	}
6048
6049	for (i = `0`; i < data->vcands.length (); i++)
6050	{
6051	struct iv_cand *cand = data->vcands [i];
6052
6053	determine_iv_cost (data, cand);
6054
6055	if (dump_file && (dump_flags & TDF_DETAILS))
6056	fprintf (stream: dump_file, format: " %d\t%d\n", i, cand->cost);
6057	}
6058
6059	if (dump_file && (dump_flags & TDF_DETAILS))
6060	fprintf (stream: dump_file, format: "\n");
6061	}
6062
6063	/ Estimate register pressure for loop having N_INVS invariants and N_CANDS*
6064	induction variables. Note N_INVS includes both invariant variables and
6065	invariant expressions. /*
6066
6067	static unsigned
6068	ivopts_estimate_reg_pressure (struct ivopts_data data, unsigned* n_invs,
6069	unsigned n_cands)
6070	{
6071	unsigned cost;
6072	unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6073	unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6074	bool speed = data->speed;
6075
6076	/ If there is a call in the loop body, the call-clobbered registers*
6077	are not available for loop invariants. /*
6078	if (data->body_includes_call)
6079	available_regs = available_regs - target_clobbered_regs;
6080
6081	/ If we have enough registers. /
6082	if (regs_needed + target_res_regs < available_regs)
6083	cost = n_new;
6084	/ If close to running out of registers, try to preserve them. /
6085	else if (regs_needed <= available_regs)
6086	cost = target_reg_cost [speed] * regs_needed;
6087	/ If we run out of available registers but the number of candidates*
6088	does not, we penalize extra registers using target_spill_cost. /*
6089	else if (n_cands <= available_regs)
6090	cost = target_reg_cost [speed] * available_regs
6091	+ target_spill_cost [speed] * (regs_needed - available_regs);
6092	/ If the number of candidates runs out available registers, we penalize*
6093	extra candidate registers using target_spill_cost 2. Because it is*
6094	more expensive to spill induction variable than invariant. /*
6095	else
6096	cost = target_reg_cost [speed] * available_regs
6097	+ target_spill_cost [speed] * (n_cands - available_regs) * `2`
6098	+ target_spill_cost [speed] * (regs_needed - n_cands);
6099
6100	/ Finally, add the number of candidates, so that we prefer eliminating*
6101	induction variables if possible. /*
6102	return cost + n_cands;
6103	}
6104
6105	/ For each size of the induction variable set determine the penalty. /
6106
6107	static void
6108	determine_set_costs (struct ivopts_data *data)
6109	{
6110	unsigned j, n;
6111	gphi *phi;
6112	gphi_iterator psi;
6113	tree op;
6114	class loop *loop = data->current_loop;
6115	bitmap_iterator bi;
6116
6117	if (dump_file && (dump_flags & TDF_DETAILS))
6118	{
6119	fprintf (stream: dump_file, format: "<Global Costs>:\n");
6120	fprintf (stream: dump_file, format: " target_avail_regs %d\n", target_avail_regs);
6121	fprintf (stream: dump_file, format: " target_clobbered_regs %d\n", target_clobbered_regs);
6122	fprintf (stream: dump_file, format: " target_reg_cost %d\n", target_reg_cost[data->speed]);
6123	fprintf (stream: dump_file, format: " target_spill_cost %d\n", target_spill_cost[data->speed]);
6124	}
6125
6126	n = `0`;
6127	for (psi = gsi_start_phis (loop->header); !gsi_end_p (i: psi); gsi_next (i: &psi))
6128	{
6129	phi = psi.phi ();
6130	op = PHI_RESULT (phi);
6131
6132	if (virtual_operand_p (op))
6133	continue;
6134
6135	if (get_iv (data, var: op))
6136	continue;
6137
6138	if (!POINTER_TYPE_P (TREE_TYPE (op))
6139	&& !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6140	continue;
6141
6142	n++;
6143	}
6144
6145	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, j, bi)
6146	{
6147	struct version_info *info = ver_info (data, ver: j);
6148
6149	if (info->inv_id && info->has_nonlin_use)
6150	n++;
6151	}
6152
6153	data->regs_used = n;
6154	if (dump_file && (dump_flags & TDF_DETAILS))
6155	fprintf (stream: dump_file, format: " regs_used %d\n", n);
6156
6157	if (dump_file && (dump_flags & TDF_DETAILS))
6158	{
6159	fprintf (stream: dump_file, format: " cost for size:\n");
6160	fprintf (stream: dump_file, format: " ivs\tcost\n");
6161	for (j = `0`; j <= `2` * target_avail_regs; j++)
6162	fprintf (stream: dump_file, format: " %d\t%d\n", j,
6163	ivopts_estimate_reg_pressure (data, n_invs: `0`, n_cands: j));
6164	fprintf (stream: dump_file, format: "\n");
6165	}
6166	}
6167
6168	/ Returns true if A is a cheaper cost pair than B. /
6169
6170	static bool
6171	cheaper_cost_pair (class cost_pair a, class* cost_pair *b)
6172	{
6173	if (!a)
6174	return false;
6175
6176	if (!b)
6177	return true;
6178
6179	if (a->cost < b->cost)
6180	return true;
6181
6182	if (b->cost < a->cost)
6183	return false;
6184
6185	/ In case the costs are the same, prefer the cheaper candidate. /
6186	if (a->cand->cost < b->cand->cost)
6187	return true;
6188
6189	return false;
6190	}
6191
6192	/ Compare if A is a more expensive cost pair than B. Return 1, 0 and -1*
6193	for more expensive, equal and cheaper respectively. /*
6194
6195	static int
6196	compare_cost_pair (class cost_pair a, class* cost_pair *b)
6197	{
6198	if (cheaper_cost_pair (a, b))
6199	return -`1`;
6200	if (cheaper_cost_pair (a: b, b: a))
6201	return `1`;
6202
6203	return `0`;
6204	}
6205
6206	/ Returns candidate by that USE is expressed in IVS. /
6207
6208	static class cost_pair *
6209	iv_ca_cand_for_group (class iv_ca ivs, struct* iv_group *group)
6210	{
6211	return ivs->cand_for_group[group->id];
6212	}
6213
6214	/ Computes the cost field of IVS structure. /
6215
6216	static void
6217	iv_ca_recount_cost (struct ivopts_data data, class* iv_ca *ivs)
6218	{
6219	comp_cost cost = ivs->cand_use_cost;
6220
6221	cost += ivs->cand_cost;
6222	cost += ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands);
6223	ivs->cost = cost;
6224	}
6225
6226	/ Remove use of invariants in set INVS by decreasing counter in N_INV_USES*
6227	and IVS. /*
6228
6229	static void
6230	iv_ca_set_remove_invs (class iv_ca ivs, bitmap invs, unsigned* *n_inv_uses)
6231	{
6232	bitmap_iterator bi;
6233	unsigned iid;
6234
6235	if (!invs)
6236	return;
6237
6238	gcc_assert (n_inv_uses != NULL);
6239	EXECUTE_IF_SET_IN_BITMAP (invs, `0`, iid, bi)
6240	{
6241	n_inv_uses[iid]--;
6242	if (n_inv_uses[iid] == `0`)
6243	ivs->n_invs--;
6244	}
6245	}
6246
6247	/ Set USE not to be expressed by any candidate in IVS. /
6248
6249	static void
6250	iv_ca_set_no_cp (struct ivopts_data data, class* iv_ca *ivs,
6251	struct iv_group *group)
6252	{
6253	unsigned gid = group->id, cid;
6254	class cost_pair *cp;
6255
6256	cp = ivs->cand_for_group[gid];
6257	if (!cp)
6258	return;
6259	cid = cp->cand->id;
6260
6261	ivs->bad_groups++;
6262	ivs->cand_for_group[gid] = NULL;
6263	ivs->n_cand_uses[cid]--;
6264
6265	if (ivs->n_cand_uses[cid] == `0`)
6266	{
6267	bitmap_clear_bit (ivs->cands, cid);
6268	if (!cp->cand->doloop_p \|\| !targetm.have_count_reg_decr_p)
6269	ivs->n_cands--;
6270	ivs->cand_cost -= cp->cand->cost;
6271	iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6272	iv_ca_set_remove_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6273	}
6274
6275	ivs->cand_use_cost -= cp->cost;
6276	iv_ca_set_remove_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6277	iv_ca_set_remove_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6278	iv_ca_recount_cost (data, ivs);
6279	}
6280
6281	/ Add use of invariants in set INVS by increasing counter in N_INV_USES and*
6282	IVS. /*
6283
6284	static void
6285	iv_ca_set_add_invs (class iv_ca ivs, bitmap invs, unsigned* *n_inv_uses)
6286	{
6287	bitmap_iterator bi;
6288	unsigned iid;
6289
6290	if (!invs)
6291	return;
6292
6293	gcc_assert (n_inv_uses != NULL);
6294	EXECUTE_IF_SET_IN_BITMAP (invs, `0`, iid, bi)
6295	{
6296	n_inv_uses[iid]++;
6297	if (n_inv_uses[iid] == `1`)
6298	ivs->n_invs++;
6299	}
6300	}
6301
6302	/ Set cost pair for GROUP in set IVS to CP. /
6303
6304	static void
6305	iv_ca_set_cp (struct ivopts_data data, class* iv_ca *ivs,
6306	struct iv_group group, class* cost_pair *cp)
6307	{
6308	unsigned gid = group->id, cid;
6309
6310	if (ivs->cand_for_group[gid] == cp)
6311	return;
6312
6313	if (ivs->cand_for_group[gid])
6314	iv_ca_set_no_cp (data, ivs, group);
6315
6316	if (cp)
6317	{
6318	cid = cp->cand->id;
6319
6320	ivs->bad_groups--;
6321	ivs->cand_for_group[gid] = cp;
6322	ivs->n_cand_uses[cid]++;
6323	if (ivs->n_cand_uses[cid] == `1`)
6324	{
6325	bitmap_set_bit (ivs->cands, cid);
6326	if (!cp->cand->doloop_p \|\| !targetm.have_count_reg_decr_p)
6327	ivs->n_cands++;
6328	ivs->cand_cost += cp->cand->cost;
6329	iv_ca_set_add_invs (ivs, invs: cp->cand->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6330	iv_ca_set_add_invs (ivs, invs: cp->cand->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6331	}
6332
6333	ivs->cand_use_cost += cp->cost;
6334	iv_ca_set_add_invs (ivs, invs: cp->inv_vars, n_inv_uses: ivs->n_inv_var_uses);
6335	iv_ca_set_add_invs (ivs, invs: cp->inv_exprs, n_inv_uses: ivs->n_inv_expr_uses);
6336	iv_ca_recount_cost (data, ivs);
6337	}
6338	}
6339
6340	/ Extend set IVS by expressing USE by some of the candidates in it*
6341	if possible. Consider all important candidates if candidates in
6342	set IVS don't give any result. /*
6343
6344	static void
6345	iv_ca_add_group (struct ivopts_data data, class* iv_ca *ivs,
6346	struct iv_group *group)
6347	{
6348	class cost_pair best_cp = NULL, cp;
6349	bitmap_iterator bi;
6350	unsigned i;
6351	struct iv_cand *cand;
6352
6353	gcc_assert (ivs->upto >= group->id);
6354	ivs->upto++;
6355	ivs->bad_groups++;
6356
6357	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, i, bi)
6358	{
6359	cand = data->vcands [i];
6360	cp = get_group_iv_cost (data, group, cand);
6361	if (cheaper_cost_pair (a: cp, b: best_cp))
6362	best_cp = cp;
6363	}
6364
6365	if (best_cp == NULL)
6366	{
6367	EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, `0`, i, bi)
6368	{
6369	cand = data->vcands [i];
6370	cp = get_group_iv_cost (data, group, cand);
6371	if (cheaper_cost_pair (a: cp, b: best_cp))
6372	best_cp = cp;
6373	}
6374	}
6375
6376	iv_ca_set_cp (data, ivs, group, cp: best_cp);
6377	}
6378
6379	/ Get cost for assignment IVS. /
6380
6381	static comp_cost
6382	iv_ca_cost (class iv_ca *ivs)
6383	{
6384	/ This was a conditional expression but it triggered a bug in*
6385	Sun C 5.5. /*
6386	if (ivs->bad_groups)
6387	return infinite_cost;
6388	else
6389	return ivs->cost;
6390	}
6391
6392	/ Compare if applying NEW_CP to GROUP for IVS introduces more invariants*
6393	than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6394	respectively. /*
6395
6396	static int
6397	iv_ca_compare_deps (struct ivopts_data data, class* iv_ca *ivs,
6398	struct iv_group group, class* cost_pair *old_cp,
6399	class cost_pair *new_cp)
6400	{
6401	gcc_assert (old_cp && new_cp && old_cp != new_cp);
6402	unsigned old_n_invs = ivs->n_invs;
6403	iv_ca_set_cp (data, ivs, group, cp: new_cp);
6404	unsigned new_n_invs = ivs->n_invs;
6405	iv_ca_set_cp (data, ivs, group, cp: old_cp);
6406
6407	return new_n_invs > old_n_invs ? `1` : (new_n_invs < old_n_invs ? -`1` : `0`);
6408	}
6409
6410	/ Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains*
6411	it before NEXT. /*
6412
6413	static struct iv_ca_delta *
6414	iv_ca_delta_add (struct iv_group group, class* cost_pair *old_cp,
6415	class cost_pair new_cp, struct* iv_ca_delta *next)
6416	{
6417	struct iv_ca_delta change = XNEW (struct* iv_ca_delta);
6418
6419	change->group = group;
6420	change->old_cp = old_cp;
6421	change->new_cp = new_cp;
6422	change->next = next;
6423
6424	return change;
6425	}
6426
6427	/ Joins two lists of changes L1 and L2. Destructive -- old lists*
6428	are rewritten. /*
6429
6430	static struct iv_ca_delta *
6431	iv_ca_delta_join (struct iv_ca_delta l1, struct* iv_ca_delta *l2)
6432	{
6433	struct iv_ca_delta *last;
6434
6435	if (!l2)
6436	return l1;
6437
6438	if (!l1)
6439	return l2;
6440
6441	for (last = l1; last->next; last = last->next)
6442	continue;
6443	last->next = l2;
6444
6445	return l1;
6446	}
6447
6448	/ Reverse the list of changes DELTA, forming the inverse to it. /
6449
6450	static struct iv_ca_delta *
6451	iv_ca_delta_reverse (struct iv_ca_delta *delta)
6452	{
6453	struct iv_ca_delta act, next, *prev = NULL;
6454
6455	for (act = delta; act; act = next)
6456	{
6457	next = act->next;
6458	act->next = prev;
6459	prev = act;
6460
6461	std::swap (a&: act->old_cp, b&: act->new_cp);
6462	}
6463
6464	return prev;
6465	}
6466
6467	/ Commit changes in DELTA to IVS. If FORWARD is false, the changes are*
6468	reverted instead. /*
6469
6470	static void
6471	iv_ca_delta_commit (struct ivopts_data data, class* iv_ca *ivs,
6472	struct iv_ca_delta delta, bool* forward)
6473	{
6474	class cost_pair from, to;
6475	struct iv_ca_delta *act;
6476
6477	if (!forward)
6478	delta = iv_ca_delta_reverse (delta);
6479
6480	for (act = delta; act; act = act->next)
6481	{
6482	from = act->old_cp;
6483	to = act->new_cp;
6484	gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6485	iv_ca_set_cp (data, ivs, group: act->group, cp: to);
6486	}
6487
6488	if (!forward)
6489	iv_ca_delta_reverse (delta);
6490	}
6491
6492	/ Returns true if CAND is used in IVS. /
6493
6494	static bool
6495	iv_ca_cand_used_p (class iv_ca ivs, struct* iv_cand *cand)
6496	{
6497	return ivs->n_cand_uses[cand->id] > `0`;
6498	}
6499
6500	/ Returns number of induction variable candidates in the set IVS. /
6501
6502	static unsigned
6503	iv_ca_n_cands (class iv_ca *ivs)
6504	{
6505	return ivs->n_cands;
6506	}
6507
6508	/ Free the list of changes DELTA. /
6509
6510	static void
6511	iv_ca_delta_free (struct iv_ca_delta **delta)
6512	{
6513	struct iv_ca_delta act, next;
6514
6515	for (act = *delta; act; act = next)
6516	{
6517	next = act->next;
6518	free (ptr: act);
6519	}
6520
6521	*delta = NULL;
6522	}
6523
6524	/ Allocates new iv candidates assignment. /
6525
6526	static class iv_ca *
6527	iv_ca_new (struct ivopts_data *data)
6528	{
6529	class iv_ca nw = XNEW (class* iv_ca);
6530
6531	nw->upto = `0`;
6532	nw->bad_groups = `0`;
6533	nw->cand_for_group = XCNEWVEC (class cost_pair *,
6534	data->vgroups.length ());
6535	nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6536	nw->cands = BITMAP_ALLOC (NULL);
6537	nw->n_cands = `0`;
6538	nw->n_invs = `0`;
6539	nw->cand_use_cost = no_cost;
6540	nw->cand_cost = `0`;
6541	nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + `1`);
6542	nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + `1`);
6543	nw->cost = no_cost;
6544
6545	return nw;
6546	}
6547
6548	/ Free memory occupied by the set IVS. /
6549
6550	static void
6551	iv_ca_free (class iv_ca **ivs)
6552	{
6553	free (ptr: (*ivs)->cand_for_group);
6554	free (ptr: (*ivs)->n_cand_uses);
6555	BITMAP_FREE ((*ivs)->cands);
6556	free (ptr: (*ivs)->n_inv_var_uses);
6557	free (ptr: (*ivs)->n_inv_expr_uses);
6558	free (ptr: *ivs);
6559	*ivs = NULL;
6560	}
6561
6562	/ Dumps IVS to FILE. /
6563
6564	static void
6565	iv_ca_dump (struct ivopts_data data, FILE file, class iv_ca *ivs)
6566	{
6567	unsigned i;
6568	comp_cost cost = iv_ca_cost (ivs);
6569
6570	fprintf (stream: file, format: " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6571	cost.complexity);
6572	fprintf (stream: file, format: " reg_cost: %d\n",
6573	ivopts_estimate_reg_pressure (data, n_invs: ivs->n_invs, n_cands: ivs->n_cands));
6574	fprintf (stream: file, format: " cand_cost: %" PRId64 "\n cand_group_cost: "
6575	"%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6576	ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6577	bitmap_print (file, ivs->cands, " candidates: ","\n");
6578
6579	for (i = `0`; i < ivs->upto; i++)
6580	{
6581	struct iv_group *group = data->vgroups [i];
6582	class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6583	if (cp)
6584	fprintf (stream: file, format: " group:%d --> iv_cand:%d, cost=("
6585	"%" PRId64 ",%d)\n", group->id, cp->cand->id,
6586	cp->cost.cost, cp->cost.complexity);
6587	else
6588	fprintf (stream: file, format: " group:%d --> ??\n", group->id);
6589	}
6590
6591	const char *pref = "";
6592	fprintf (stream: file, format: " invariant variables: ");
6593	for (i = `1`; i <= data->max_inv_var_id; i++)
6594	if (ivs->n_inv_var_uses[i])
6595	{
6596	fprintf (stream: file, format: "%s%d", pref, i);
6597	pref = ", ";
6598	}
6599
6600	pref = "";
6601	fprintf (stream: file, format: "\n invariant expressions: ");
6602	for (i = `1`; i <= data->max_inv_expr_id; i++)
6603	if (ivs->n_inv_expr_uses[i])
6604	{
6605	fprintf (stream: file, format: "%s%d", pref, i);
6606	pref = ", ";
6607	}
6608
6609	fprintf (stream: file, format: "\n\n");
6610	}
6611
6612	/ Try changing candidate in IVS to CAND for each use. Return cost of the*
6613	new set, and store differences in DELTA. Number of induction variables
6614	in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6615	the function will try to find a solution with mimimal iv candidates. /*
6616
6617	static comp_cost
6618	iv_ca_extend (struct ivopts_data data, class* iv_ca *ivs,
6619	struct iv_cand cand, struct* iv_ca_delta **delta,
6620	unsigned n_ivs, bool* min_ncand)
6621	{
6622	unsigned i;
6623	comp_cost cost;
6624	struct iv_group *group;
6625	class cost_pair old_cp, new_cp;
6626
6627	*delta = NULL;
6628	for (i = `0`; i < ivs->upto; i++)
6629	{
6630	group = data->vgroups [i];
6631	old_cp = iv_ca_cand_for_group (ivs, group);
6632
6633	if (old_cp
6634	&& old_cp->cand == cand)
6635	continue;
6636
6637	new_cp = get_group_iv_cost (data, group, cand);
6638	if (!new_cp)
6639	continue;
6640
6641	if (!min_ncand)
6642	{
6643	int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6644	/ Skip if new_cp depends on more invariants. /
6645	if (cmp_invs > `0`)
6646	continue;
6647
6648	int cmp_cost = compare_cost_pair (a: new_cp, b: old_cp);
6649	/ Skip if new_cp is not cheaper. /
6650	if (cmp_cost > `0` \|\| (cmp_cost == `0` && cmp_invs == `0`))
6651	continue;
6652	}
6653
6654	delta = iv_ca_delta_add (group, old_cp, new_cp, next: delta);
6655	}
6656
6657	iv_ca_delta_commit (data, ivs, delta: delta, forward: true*);
6658	cost = iv_ca_cost (ivs);
6659	if (n_ivs)
6660	*n_ivs = iv_ca_n_cands (ivs);
6661	iv_ca_delta_commit (data, ivs, delta: delta, forward: false*);
6662
6663	return cost;
6664	}
6665
6666	/ Try narrowing set IVS by removing CAND. Return the cost of*
6667	the new set and store the differences in DELTA. START is
6668	the candidate with which we start narrowing. /*
6669
6670	static comp_cost
6671	iv_ca_narrow (struct ivopts_data data, class* iv_ca *ivs,
6672	struct iv_cand cand, struct* iv_cand *start,
6673	struct iv_ca_delta **delta)
6674	{
6675	unsigned i, ci;
6676	struct iv_group *group;
6677	class cost_pair old_cp, new_cp, *cp;
6678	bitmap_iterator bi;
6679	struct iv_cand *cnd;
6680	comp_cost cost, best_cost, acost;
6681
6682	*delta = NULL;
6683	for (i = `0`; i < data->vgroups.length (); i++)
6684	{
6685	group = data->vgroups [i];
6686
6687	old_cp = iv_ca_cand_for_group (ivs, group);
6688	if (old_cp->cand != cand)
6689	continue;
6690
6691	best_cost = iv_ca_cost (ivs);
6692	/ Start narrowing with START. /
6693	new_cp = get_group_iv_cost (data, group, cand: start);
6694
6695	if (data->consider_all_candidates)
6696	{
6697	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, ci, bi)
6698	{
6699	if (ci == cand->id \|\| (start && ci == start->id))
6700	continue;
6701
6702	cnd = data->vcands [ci];
6703
6704	cp = get_group_iv_cost (data, group, cand: cnd);
6705	if (!cp)
6706	continue;
6707
6708	iv_ca_set_cp (data, ivs, group, cp);
6709	acost = iv_ca_cost (ivs);
6710
6711	if (acost < best_cost)
6712	{
6713	best_cost = acost;
6714	new_cp = cp;
6715	}
6716	}
6717	}
6718	else
6719	{
6720	EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, `0`, ci, bi)
6721	{
6722	if (ci == cand->id \|\| (start && ci == start->id))
6723	continue;
6724
6725	cnd = data->vcands [ci];
6726
6727	cp = get_group_iv_cost (data, group, cand: cnd);
6728	if (!cp)
6729	continue;
6730
6731	iv_ca_set_cp (data, ivs, group, cp);
6732	acost = iv_ca_cost (ivs);
6733
6734	if (acost < best_cost)
6735	{
6736	best_cost = acost;
6737	new_cp = cp;
6738	}
6739	}
6740	}
6741	/ Restore to old cp for use. /
6742	iv_ca_set_cp (data, ivs, group, cp: old_cp);
6743
6744	if (!new_cp)
6745	{
6746	iv_ca_delta_free (delta);
6747	return infinite_cost;
6748	}
6749
6750	delta = iv_ca_delta_add (group, old_cp, new_cp, next: delta);
6751	}
6752
6753	iv_ca_delta_commit (data, ivs, delta: delta, forward: true*);
6754	cost = iv_ca_cost (ivs);
6755	iv_ca_delta_commit (data, ivs, delta: delta, forward: false*);
6756
6757	return cost;
6758	}
6759
6760	/ Try optimizing the set of candidates IVS by removing candidates different*
6761	from to EXCEPT_CAND from it. Return cost of the new set, and store
6762	differences in DELTA. /*
6763
6764	static comp_cost
6765	iv_ca_prune (struct ivopts_data data, class* iv_ca *ivs,
6766	struct iv_cand except_cand, struct* iv_ca_delta **delta)
6767	{
6768	bitmap_iterator bi;
6769	struct iv_ca_delta act_delta, best_delta;
6770	unsigned i;
6771	comp_cost best_cost, acost;
6772	struct iv_cand *cand;
6773
6774	best_delta = NULL;
6775	best_cost = iv_ca_cost (ivs);
6776
6777	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, i, bi)
6778	{
6779	cand = data->vcands [i];
6780
6781	if (cand == except_cand)
6782	continue;
6783
6784	acost = iv_ca_narrow (data, ivs, cand, start: except_cand, delta: &act_delta);
6785
6786	if (acost < best_cost)
6787	{
6788	best_cost = acost;
6789	iv_ca_delta_free (delta: &best_delta);
6790	best_delta = act_delta;
6791	}
6792	else
6793	iv_ca_delta_free (delta: &act_delta);
6794	}
6795
6796	if (!best_delta)
6797	{
6798	*delta = NULL;
6799	return best_cost;
6800	}
6801
6802	/ Recurse to possibly remove other unnecessary ivs. /
6803	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
6804	best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6805	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: false);
6806	delta = iv_ca_delta_join (l1: best_delta, l2: delta);
6807	return best_cost;
6808	}
6809
6810	/ Check if CAND_IDX is a candidate other than OLD_CAND and has*
6811	cheaper local cost for GROUP than BEST_CP. Return pointer to
6812	the corresponding cost_pair, otherwise just return BEST_CP. /*
6813
6814	static class cost_pair*
6815	cheaper_cost_with_cand (struct ivopts_data data, struct* iv_group *group,
6816	unsigned int cand_idx, struct iv_cand *old_cand,
6817	class cost_pair *best_cp)
6818	{
6819	struct iv_cand *cand;
6820	class cost_pair *cp;
6821
6822	gcc_assert (old_cand != NULL && best_cp != NULL);
6823	if (cand_idx == old_cand->id)
6824	return best_cp;
6825
6826	cand = data->vcands [cand_idx];
6827	cp = get_group_iv_cost (data, group, cand);
6828	if (cp != NULL && cheaper_cost_pair (a: cp, b: best_cp))
6829	return cp;
6830
6831	return best_cp;
6832	}
6833
6834	/ Try breaking local optimal fixed-point for IVS by replacing candidates*
6835	which are used by more than one iv uses. For each of those candidates,
6836	this function tries to represent iv uses under that candidate using
6837	other ones with lower local cost, then tries to prune the new set.
6838	If the new set has lower cost, It returns the new cost after recording
6839	candidate replacement in list DELTA. /*
6840
6841	static comp_cost
6842	iv_ca_replace (struct ivopts_data data, class* iv_ca *ivs,
6843	struct iv_ca_delta **delta)
6844	{
6845	bitmap_iterator bi, bj;
6846	unsigned int i, j, k;
6847	struct iv_cand *cand;
6848	comp_cost orig_cost, acost;
6849	struct iv_ca_delta act_delta, tmp_delta;
6850	class cost_pair old_cp, best_cp = NULL;
6851
6852	*delta = NULL;
6853	orig_cost = iv_ca_cost (ivs);
6854
6855	EXECUTE_IF_SET_IN_BITMAP (ivs->cands, `0`, i, bi)
6856	{
6857	if (ivs->n_cand_uses[i] == `1`
6858	\|\| ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6859	continue;
6860
6861	cand = data->vcands [i];
6862
6863	act_delta = NULL;
6864	/ Represent uses under current candidate using other ones with*
6865	lower local cost. /*
6866	for (j = `0`; j < ivs->upto; j++)
6867	{
6868	struct iv_group *group = data->vgroups [j];
6869	old_cp = iv_ca_cand_for_group (ivs, group);
6870
6871	if (old_cp->cand != cand)
6872	continue;
6873
6874	best_cp = old_cp;
6875	if (data->consider_all_candidates)
6876	for (k = `0`; k < data->vcands.length (); k++)
6877	best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6878	old_cand: old_cp->cand, best_cp);
6879	else
6880	EXECUTE_IF_SET_IN_BITMAP (group->related_cands, `0`, k, bj)
6881	best_cp = cheaper_cost_with_cand (data, group, cand_idx: k,
6882	old_cand: old_cp->cand, best_cp);
6883
6884	if (best_cp == old_cp)
6885	continue;
6886
6887	act_delta = iv_ca_delta_add (group, old_cp, new_cp: best_cp, next: act_delta);
6888	}
6889	/ No need for further prune. /
6890	if (!act_delta)
6891	continue;
6892
6893	/ Prune the new candidate set. /
6894	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
6895	acost = iv_ca_prune (data, ivs, NULL, delta: &tmp_delta);
6896	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
6897	act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
6898
6899	if (acost < orig_cost)
6900	{
6901	*delta = act_delta;
6902	return acost;
6903	}
6904	else
6905	iv_ca_delta_free (delta: &act_delta);
6906	}
6907
6908	return orig_cost;
6909	}
6910
6911	/ Tries to extend the sets IVS in the best possible way in order to*
6912	express the GROUP. If ORIGINALP is true, prefer candidates from
6913	the original set of IVs, otherwise favor important candidates not
6914	based on any memory object. /*
6915
6916	static bool
6917	try_add_cand_for (struct ivopts_data data, class* iv_ca *ivs,
6918	struct iv_group group, bool* originalp)
6919	{
6920	comp_cost best_cost, act_cost;
6921	unsigned i;
6922	bitmap_iterator bi;
6923	struct iv_cand *cand;
6924	struct iv_ca_delta best_delta = NULL, act_delta;
6925	class cost_pair *cp;
6926
6927	iv_ca_add_group (data, ivs, group);
6928	best_cost = iv_ca_cost (ivs);
6929	cp = iv_ca_cand_for_group (ivs, group);
6930	if (cp)
6931	{
6932	best_delta = iv_ca_delta_add (group, NULL, new_cp: cp, NULL);
6933	iv_ca_set_no_cp (data, ivs, group);
6934	}
6935
6936	/ If ORIGINALP is true, try to find the original IV for the use. Otherwise*
6937	first try important candidates not based on any memory object. Only if
6938	this fails, try the specific ones. Rationale -- in loops with many
6939	variables the best choice often is to use just one generic biv. If we
6940	added here many ivs specific to the uses, the optimization algorithm later
6941	would be likely to get stuck in a local minimum, thus causing us to create
6942	too many ivs. The approach from few ivs to more seems more likely to be
6943	successful -- starting from few ivs, replacing an expensive use by a
6944	specific iv should always be a win. /*
6945	EXECUTE_IF_SET_IN_BITMAP (group->related_cands, `0`, i, bi)
6946	{
6947	cand = data->vcands [i];
6948
6949	if (originalp && cand->pos !=IP_ORIGINAL)
6950	continue;
6951
6952	if (!originalp && cand->iv->base_object != NULL_TREE)
6953	continue;
6954
6955	if (iv_ca_cand_used_p (ivs, cand))
6956	continue;
6957
6958	cp = get_group_iv_cost (data, group, cand);
6959	if (!cp)
6960	continue;
6961
6962	iv_ca_set_cp (data, ivs, group, cp);
6963	act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL,
6964	min_ncand: true);
6965	iv_ca_set_no_cp (data, ivs, group);
6966	act_delta = iv_ca_delta_add (group, NULL, new_cp: cp, next: act_delta);
6967
6968	if (act_cost < best_cost)
6969	{
6970	best_cost = act_cost;
6971
6972	iv_ca_delta_free (delta: &best_delta);
6973	best_delta = act_delta;
6974	}
6975	else
6976	iv_ca_delta_free (delta: &act_delta);
6977	}
6978
6979	if (best_cost.infinite_cost_p ())
6980	{
6981	for (i = `0`; i < group->n_map_members; i++)
6982	{
6983	cp = group->cost_map + i;
6984	cand = cp->cand;
6985	if (!cand)
6986	continue;
6987
6988	/ Already tried this. /
6989	if (cand->important)
6990	{
6991	if (originalp && cand->pos == IP_ORIGINAL)
6992	continue;
6993	if (!originalp && cand->iv->base_object == NULL_TREE)
6994	continue;
6995	}
6996
6997	if (iv_ca_cand_used_p (ivs, cand))
6998	continue;
6999
7000	act_delta = NULL;
7001	iv_ca_set_cp (data, ivs, group, cp);
7002	act_cost = iv_ca_extend (data, ivs, cand, delta: &act_delta, NULL, min_ncand: true);
7003	iv_ca_set_no_cp (data, ivs, group);
7004	act_delta = iv_ca_delta_add (group,
7005	old_cp: iv_ca_cand_for_group (ivs, group),
7006	new_cp: cp, next: act_delta);
7007
7008	if (act_cost < best_cost)
7009	{
7010	best_cost = act_cost;
7011
7012	if (best_delta)
7013	iv_ca_delta_free (delta: &best_delta);
7014	best_delta = act_delta;
7015	}
7016	else
7017	iv_ca_delta_free (delta: &act_delta);
7018	}
7019	}
7020
7021	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7022	iv_ca_delta_free (delta: &best_delta);
7023
7024	return !best_cost.infinite_cost_p ();
7025	}
7026
7027	/ Finds an initial assignment of candidates to uses. /
7028
7029	static class iv_ca *
7030	get_initial_solution (struct ivopts_data data, bool* originalp)
7031	{
7032	unsigned i;
7033	class iv_ca *ivs = iv_ca_new (data);
7034
7035	for (i = `0`; i < data->vgroups.length (); i++)
7036	if (!try_add_cand_for (data, ivs, group: data->vgroups [i], originalp))
7037	{
7038	iv_ca_free (ivs: &ivs);
7039	return NULL;
7040	}
7041
7042	return ivs;
7043	}
7044
7045	/ Tries to improve set of induction variables IVS. TRY_REPLACE_P*
7046	points to a bool variable, this function tries to break local
7047	optimal fixed-point by replacing candidates in IVS if it's true. /*
7048
7049	static bool
7050	try_improve_iv_set (struct ivopts_data *data,
7051	class iv_ca ivs, bool* *try_replace_p)
7052	{
7053	unsigned i, n_ivs;
7054	comp_cost acost, best_cost = iv_ca_cost (ivs);
7055	struct iv_ca_delta best_delta = NULL, act_delta, *tmp_delta;
7056	struct iv_cand *cand;
7057
7058	/ Try extending the set of induction variables by one. /
7059	for (i = `0`; i < data->vcands.length (); i++)
7060	{
7061	cand = data->vcands [i];
7062
7063	if (iv_ca_cand_used_p (ivs, cand))
7064	continue;
7065
7066	acost = iv_ca_extend (data, ivs, cand, delta: &act_delta, n_ivs: &n_ivs, min_ncand: false);
7067	if (!act_delta)
7068	continue;
7069
7070	/ If we successfully added the candidate and the set is small enough,*
7071	try optimizing it by removing other candidates. /*
7072	if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7073	{
7074	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: true);
7075	acost = iv_ca_prune (data, ivs, except_cand: cand, delta: &tmp_delta);
7076	iv_ca_delta_commit (data, ivs, delta: act_delta, forward: false);
7077	act_delta = iv_ca_delta_join (l1: act_delta, l2: tmp_delta);
7078	}
7079
7080	if (acost < best_cost)
7081	{
7082	best_cost = acost;
7083	iv_ca_delta_free (delta: &best_delta);
7084	best_delta = act_delta;
7085	}
7086	else
7087	iv_ca_delta_free (delta: &act_delta);
7088	}
7089
7090	if (!best_delta)
7091	{
7092	/ Try removing the candidates from the set instead. /
7093	best_cost = iv_ca_prune (data, ivs, NULL, delta: &best_delta);
7094
7095	if (!best_delta && *try_replace_p)
7096	{
7097	try_replace_p = false*;
7098	/ So far candidate selecting algorithm tends to choose fewer IVs*
7099	so that it can handle cases in which loops have many variables
7100	but the best choice is often to use only one general biv. One
7101	weakness is it can't handle opposite cases, in which different
7102	candidates should be chosen with respect to each use. To solve
7103	the problem, we replace candidates in a manner described by the
7104	comments of iv_ca_replace, thus give general algorithm a chance
7105	to break local optimal fixed-point in these cases. /*
7106	best_cost = iv_ca_replace (data, ivs, delta: &best_delta);
7107	}
7108
7109	if (!best_delta)
7110	return false;
7111	}
7112
7113	iv_ca_delta_commit (data, ivs, delta: best_delta, forward: true);
7114	iv_ca_delta_free (delta: &best_delta);
7115	return best_cost == iv_ca_cost (ivs);
7116	}
7117
7118	/ Attempts to find the optimal set of induction variables. We do simple*
7119	greedy heuristic -- we try to replace at most one candidate in the selected
7120	solution and remove the unused ivs while this improves the cost. /*
7121
7122	static class iv_ca *
7123	find_optimal_iv_set_1 (struct ivopts_data data, bool* originalp)
7124	{
7125	class iv_ca *set;
7126	bool try_replace_p = true;
7127
7128	/ Get the initial solution. /
7129	set = get_initial_solution (data, originalp);
7130	if (!set)
7131	{
7132	if (dump_file && (dump_flags & TDF_DETAILS))
7133	fprintf (stream: dump_file, format: "Unable to substitute for ivs, failed.\n");
7134	return NULL;
7135	}
7136
7137	if (dump_file && (dump_flags & TDF_DETAILS))
7138	{
7139	fprintf (stream: dump_file, format: "Initial set of candidates:\n");
7140	iv_ca_dump (data, file: dump_file, ivs: set);
7141	}
7142
7143	while (try_improve_iv_set (data, ivs: set, try_replace_p: &try_replace_p))
7144	{
7145	if (dump_file && (dump_flags & TDF_DETAILS))
7146	{
7147	fprintf (stream: dump_file, format: "Improved to:\n");
7148	iv_ca_dump (data, file: dump_file, ivs: set);
7149	}
7150	}
7151
7152	/ If the set has infinite_cost, it can't be optimal. /
7153	if (iv_ca_cost (ivs: set).infinite_cost_p ())
7154	{
7155	if (dump_file && (dump_flags & TDF_DETAILS))
7156	fprintf (stream: dump_file,
7157	format: "Overflow to infinite cost in try_improve_iv_set.\n");
7158	iv_ca_free (ivs: &set);
7159	}
7160	return set;
7161	}
7162
7163	static class iv_ca *
7164	find_optimal_iv_set (struct ivopts_data *data)
7165	{
7166	unsigned i;
7167	comp_cost cost, origcost;
7168	class iv_ca set, origset;
7169
7170	/ Determine the cost based on a strategy that starts with original IVs,*
7171	and try again using a strategy that prefers candidates not based
7172	on any IVs. /*
7173	origset = find_optimal_iv_set_1 (data, originalp: true);
7174	set = find_optimal_iv_set_1 (data, originalp: false);
7175
7176	if (!origset && !set)
7177	return NULL;
7178
7179	origcost = origset ? iv_ca_cost (ivs: origset) : infinite_cost;
7180	cost = set ? iv_ca_cost (ivs: set) : infinite_cost;
7181
7182	if (dump_file && (dump_flags & TDF_DETAILS))
7183	{
7184	fprintf (stream: dump_file, format: "Original cost %" PRId64 " (complexity %d)\n\n",
7185	origcost.cost, origcost.complexity);
7186	fprintf (stream: dump_file, format: "Final cost %" PRId64 " (complexity %d)\n\n",
7187	cost.cost, cost.complexity);
7188	}
7189
7190	/ Choose the one with the best cost. /
7191	if (origcost <= cost)
7192	{
7193	if (set)
7194	iv_ca_free (ivs: &set);
7195	set = origset;
7196	}
7197	else if (origset)
7198	iv_ca_free (ivs: &origset);
7199
7200	for (i = `0`; i < data->vgroups.length (); i++)
7201	{
7202	struct iv_group *group = data->vgroups [i];
7203	group->selected = iv_ca_cand_for_group (ivs: set, group)->cand;
7204	}
7205
7206	return set;
7207	}
7208
7209	/ Creates a new induction variable corresponding to CAND. /
7210
7211	static void
7212	create_new_iv (struct ivopts_data data, struct* iv_cand *cand)
7213	{
7214	gimple_stmt_iterator incr_pos;
7215	tree base;
7216	struct iv_use *use;
7217	struct iv_group *group;
7218	bool after = false;
7219
7220	gcc_assert (cand->iv != NULL);
7221
7222	switch (cand->pos)
7223	{
7224	case IP_NORMAL:
7225	incr_pos = gsi_last_bb (bb: ip_normal_pos (data->current_loop));
7226	break;
7227
7228	case IP_END:
7229	incr_pos = gsi_last_bb (bb: ip_end_pos (data->current_loop));
7230	after = true;
7231	gcc_assert (gsi_end_p (incr_pos) \|\| !stmt_ends_bb_p (*incr_pos));
7232	break;
7233
7234	case IP_AFTER_USE:
7235	after = true;
7236	/ fall through /
7237	case IP_BEFORE_USE:
7238	incr_pos = gsi_for_stmt (cand->incremented_at);
7239	break;
7240
7241	case IP_ORIGINAL:
7242	/ Mark that the iv is preserved. /
7243	name_info (data, name: cand->var_before)->preserve_biv = true;
7244	name_info (data, name: cand->var_after)->preserve_biv = true;
7245
7246	/ Rewrite the increment so that it uses var_before directly. /
7247	use = find_interesting_uses_op (data, op: cand->var_after);
7248	group = data->vgroups [use->group_id];
7249	group->selected = cand;
7250	return;
7251	}
7252
7253	gimple_add_tmp_var (cand->var_before);
7254
7255	base = unshare_expr (cand->iv->base);
7256
7257	/ The step computation could invoke UB when the loop does not iterate.*
7258	Avoid inserting it on the preheader in its native form but rewrite
7259	it to a well-defined form. This also helps masking SCEV issues
7260	which freely re-associates the IV computations when building up
7261	CHRECs without much regard for signed overflow invoking UB. /*
7262	gimple_seq stmts = NULL;
7263	tree step = force_gimple_operand (unshare_expr (cand->iv->step), &stmts,
7264	true, NULL_TREE);
7265	if (stmts)
7266	{
7267	for (auto gsi = gsi_start (seq&: stmts); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
7268	if (gimple_needing_rewrite_undefined (gsi_stmt (i: gsi)))
7269	rewrite_to_defined_unconditional (&gsi);
7270	gsi_insert_seq_on_edge_immediate
7271	(loop_preheader_edge (data->current_loop), stmts);
7272	}
7273
7274	create_iv (base, PLUS_EXPR, step,
7275	cand->var_before, data->current_loop,
7276	&incr_pos, after, &cand->var_before, &cand->var_after);
7277	}
7278
7279	/ Creates new induction variables described in SET. /
7280
7281	static void
7282	create_new_ivs (struct ivopts_data data, class* iv_ca *set)
7283	{
7284	unsigned i;
7285	struct iv_cand *cand;
7286	bitmap_iterator bi;
7287
7288	EXECUTE_IF_SET_IN_BITMAP (set->cands, `0`, i, bi)
7289	{
7290	cand = data->vcands [i];
7291	create_new_iv (data, cand);
7292	}
7293
7294	if (dump_file && (dump_flags & TDF_DETAILS))
7295	{
7296	fprintf (stream: dump_file, format: "Selected IV set for loop %d",
7297	data->current_loop->num);
7298	if (data->loop_loc != UNKNOWN_LOCATION)
7299	fprintf (stream: dump_file, format: " at %s:%d", LOCATION_FILE (data->loop_loc),
7300	LOCATION_LINE (data->loop_loc));
7301	fprintf (stream: dump_file, format: ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters",
7302	avg_loop_niter (loop: data->current_loop));
7303	fprintf (stream: dump_file, format: ", %lu IVs:\n", bitmap_count_bits (set->cands));
7304	EXECUTE_IF_SET_IN_BITMAP (set->cands, `0`, i, bi)
7305	{
7306	cand = data->vcands [i];
7307	dump_cand (file: dump_file, cand);
7308	}
7309	fprintf (stream: dump_file, format: "\n");
7310	}
7311	}
7312
7313	/ Rewrites USE (definition of iv used in a nonlinear expression)*
7314	using candidate CAND. /*
7315
7316	static void
7317	rewrite_use_nonlinear_expr (struct ivopts_data *data,
7318	struct iv_use use, struct* iv_cand *cand)
7319	{
7320	gassign *ass;
7321	gimple_stmt_iterator bsi;
7322	tree comp, type = get_use_type (use), tgt;
7323
7324	/ An important special case -- if we are asked to express value of*
7325	the original iv by itself, just exit; there is no need to
7326	introduce a new computation (that might also need casting the
7327	variable to unsigned and back). /*
7328	if (cand->pos == IP_ORIGINAL
7329	&& cand->incremented_at == use->stmt)
7330	{
7331	tree op = NULL_TREE;
7332	enum tree_code stmt_code;
7333
7334	gcc_assert (is_gimple_assign (use->stmt));
7335	gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7336
7337	/ Check whether we may leave the computation unchanged.*
7338	This is the case only if it does not rely on other
7339	computations in the loop -- otherwise, the computation
7340	we rely upon may be removed in remove_unused_ivs,
7341	thus leading to ICE. /*
7342	stmt_code = gimple_assign_rhs_code (gs: use->stmt);
7343	if (stmt_code == PLUS_EXPR
7344	\|\| stmt_code == MINUS_EXPR
7345	\|\| stmt_code == POINTER_PLUS_EXPR)
7346	{
7347	if (gimple_assign_rhs1 (gs: use->stmt) == cand->var_before)
7348	op = gimple_assign_rhs2 (gs: use->stmt);
7349	else if (gimple_assign_rhs2 (gs: use->stmt) == cand->var_before)
7350	op = gimple_assign_rhs1 (gs: use->stmt);
7351	}
7352
7353	if (op != NULL_TREE)
7354	{
7355	if (expr_invariant_in_loop_p (loop: data->current_loop, expr: op))
7356	return;
7357	if (TREE_CODE (op) == SSA_NAME)
7358	{
7359	struct iv *iv = get_iv (data, var: op);
7360	if (iv != NULL && integer_zerop (iv->step))
7361	return;
7362	}
7363	}
7364	}
7365
7366	switch (gimple_code (g: use->stmt))
7367	{
7368	case GIMPLE_PHI:
7369	tgt = PHI_RESULT (use->stmt);
7370
7371	/ If we should keep the biv, do not replace it. /
7372	if (name_info (data, name: tgt)->preserve_biv)
7373	return;
7374
7375	bsi = gsi_after_labels (bb: gimple_bb (g: use->stmt));
7376	break;
7377
7378	case GIMPLE_ASSIGN:
7379	tgt = gimple_assign_lhs (gs: use->stmt);
7380	bsi = gsi_for_stmt (use->stmt);
7381	break;
7382
7383	default:
7384	gcc_unreachable ();
7385	}
7386
7387	aff_tree aff_inv, aff_var;
7388	if (!get_computation_aff_1 (data, at: use->stmt, use, cand, aff_inv: &aff_inv, aff_var: &aff_var))
7389	gcc_unreachable ();
7390
7391	unshare_aff_combination (&aff_inv);
7392	unshare_aff_combination (&aff_var);
7393	/ Prefer CSE opportunity than loop invariant by adding offset at last*
7394	so that iv_uses have different offsets can be CSEed. /*
7395	poly_widest_int offset = aff_inv.offset;
7396	aff_inv.offset = `0`;
7397
7398	gimple_seq stmt_list = NULL, seq = NULL;
7399	tree comp_op1 = aff_combination_to_tree (&aff_inv);
7400	tree comp_op2 = aff_combination_to_tree (&aff_var);
7401	gcc_assert (comp_op1 && comp_op2);
7402
7403	comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7404	gimple_seq_add_seq (&stmt_list, seq);
7405	comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7406	gimple_seq_add_seq (&stmt_list, seq);
7407
7408	if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7409	std::swap (a&: comp_op1, b&: comp_op2);
7410
7411	if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7412	{
7413	comp = fold_build_pointer_plus (comp_op1,
7414	fold_convert (sizetype, comp_op2));
7415	comp = fold_build_pointer_plus (comp,
7416	wide_int_to_tree (sizetype, offset));
7417	}
7418	else
7419	{
7420	comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7421	fold_convert (TREE_TYPE (comp_op1), comp_op2));
7422	comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7423	wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7424	}
7425
7426	comp = fold_convert (type, comp);
7427	comp = force_gimple_operand (comp, &seq, false, NULL);
7428	gimple_seq_add_seq (&stmt_list, seq);
7429	if (gimple_code (g: use->stmt) != GIMPLE_PHI
7430	/ We can't allow re-allocating the stmt as it might be pointed*
7431	to still. /*
7432	&& (get_gimple_rhs_num_ops (TREE_CODE (comp))
7433	>= gimple_num_ops (gs: gsi_stmt (i: bsi))))
7434	{
7435	comp = force_gimple_operand (comp, &seq, true, NULL);
7436	gimple_seq_add_seq (&stmt_list, seq);
7437	if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7438	{
7439	duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7440	/ As this isn't a plain copy we have to reset alignment*
7441	information. /*
7442	if (SSA_NAME_PTR_INFO (comp))
7443	mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7444	}
7445	}
7446
7447	gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7448	if (gimple_code (g: use->stmt) == GIMPLE_PHI)
7449	{
7450	ass = gimple_build_assign (tgt, comp);
7451	gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7452
7453	bsi = gsi_for_stmt (use->stmt);
7454	remove_phi_node (&bsi, false);
7455	}
7456	else
7457	{
7458	gimple_assign_set_rhs_from_tree (&bsi, comp);
7459	use->stmt = gsi_stmt (i: bsi);
7460	}
7461	}
7462
7463	/ Performs a peephole optimization to reorder the iv update statement with*
7464	a mem ref to enable instruction combining in later phases. The mem ref uses
7465	the iv value before the update, so the reordering transformation requires
7466	adjustment of the offset. CAND is the selected IV_CAND.
7467
7468	Example:
7469
7470	t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7471	iv2 = iv1 + 1;
7472
7473	if (t < val) (1)
7474	goto L;
7475	goto Head;
7476
7477
7478	directly propagating t over to (1) will introduce overlapping live range
7479	thus increase register pressure. This peephole transform it into:
7480
7481
7482	iv2 = iv1 + 1;
7483	t = MEM_REF (base, iv2, 8, 8);
7484	if (t < val)
7485	goto L;
7486	goto Head;
7487	*/
7488
7489	static void
7490	adjust_iv_update_pos (struct iv_cand cand, struct* iv_use *use)
7491	{
7492	tree var_after;
7493	gimple iv_update, stmt;
7494	basic_block bb;
7495	gimple_stmt_iterator gsi, gsi_iv;
7496
7497	if (cand->pos != IP_NORMAL)
7498	return;
7499
7500	var_after = cand->var_after;
7501	iv_update = SSA_NAME_DEF_STMT (var_after);
7502
7503	bb = gimple_bb (g: iv_update);
7504	gsi = gsi_last_nondebug_bb (bb);
7505	stmt = gsi_stmt (i: gsi);
7506
7507	/ Only handle conditional statement for now. /
7508	if (gimple_code (g: stmt) != GIMPLE_COND)
7509	return;
7510
7511	gsi_prev_nondebug (i: &gsi);
7512	stmt = gsi_stmt (i: gsi);
7513	if (stmt != iv_update)
7514	return;
7515
7516	gsi_prev_nondebug (i: &gsi);
7517	if (gsi_end_p (i: gsi))
7518	return;
7519
7520	stmt = gsi_stmt (i: gsi);
7521	if (gimple_code (g: stmt) != GIMPLE_ASSIGN)
7522	return;
7523
7524	if (stmt != use->stmt)
7525	return;
7526
7527	if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7528	return;
7529
7530	if (dump_file && (dump_flags & TDF_DETAILS))
7531	{
7532	fprintf (stream: dump_file, format: "Reordering \n");
7533	print_gimple_stmt (dump_file, iv_update, `0`);
7534	print_gimple_stmt (dump_file, use->stmt, `0`);
7535	fprintf (stream: dump_file, format: "\n");
7536	}
7537
7538	gsi = gsi_for_stmt (use->stmt);
7539	gsi_iv = gsi_for_stmt (iv_update);
7540	gsi_move_before (&gsi_iv, &gsi);
7541
7542	cand->pos = IP_BEFORE_USE;
7543	cand->incremented_at = use->stmt;
7544	}
7545
7546	/ Return the alias pointer type that should be used for a MEM_REF*
7547	associated with USE, which has type USE_PTR_ADDRESS. /*
7548
7549	static tree
7550	get_alias_ptr_type_for_ptr_address (iv_use *use)
7551	{
7552	gcall call = as_a <gcall > (p: use->stmt);
7553	switch (gimple_call_internal_fn (gs: call))
7554	{
7555	case IFN_MASK_LOAD:
7556	case IFN_MASK_STORE:
7557	case IFN_MASK_LOAD_LANES:
7558	case IFN_MASK_STORE_LANES:
7559	case IFN_MASK_LEN_LOAD_LANES:
7560	case IFN_MASK_LEN_STORE_LANES:
7561	case IFN_LEN_LOAD:
7562	case IFN_LEN_STORE:
7563	case IFN_MASK_LEN_LOAD:
7564	case IFN_MASK_LEN_STORE:
7565	/ The second argument contains the correct alias type. /
7566	gcc_assert (use->op_p == gimple_call_arg_ptr (call, `0`));
7567	return TREE_TYPE (gimple_call_arg (call, `1`));
7568
7569	default:
7570	gcc_unreachable ();
7571	}
7572	}
7573
7574
7575	/ Rewrites USE (address that is an iv) using candidate CAND. /
7576
7577	static void
7578	rewrite_use_address (struct ivopts_data *data,
7579	struct iv_use use, struct* iv_cand *cand)
7580	{
7581	aff_tree aff;
7582	bool ok;
7583
7584	adjust_iv_update_pos (cand, use);
7585	ok = get_computation_aff (data, at: use->stmt, use, cand, aff: &aff);
7586	gcc_assert (ok);
7587	unshare_aff_combination (&aff);
7588
7589	/ To avoid undefined overflow problems, all IV candidates use unsigned*
7590	integer types. The drawback is that this makes it impossible for
7591	create_mem_ref to distinguish an IV that is based on a memory object
7592	from one that represents simply an offset.
7593
7594	To work around this problem, we pass a hint to create_mem_ref that
7595	indicates which variable (if any) in aff is an IV based on a memory
7596	object. Note that we only consider the candidate. If this is not
7597	based on an object, the base of the reference is in some subexpression
7598	of the use -- but these will use pointer types, so they are recognized
7599	by the create_mem_ref heuristics anyway. /*
7600	tree iv = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7601	tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7602	gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7603	tree type = use->mem_type;
7604	tree alias_ptr_type;
7605	if (use->type == USE_PTR_ADDRESS)
7606	alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7607	else
7608	{
7609	gcc_assert (type == TREE_TYPE (*use->op_p));
7610	unsigned int align = get_object_alignment (*use->op_p);
7611	if (align != TYPE_ALIGN (type))
7612	type = build_aligned_type (type, align);
7613	alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7614	}
7615	tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7616	iv, base_hint, data->speed);
7617
7618	if (use->type == USE_PTR_ADDRESS)
7619	{
7620	ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7621	ref = fold_convert (get_use_type (use), ref);
7622	ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7623	true, GSI_SAME_STMT);
7624	}
7625	else
7626	{
7627	/ When we end up confused enough and have no suitable base but*
7628	stuffed everything to index2 use a LEA for the address and
7629	create a plain MEM_REF to avoid basing a memory reference
7630	on address zero which create_mem_ref_raw does as fallback. /*
7631	if (TREE_CODE (ref) == TARGET_MEM_REF
7632	&& TMR_INDEX2 (ref) != NULL_TREE
7633	&& integer_zerop (TREE_OPERAND (ref, `0`)))
7634	{
7635	ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, `0`)), ref);
7636	ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637	true, GSI_SAME_STMT);
7638	ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7639	}
7640	copy_ref_info (ref, *use->op_p);
7641	}
7642
7643	*use->op_p = ref;
7644	}
7645
7646	/ Rewrites USE (the condition such that one of the arguments is an iv) using*
7647	candidate CAND. /*
7648
7649	static void
7650	rewrite_use_compare (struct ivopts_data *data,
7651	struct iv_use use, struct* iv_cand *cand)
7652	{
7653	tree comp, op, bound;
7654	gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7655	enum tree_code compare;
7656	struct iv_group *group = data->vgroups [use->group_id];
7657	class cost_pair *cp = get_group_iv_cost (data, group, cand);
7658
7659	bound = cp->value;
7660	if (bound)
7661	{
7662	tree var = var_at_stmt (loop: data->current_loop, cand, stmt: use->stmt);
7663	tree var_type = TREE_TYPE (var);
7664	gimple_seq stmts;
7665
7666	if (dump_file && (dump_flags & TDF_DETAILS))
7667	{
7668	fprintf (stream: dump_file, format: "Replacing exit test: ");
7669	print_gimple_stmt (dump_file, use->stmt, `0`, TDF_SLIM);
7670	}
7671	compare = cp->comp;
7672	bound = unshare_expr (fold_convert (var_type, bound));
7673	op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7674	if (stmts)
7675	gsi_insert_seq_on_edge_immediate (
7676	loop_preheader_edge (data->current_loop),
7677	stmts);
7678
7679	gcond cond_stmt = as_a <gcond > (p: use->stmt);
7680	gimple_cond_set_lhs (gs: cond_stmt, lhs: var);
7681	gimple_cond_set_code (gs: cond_stmt, code: compare);
7682	gimple_cond_set_rhs (gs: cond_stmt, rhs: op);
7683	return;
7684	}
7685
7686	/ The induction variable elimination failed; just express the original*
7687	giv. /*
7688	comp = get_computation_at (data, at: use->stmt, use, cand);
7689	gcc_assert (comp != NULL_TREE);
7690	gcc_assert (use->op_p != NULL);
7691	use->op_p = force_gimple_operand_gsi (&bsi, comp, true*,
7692	SSA_NAME_VAR (*use->op_p),
7693	true, GSI_SAME_STMT);
7694	}
7695
7696	/ Rewrite the groups using the selected induction variables. /
7697
7698	static void
7699	rewrite_groups (struct ivopts_data *data)
7700	{
7701	unsigned i, j;
7702
7703	for (i = `0`; i < data->vgroups.length (); i++)
7704	{
7705	struct iv_group *group = data->vgroups [i];
7706	struct iv_cand *cand = group->selected;
7707
7708	gcc_assert (cand);
7709
7710	if (group->type == USE_NONLINEAR_EXPR)
7711	{
7712	for (j = `0`; j < group->vuses.length (); j++)
7713	{
7714	rewrite_use_nonlinear_expr (data, use: group->vuses [j], cand);
7715	update_stmt (s: group->vuses [j]->stmt);
7716	}
7717	}
7718	else if (address_p (type: group->type))
7719	{
7720	for (j = `0`; j < group->vuses.length (); j++)
7721	{
7722	rewrite_use_address (data, use: group->vuses [j], cand);
7723	update_stmt (s: group->vuses [j]->stmt);
7724	}
7725	}
7726	else
7727	{
7728	gcc_assert (group->type == USE_COMPARE);
7729
7730	for (j = `0`; j < group->vuses.length (); j++)
7731	{
7732	rewrite_use_compare (data, use: group->vuses [j], cand);
7733	update_stmt (s: group->vuses [j]->stmt);
7734	}
7735	}
7736	}
7737	}
7738
7739	/ Removes the ivs that are not used after rewriting. /
7740
7741	static void
7742	remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7743	{
7744	unsigned j;
7745	bitmap_iterator bi;
7746
7747	/ Figure out an order in which to release SSA DEFs so that we don't*
7748	release something that we'd have to propagate into a debug stmt
7749	afterwards. /*
7750	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, j, bi)
7751	{
7752	struct version_info *info;
7753
7754	info = ver_info (data, ver: j);
7755	if (info->iv
7756	&& !integer_zerop (info->iv->step)
7757	&& !info->inv_id
7758	&& !info->iv->nonlin_use
7759	&& !info->preserve_biv)
7760	{
7761	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7762
7763	tree def = info->iv->ssa_name;
7764
7765	if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7766	{
7767	imm_use_iterator imm_iter;
7768	use_operand_p use_p;
7769	gimple *stmt;
7770	int count = `0`;
7771
7772	FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7773	{
7774	if (!gimple_debug_bind_p (s: stmt))
7775	continue;
7776
7777	/ We just want to determine whether to do nothing*
7778	(count == 0), to substitute the computed
7779	expression into a single use of the SSA DEF by
7780	itself (count == 1), or to use a debug temp
7781	because the SSA DEF is used multiple times or as
7782	part of a larger expression (count > 1). /*
7783	count++;
7784	if (gimple_debug_bind_get_value (dbg: stmt) != def)
7785	count++;
7786
7787	if (count > `1`)
7788	break;
7789	}
7790
7791	if (!count)
7792	continue;
7793
7794	struct iv_use dummy_use;
7795	struct iv_cand best_cand = NULL, cand;
7796	unsigned i, best_pref = `0`, cand_pref;
7797	tree comp = NULL_TREE;
7798
7799	memset (s: &dummy_use, c: `0`, n: sizeof (dummy_use));
7800	dummy_use.iv = info->iv;
7801	for (i = `0`; i < data->vgroups.length () && i < `64`; i++)
7802	{
7803	cand = data->vgroups [i]->selected;
7804	if (cand == best_cand)
7805	continue;
7806	cand_pref = operand_equal_p (cand->iv->step,
7807	info->iv->step, flags: `0`)
7808	? `4` : `0`;
7809	cand_pref
7810	+= TYPE_MODE (TREE_TYPE (cand->iv->base))
7811	== TYPE_MODE (TREE_TYPE (info->iv->base))
7812	? `2` : `0`;
7813	cand_pref
7814	+= TREE_CODE (cand->iv->base) == INTEGER_CST
7815	? `1` : `0`;
7816	if (best_cand == NULL \|\| best_pref < cand_pref)
7817	{
7818	tree this_comp
7819	= get_debug_computation_at (data,
7820	SSA_NAME_DEF_STMT (def),
7821	use: &dummy_use, cand);
7822	if (this_comp)
7823	{
7824	best_cand = cand;
7825	best_pref = cand_pref;
7826	comp = this_comp;
7827	}
7828	}
7829	}
7830
7831	if (!best_cand)
7832	continue;
7833
7834	comp = unshare_expr (comp);
7835	if (count > `1`)
7836	{
7837	tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7838	/ FIXME: Is setting the mode really necessary? /
7839	if (SSA_NAME_VAR (def))
7840	SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7841	else
7842	SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7843	gdebug *def_temp
7844	= gimple_build_debug_bind (vexpr, comp, NULL);
7845	gimple_stmt_iterator gsi;
7846
7847	if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7848	gsi = gsi_after_labels (bb: gimple_bb
7849	(SSA_NAME_DEF_STMT (def)));
7850	else
7851	gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7852
7853	gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7854	comp = vexpr;
7855	}
7856
7857	FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7858	{
7859	if (!gimple_debug_bind_p (s: stmt))
7860	continue;
7861
7862	FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7863	SET_USE (use_p, comp);
7864
7865	update_stmt (s: stmt);
7866	}
7867	}
7868	}
7869	}
7870	}
7871
7872	/ Frees memory occupied by class tree_niter_desc in VALUE. Callback
7873	for hash_map::traverse. /*
7874
7875	bool
7876	free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7877	{
7878	if (value)
7879	{
7880	value->~tree_niter_desc ();
7881	free (ptr: value);
7882	}
7883	return true;
7884	}
7885
7886	/ Frees data allocated by the optimization of a single loop. /
7887
7888	static void
7889	free_loop_data (struct ivopts_data *data)
7890	{
7891	unsigned i, j;
7892	bitmap_iterator bi;
7893	tree obj;
7894
7895	if (data->niters)
7896	{
7897	data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7898	delete data->niters;
7899	data->niters = NULL;
7900	}
7901
7902	EXECUTE_IF_SET_IN_BITMAP (data->relevant, `0`, i, bi)
7903	{
7904	struct version_info *info;
7905
7906	info = ver_info (data, ver: i);
7907	info->iv = NULL;
7908	info->has_nonlin_use = false;
7909	info->preserve_biv = false;
7910	info->inv_id = `0`;
7911	}
7912	bitmap_clear (data->relevant);
7913	bitmap_clear (data->important_candidates);
7914
7915	for (i = `0`; i < data->vgroups.length (); i++)
7916	{
7917	struct iv_group *group = data->vgroups [i];
7918
7919	for (j = `0`; j < group->vuses.length (); j++)
7920	free (ptr: group->vuses [j]);
7921	group->vuses.release ();
7922
7923	BITMAP_FREE (group->related_cands);
7924	for (j = `0`; j < group->n_map_members; j++)
7925	{
7926	if (group->cost_map[j].inv_vars)
7927	BITMAP_FREE (group->cost_map[j].inv_vars);
7928	if (group->cost_map[j].inv_exprs)
7929	BITMAP_FREE (group->cost_map[j].inv_exprs);
7930	}
7931
7932	free (ptr: group->cost_map);
7933	free (ptr: group);
7934	}
7935	data->vgroups.truncate (size: `0`);
7936
7937	for (i = `0`; i < data->vcands.length (); i++)
7938	{
7939	struct iv_cand *cand = data->vcands [i];
7940
7941	if (cand->inv_vars)
7942	BITMAP_FREE (cand->inv_vars);
7943	if (cand->inv_exprs)
7944	BITMAP_FREE (cand->inv_exprs);
7945	free (ptr: cand);
7946	}
7947	data->vcands.truncate (size: `0`);
7948
7949	if (data->version_info_size < num_ssa_names)
7950	{
7951	data->version_info_size = `2` * num_ssa_names;
7952	free (ptr: data->version_info);
7953	data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7954	}
7955
7956	data->max_inv_var_id = `0`;
7957	data->max_inv_expr_id = `0`;
7958
7959	FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7960	SET_DECL_RTL (obj, NULL_RTX);
7961
7962	decl_rtl_to_reset.truncate (size: `0`);
7963
7964	data->inv_expr_tab->empty ();
7965
7966	data->iv_common_cand_tab->empty ();
7967	data->iv_common_cands.truncate (size: `0`);
7968	}
7969
7970	/ Finalizes data structures used by the iv optimization pass. LOOPS is the*
7971	loop tree. /*
7972
7973	static void
7974	tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7975	{
7976	free_loop_data (data);
7977	free (ptr: data->version_info);
7978	BITMAP_FREE (data->relevant);
7979	BITMAP_FREE (data->important_candidates);
7980
7981	decl_rtl_to_reset.release ();
7982	data->vgroups.release ();
7983	data->vcands.release ();
7984	delete data->inv_expr_tab;
7985	data->inv_expr_tab = NULL;
7986	free_affine_expand_cache (&data->name_expansion_cache);
7987	if (data->base_object_map)
7988	delete data->base_object_map;
7989	delete data->iv_common_cand_tab;
7990	data->iv_common_cand_tab = NULL;
7991	data->iv_common_cands.release ();
7992	obstack_free (&data->iv_obstack, NULL);
7993	}
7994
7995	/ Returns true if the loop body BODY includes any function calls. /
7996
7997	static bool
7998	loop_body_includes_call (basic_block body, unsigned* num_nodes)
7999	{
8000	gimple_stmt_iterator gsi;
8001	unsigned i;
8002
8003	for (i = `0`; i < num_nodes; i++)
8004	for (gsi = gsi_start_bb (bb: body[i]); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
8005	{
8006	gimple *stmt = gsi_stmt (i: gsi);
8007	if (is_gimple_call (gs: stmt)
8008	&& !gimple_call_internal_p (gs: stmt)
8009	&& !is_inexpensive_builtin (gimple_call_fndecl (gs: stmt)))
8010	return true;
8011	}
8012	return false;
8013	}
8014
8015	/ Determine cost scaling factor for basic blocks in loop. /
8016	#define COST_SCALING_FACTOR_BOUND (20)
8017
8018	static void
8019	determine_scaling_factor (struct ivopts_data data, basic_block body)
8020	{
8021	int lfreq = data->current_loop->header->count.to_frequency (cfun);
8022	if (!data->speed \|\| lfreq <= `0`)
8023	return;
8024
8025	int max_freq = lfreq;
8026	for (unsigned i = `0`; i < data->current_loop->num_nodes; i++)
8027	{
8028	body[i]->aux = (void *)(intptr_t) `1`;
8029	if (max_freq < body[i]->count.to_frequency (cfun))
8030	max_freq = body[i]->count.to_frequency (cfun);
8031	}
8032	if (max_freq > lfreq)
8033	{
8034	int divisor, factor;
8035	/ Check if scaling factor itself needs to be scaled by the bound. This*
8036	is to avoid overflow when scaling cost according to profile info. /*
8037	if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8038	{
8039	divisor = max_freq;
8040	factor = COST_SCALING_FACTOR_BOUND;
8041	}
8042	else
8043	{
8044	divisor = lfreq;
8045	factor = `1`;
8046	}
8047	for (unsigned i = `0`; i < data->current_loop->num_nodes; i++)
8048	{
8049	int bfreq = body[i]->count.to_frequency (cfun);
8050	if (bfreq <= lfreq)
8051	continue;
8052
8053	body[i]->aux = (void)(intptr_t) (factor bfreq / divisor);
8054	}
8055	}
8056	}
8057
8058	/ Find doloop comparison use and set its doloop_p on if found. /
8059
8060	static bool
8061	find_doloop_use (struct ivopts_data *data)
8062	{
8063	struct loop *loop = data->current_loop;
8064
8065	for (unsigned i = `0`; i < data->vgroups.length (); i++)
8066	{
8067	struct iv_group *group = data->vgroups [i];
8068	if (group->type == USE_COMPARE)
8069	{
8070	gcc_assert (group->vuses.length () == `1`);
8071	struct iv_use *use = group->vuses [`0`];
8072	gimple *stmt = use->stmt;
8073	if (gimple_code (g: stmt) == GIMPLE_COND)
8074	{
8075	basic_block bb = gimple_bb (g: stmt);
8076	edge true_edge, false_edge;
8077	extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8078	/ This comparison is used for loop latch. Require latch is empty*
8079	for now. /*
8080	if ((loop->latch == true_edge->dest
8081	\|\| loop->latch == false_edge->dest)
8082	&& empty_block_p (loop->latch))
8083	{
8084	group->doloop_p = true;
8085	if (dump_file && (dump_flags & TDF_DETAILS))
8086	{
8087	fprintf (stream: dump_file, format: "Doloop cmp iv use: ");
8088	print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8089	}
8090	return true;
8091	}
8092	}
8093	}
8094	}
8095
8096	return false;
8097	}
8098
8099	/ For the targets which support doloop, to predict whether later RTL doloop*
8100	transformation will perform on this loop, further detect the doloop use and
8101	mark the flag doloop_use_p if predicted. /*
8102
8103	void
8104	analyze_and_mark_doloop_use (struct ivopts_data *data)
8105	{
8106	data->doloop_use_p = false;
8107
8108	if (!flag_branch_on_count_reg)
8109	return;
8110
8111	if (data->current_loop->unroll == USHRT_MAX)
8112	return;
8113
8114	if (!generic_predict_doloop_p (data))
8115	return;
8116
8117	if (find_doloop_use (data))
8118	{
8119	data->doloop_use_p = true;
8120	if (dump_file && (dump_flags & TDF_DETAILS))
8121	{
8122	struct loop *loop = data->current_loop;
8123	fprintf (stream: dump_file,
8124	format: "Predict loop %d can perform"
8125	" doloop optimization later.\n",
8126	loop->num);
8127	flow_loop_dump (loop, dump_file, NULL, `1`);
8128	}
8129	}
8130	}
8131
8132	/ Optimizes the LOOP. Returns true if anything changed. /
8133
8134	static bool
8135	tree_ssa_iv_optimize_loop (struct ivopts_data data, class* loop *loop,
8136	bitmap toremove)
8137	{
8138	bool changed = false;
8139	class iv_ca *iv_ca;
8140	edge exit = single_dom_exit (loop);
8141	basic_block *body;
8142
8143	gcc_assert (!data->niters);
8144	data->current_loop = loop;
8145	data->loop_loc = find_loop_location (loop).get_location_t ();
8146	data->speed = optimize_loop_for_speed_p (loop);
8147
8148	if (dump_file && (dump_flags & TDF_DETAILS))
8149	{
8150	fprintf (stream: dump_file, format: "Processing loop %d", loop->num);
8151	if (data->loop_loc != UNKNOWN_LOCATION)
8152	fprintf (stream: dump_file, format: " at %s:%d", LOCATION_FILE (data->loop_loc),
8153	LOCATION_LINE (data->loop_loc));
8154	fprintf (stream: dump_file, format: "\n");
8155
8156	if (exit)
8157	{
8158	fprintf (stream: dump_file, format: " single exit %d -> %d, exit condition ",
8159	exit->src->index, exit->dest->index);
8160	print_gimple_stmt (dump_file, *gsi_last_bb (bb: exit->src),
8161	`0`, TDF_SLIM);
8162	fprintf (stream: dump_file, format: "\n");
8163	}
8164
8165	fprintf (stream: dump_file, format: "\n");
8166	}
8167
8168	body = get_loop_body (loop);
8169	data->body_includes_call = loop_body_includes_call (body, num_nodes: loop->num_nodes);
8170	renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8171
8172	data->loop_single_exit_p
8173	= exit != NULL && loop_only_exit_p (loop, body, exit);
8174
8175	/ For each ssa name determines whether it behaves as an induction variable*
8176	in some loop. /*
8177	if (!find_induction_variables (data, body))
8178	goto finish;
8179
8180	/ Finds interesting uses (item 1). /
8181	find_interesting_uses (data, body);
8182	if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8183	goto finish;
8184
8185	/ Determine cost scaling factor for basic blocks in loop. /
8186	determine_scaling_factor (data, body);
8187
8188	/ Analyze doloop possibility and mark the doloop use if predicted. /
8189	analyze_and_mark_doloop_use (data);
8190
8191	/ Finds candidates for the induction variables (item 2). /
8192	find_iv_candidates (data);
8193
8194	/ Calculates the costs (item 3, part 1). /
8195	determine_iv_costs (data);
8196	determine_group_iv_costs (data);
8197	determine_set_costs (data);
8198
8199	/ Find the optimal set of induction variables (item 3, part 2). /
8200	iv_ca = find_optimal_iv_set (data);
8201	/ Cleanup basic block aux field. /
8202	for (unsigned i = `0`; i < data->current_loop->num_nodes; i++)
8203	body[i]->aux = NULL;
8204	if (!iv_ca)
8205	goto finish;
8206	changed = true;
8207
8208	/ Create the new induction variables (item 4, part 1). /
8209	create_new_ivs (data, set: iv_ca);
8210	iv_ca_free (ivs: &iv_ca);
8211
8212	/ Rewrite the uses (item 4, part 2). /
8213	rewrite_groups (data);
8214
8215	/ Remove the ivs that are unused after rewriting. /
8216	remove_unused_ivs (data, toremove);
8217
8218	finish:
8219	free (ptr: body);
8220	free_loop_data (data);
8221
8222	return changed;
8223	}
8224
/* Main entry point.  Optimizes induction variables in loops.  */
8226
8227	void
8228	tree_ssa_iv_optimize (void)
8229	{
8230	struct ivopts_data data;
8231	auto_bitmap toremove;
8232
8233	tree_ssa_iv_optimize_init (data: &data);
8234	mark_ssa_maybe_undefs ();
8235
8236	/ Optimize the loops starting with the innermost ones. /
8237	for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8238	{
8239	if (!dbg_cnt (index: ivopts_loop))
8240	continue;
8241
8242	if (dump_file && (dump_flags & TDF_DETAILS))
8243	flow_loop_dump (loop, dump_file, NULL, `1`);
8244
8245	tree_ssa_iv_optimize_loop (data: &data, loop, toremove);
8246	}
8247
8248	/ Remove eliminated IV defs. /
8249	release_defs_bitset (toremove);
8250
8251	/ We have changed the structure of induction variables; it might happen*
8252	that definitions in the scev database refer to some of them that were
8253	eliminated. /*
8254	scev_reset_htab ();
8255	/ Likewise niter and control-IV information. /
8256	free_numbers_of_iterations_estimates (cfun);
8257
8258	tree_ssa_iv_optimize_finalize (data: &data);
8259	}
8260
8261	#include "gt-tree-ssa-loop-ivopts.h"
8262

source code of gcc/tree-ssa-loop-ivopts.cc