| 1 | /* Vectorizer Specific Loop Manipulations |
| 2 | Copyright (C) 2003-2026 Free Software Foundation, Inc. |
| 3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
| 4 | and Ira Rosen <irar@il.ibm.com> |
| 5 | |
| 6 | This file is part of GCC. |
| 7 | |
| 8 | GCC is free software; you can redistribute it and/or modify it under |
| 9 | the terms of the GNU General Public License as published by the Free |
| 10 | Software Foundation; either version 3, or (at your option) any later |
| 11 | version. |
| 12 | |
| 13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| 14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 16 | for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License |
| 19 | along with GCC; see the file COPYING3. If not see |
| 20 | <http://www.gnu.org/licenses/>. */ |
| 21 | |
| 22 | #include "config.h" |
| 23 | #include "system.h" |
| 24 | #include "coretypes.h" |
| 25 | #include "backend.h" |
| 26 | #include "tree.h" |
| 27 | #include "gimple.h" |
| 28 | #include "cfghooks.h" |
| 29 | #include "tree-pass.h" |
| 30 | #include "ssa.h" |
| 31 | #include "fold-const.h" |
| 32 | #include "cfganal.h" |
| 33 | #include "gimplify.h" |
| 34 | #include "gimple-iterator.h" |
| 35 | #include "gimplify-me.h" |
| 36 | #include "tree-cfg.h" |
| 37 | #include "tree-ssa-loop-manip.h" |
| 38 | #include "tree-into-ssa.h" |
| 39 | #include "tree-ssa.h" |
| 40 | #include "cfgloop.h" |
| 41 | #include "tree-scalar-evolution.h" |
| 42 | #include "tree-vectorizer.h" |
| 43 | #include "tree-ssa-loop-ivopts.h" |
| 44 | #include "gimple-fold.h" |
| 45 | #include "tree-ssa-loop-niter.h" |
| 46 | #include "internal-fn.h" |
| 47 | #include "stor-layout.h" |
| 48 | #include "optabs-query.h" |
| 49 | #include "vec-perm-indices.h" |
| 50 | #include "insn-config.h" |
| 51 | #include "rtl.h" |
| 52 | #include "recog.h" |
| 53 | #include "langhooks.h" |
| 54 | #include "tree-vector-builder.h" |
| 55 | #include "optabs-tree.h" |
| 56 | #include "hierarchical_discriminator.h" |
| 57 | |
| 58 | |
| 59 | /************************************************************************* |
| 60 | Simple Loop Peeling Utilities |
| 61 | |
| 62 | Utilities to support loop peeling for vectorization purposes. |
| 63 | *************************************************************************/ |
| 64 | |
| 65 | |
| 66 | /* Renames the use *OP_P. */ |
| 67 | |
| 68 | static void |
| 69 | rename_use_op (use_operand_p op_p) |
| 70 | { |
| 71 | tree new_name; |
| 72 | |
| 73 | if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) |
| 74 | return; |
| 75 | |
| 76 | new_name = get_current_def (USE_FROM_PTR (op_p)); |
| 77 | |
| 78 | /* Something defined outside of the loop. */ |
| 79 | if (!new_name) |
| 80 | return; |
| 81 | |
| 82 | /* An ordinary ssa name defined in the loop. */ |
| 83 | |
| 84 | SET_USE (op_p, new_name); |
| 85 | } |
| 86 | |
| 87 | |
| 88 | /* Renames the variables in basic block BB. Allow renaming of PHI arguments |
| 89 | on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is |
| 90 | true. */ |
| 91 | |
| 92 | static void |
| 93 | rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop) |
| 94 | { |
| 95 | gimple *stmt; |
| 96 | use_operand_p use_p; |
| 97 | ssa_op_iter iter; |
| 98 | edge e; |
| 99 | edge_iterator ei; |
| 100 | class loop *loop = bb->loop_father; |
| 101 | class loop *outer_loop = NULL; |
| 102 | |
| 103 | if (rename_from_outer_loop) |
| 104 | { |
| 105 | gcc_assert (loop); |
| 106 | outer_loop = loop_outer (loop); |
| 107 | } |
| 108 | |
| 109 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); |
| 110 | gsi_next (i: &gsi)) |
| 111 | { |
| 112 | stmt = gsi_stmt (i: gsi); |
| 113 | FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES) |
| 114 | rename_use_op (op_p: use_p); |
| 115 | } |
| 116 | |
| 117 | FOR_EACH_EDGE (e, ei, bb->preds) |
| 118 | { |
| 119 | if (!flow_bb_inside_loop_p (loop, e->src)) |
| 120 | { |
| 121 | if (!rename_from_outer_loop) |
| 122 | continue; |
| 123 | if (e->src != outer_loop->header) |
| 124 | { |
| 125 | if (outer_loop->inner->next) |
| 126 | { |
| 127 | /* If outer_loop has 2 inner loops, allow there to |
| 128 | be an extra basic block which decides which of the |
| 129 | two loops to use using LOOP_VECTORIZED. */ |
| 130 | if (!single_pred_p (bb: e->src) |
| 131 | || single_pred (bb: e->src) != outer_loop->header) |
| 132 | continue; |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); |
| 137 | gsi_next (i: &gsi)) |
| 138 | rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e)); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | |
/* A scheduled replacement of debug-stmt references: uses of FROM that are
   dominated by BB (and not by FROM's definition) should be rewritten to TO.
   Processed by adjust_debug_stmts_now.  */
struct adjust_info
{
  tree from, to;     /* SSA name to replace and its loop-closed substitute.  */
  basic_block bb;    /* Block whose dominated debug uses are rewritten.  */
};
| 148 | |
| 149 | /* A stack of values to be adjusted in debug stmts. We have to |
| 150 | process them LIFO, so that the closest substitution applies. If we |
| 151 | processed them FIFO, without the stack, we might substitute uses |
| 152 | with a PHI DEF that would soon become non-dominant, and when we got |
| 153 | to the suitable one, it wouldn't have anything to substitute any |
| 154 | more. */ |
| 155 | static vec<adjust_info, va_heap> adjust_vec; |
| 156 | |
| 157 | /* Adjust any debug stmts that referenced AI->from values to use the |
| 158 | loop-closed AI->to, if the references are dominated by AI->bb and |
| 159 | not by the definition of AI->from. */ |
| 160 | |
| 161 | static void |
| 162 | adjust_debug_stmts_now (adjust_info *ai) |
| 163 | { |
| 164 | basic_block bbphi = ai->bb; |
| 165 | tree orig_def = ai->from; |
| 166 | tree new_def = ai->to; |
| 167 | imm_use_iterator imm_iter; |
| 168 | gimple *stmt; |
| 169 | basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def)); |
| 170 | |
| 171 | gcc_assert (dom_info_available_p (CDI_DOMINATORS)); |
| 172 | |
| 173 | /* Adjust any debug stmts that held onto non-loop-closed |
| 174 | references. */ |
| 175 | FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def) |
| 176 | { |
| 177 | use_operand_p use_p; |
| 178 | basic_block bbuse; |
| 179 | |
| 180 | if (!is_gimple_debug (gs: stmt)) |
| 181 | continue; |
| 182 | |
| 183 | gcc_assert (gimple_debug_bind_p (stmt)); |
| 184 | |
| 185 | bbuse = gimple_bb (g: stmt); |
| 186 | |
| 187 | if ((bbuse == bbphi |
| 188 | || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi)) |
| 189 | && !(bbuse == bbdef |
| 190 | || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))) |
| 191 | { |
| 192 | if (new_def) |
| 193 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
| 194 | SET_USE (use_p, new_def); |
| 195 | else |
| 196 | { |
| 197 | gimple_debug_bind_reset_value (dbg: stmt); |
| 198 | update_stmt (s: stmt); |
| 199 | } |
| 200 | } |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | /* Adjust debug stmts as scheduled before. */ |
| 205 | |
| 206 | static void |
| 207 | adjust_vec_debug_stmts (void) |
| 208 | { |
| 209 | if (!MAY_HAVE_DEBUG_BIND_STMTS) |
| 210 | return; |
| 211 | |
| 212 | gcc_assert (adjust_vec.exists ()); |
| 213 | |
| 214 | while (!adjust_vec.is_empty ()) |
| 215 | { |
| 216 | adjust_debug_stmts_now (ai: &adjust_vec.last ()); |
| 217 | adjust_vec.pop (); |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | /* Adjust any debug stmts that referenced FROM values to use the |
| 222 | loop-closed TO, if the references are dominated by BB and not by |
| 223 | the definition of FROM. If adjust_vec is non-NULL, adjustments |
| 224 | will be postponed until adjust_vec_debug_stmts is called. */ |
| 225 | |
| 226 | static void |
| 227 | adjust_debug_stmts (tree from, tree to, basic_block bb) |
| 228 | { |
| 229 | adjust_info ai; |
| 230 | |
| 231 | if (MAY_HAVE_DEBUG_BIND_STMTS |
| 232 | && TREE_CODE (from) == SSA_NAME |
| 233 | && ! SSA_NAME_IS_DEFAULT_DEF (from) |
| 234 | && ! virtual_operand_p (op: from)) |
| 235 | { |
| 236 | ai.from = from; |
| 237 | ai.to = to; |
| 238 | ai.bb = bb; |
| 239 | |
| 240 | if (adjust_vec.exists ()) |
| 241 | adjust_vec.safe_push (obj: ai); |
| 242 | else |
| 243 | adjust_debug_stmts_now (ai: &ai); |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information |
| 248 | to adjust any debug stmts that referenced the old phi arg, |
| 249 | presumably non-loop-closed references left over from other |
| 250 | transformations. */ |
| 251 | |
| 252 | static void |
| 253 | adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def) |
| 254 | { |
| 255 | tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e); |
| 256 | |
| 257 | gcc_assert (TREE_CODE (orig_def) != SSA_NAME |
| 258 | || orig_def != new_def); |
| 259 | |
| 260 | SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def); |
| 261 | |
| 262 | if (MAY_HAVE_DEBUG_BIND_STMTS) |
| 263 | adjust_debug_stmts (from: orig_def, PHI_RESULT (update_phi), |
| 264 | bb: gimple_bb (g: update_phi)); |
| 265 | } |
| 266 | |
| 267 | /* Define one loop rgroup control CTRL from loop LOOP. INIT_CTRL is the value |
| 268 | that the control should have during the first iteration and NEXT_CTRL is the |
| 269 | value that it should have on subsequent iterations. */ |
| 270 | |
| 271 | static void |
| 272 | vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl, |
| 273 | tree next_ctrl) |
| 274 | { |
| 275 | gphi *phi = create_phi_node (ctrl, loop->header); |
| 276 | add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION); |
| 277 | add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION); |
| 278 | } |
| 279 | |
| 280 | /* Add SEQ to the end of LOOP's preheader block. */ |
| 281 | |
| 282 | static void |
| 283 | (class loop *loop, gimple_seq seq) |
| 284 | { |
| 285 | if (seq) |
| 286 | { |
| 287 | edge pe = loop_preheader_edge (loop); |
| 288 | basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); |
| 289 | gcc_assert (!new_bb); |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | /* Add SEQ to the beginning of LOOP's header block. */ |
| 294 | |
| 295 | static void |
| 296 | (class loop *loop, gimple_seq seq) |
| 297 | { |
| 298 | if (seq) |
| 299 | { |
| 300 | gimple_stmt_iterator gsi = gsi_after_labels (bb: loop->header); |
| 301 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | /* Return true if the target can interleave elements of two vectors. |
| 306 | OFFSET is 0 if the first half of the vectors should be interleaved |
| 307 | or 1 if the second half should. When returning true, store the |
| 308 | associated permutation in INDICES. */ |
| 309 | |
| 310 | static bool |
| 311 | interleave_supported_p (vec_perm_indices *indices, tree vectype, |
| 312 | unsigned int offset) |
| 313 | { |
| 314 | poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (node: vectype); |
| 315 | poly_uint64 base = exact_div (a: nelts, b: 2) * offset; |
| 316 | vec_perm_builder sel (nelts, 2, 3); |
| 317 | for (unsigned int i = 0; i < 3; ++i) |
| 318 | { |
| 319 | sel.quick_push (obj: base + i); |
| 320 | sel.quick_push (obj: base + i + nelts); |
| 321 | } |
| 322 | indices->new_vector (sel, 2, nelts); |
| 323 | return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), |
| 324 | *indices); |
| 325 | } |
| 326 | |
| 327 | /* Try to use permutes to define the masks in DEST_RGM using the masks |
| 328 | in SRC_RGM, given that the former has twice as many masks as the |
| 329 | latter. Return true on success, adding any new statements to SEQ. */ |
| 330 | |
| 331 | static bool |
| 332 | vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm, |
| 333 | rgroup_controls *src_rgm) |
| 334 | { |
| 335 | tree src_masktype = src_rgm->type; |
| 336 | tree dest_masktype = dest_rgm->type; |
| 337 | machine_mode src_mode = TYPE_MODE (src_masktype); |
| 338 | insn_code icode1, icode2; |
| 339 | if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter |
| 340 | && (icode1 = optab_handler (op: vec_unpacku_hi_optab, |
| 341 | mode: src_mode)) != CODE_FOR_nothing |
| 342 | && (icode2 = optab_handler (op: vec_unpacku_lo_optab, |
| 343 | mode: src_mode)) != CODE_FOR_nothing) |
| 344 | { |
| 345 | /* Unpacking the source masks gives at least as many mask bits as |
| 346 | we need. We can then VIEW_CONVERT any excess bits away. */ |
| 347 | machine_mode dest_mode = insn_data[icode1].operand[0].mode; |
| 348 | gcc_assert (dest_mode == insn_data[icode2].operand[0].mode); |
| 349 | tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode); |
| 350 | for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i) |
| 351 | { |
| 352 | tree src = src_rgm->controls[i / 2]; |
| 353 | tree dest = dest_rgm->controls[i]; |
| 354 | tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1) |
| 355 | ? VEC_UNPACK_HI_EXPR |
| 356 | : VEC_UNPACK_LO_EXPR); |
| 357 | gassign *stmt; |
| 358 | if (dest_masktype == unpack_masktype) |
| 359 | stmt = gimple_build_assign (dest, code, src); |
| 360 | else |
| 361 | { |
| 362 | tree temp = make_ssa_name (var: unpack_masktype); |
| 363 | stmt = gimple_build_assign (temp, code, src); |
| 364 | gimple_seq_add_stmt (seq, stmt); |
| 365 | stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR, |
| 366 | build1 (VIEW_CONVERT_EXPR, |
| 367 | dest_masktype, temp)); |
| 368 | } |
| 369 | gimple_seq_add_stmt (seq, stmt); |
| 370 | } |
| 371 | return true; |
| 372 | } |
| 373 | vec_perm_indices indices[2]; |
| 374 | if (dest_masktype == src_masktype |
| 375 | && interleave_supported_p (indices: &indices[0], vectype: src_masktype, offset: 0) |
| 376 | && interleave_supported_p (indices: &indices[1], vectype: src_masktype, offset: 1)) |
| 377 | { |
| 378 | /* The destination requires twice as many mask bits as the source, so |
| 379 | we can use interleaving permutes to double up the number of bits. */ |
| 380 | tree masks[2]; |
| 381 | for (unsigned int i = 0; i < 2; ++i) |
| 382 | masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]); |
| 383 | for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i) |
| 384 | { |
| 385 | tree src = src_rgm->controls[i / 2]; |
| 386 | tree dest = dest_rgm->controls[i]; |
| 387 | gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR, |
| 388 | src, src, masks[i & 1]); |
| 389 | gimple_seq_add_stmt (seq, stmt); |
| 390 | } |
| 391 | return true; |
| 392 | } |
| 393 | return false; |
| 394 | } |
| 395 | |
| 396 | /* Populate DEST_RGM->controls, given that they should add up to STEP. |
| 397 | |
| 398 | STEP = MIN_EXPR <ivtmp_34, VF>; |
| 399 | |
| 400 | First length (MIN (X, VF/N)): |
| 401 | loop_len_15 = MIN_EXPR <STEP, VF/N>; |
| 402 | |
| 403 | Second length: |
| 404 | tmp = STEP - loop_len_15; |
| 405 | loop_len_16 = MIN (tmp, VF/N); |
| 406 | |
| 407 | Third length: |
| 408 | tmp2 = tmp - loop_len_16; |
| 409 | loop_len_17 = MIN (tmp2, VF/N); |
| 410 | |
| 411 | Last length: |
| 412 | loop_len_18 = tmp2 - loop_len_17; |
| 413 | */ |
| 414 | |
| 415 | static void |
| 416 | vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq, |
| 417 | rgroup_controls *dest_rgm, tree step) |
| 418 | { |
| 419 | tree ctrl_type = dest_rgm->type; |
| 420 | poly_uint64 nitems_per_ctrl |
| 421 | = TYPE_VECTOR_SUBPARTS (node: ctrl_type) * dest_rgm->factor; |
| 422 | tree length_limit = build_int_cst (iv_type, nitems_per_ctrl); |
| 423 | |
| 424 | for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i) |
| 425 | { |
| 426 | tree ctrl = dest_rgm->controls[i]; |
| 427 | if (i == 0) |
| 428 | { |
| 429 | /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N]. */ |
| 430 | gassign *assign |
| 431 | = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit); |
| 432 | gimple_seq_add_stmt (seq, assign); |
| 433 | } |
| 434 | else if (i == dest_rgm->controls.length () - 1) |
| 435 | { |
| 436 | /* Last iteration: Remain capped to the range [0, VF/N]. */ |
| 437 | gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step, |
| 438 | dest_rgm->controls[i - 1]); |
| 439 | gimple_seq_add_stmt (seq, assign); |
| 440 | } |
| 441 | else |
| 442 | { |
| 443 | /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N]. */ |
| 444 | step = gimple_build (seq, code: MINUS_EXPR, type: iv_type, ops: step, |
| 445 | ops: dest_rgm->controls[i - 1]); |
| 446 | gassign *assign |
| 447 | = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit); |
| 448 | gimple_seq_add_stmt (seq, assign); |
| 449 | } |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | /* Stores the standard position for induction variable increment in belonging to |
| 454 | LOOP_EXIT (just before the exit condition of the given exit to BSI. |
| 455 | INSERT_AFTER is set to true if the increment should be inserted after |
| 456 | *BSI. */ |
| 457 | |
| 458 | void |
| 459 | vect_iv_increment_position (edge loop_exit, gimple_stmt_iterator *bsi, |
| 460 | bool *insert_after) |
| 461 | { |
| 462 | basic_block bb = loop_exit->src; |
| 463 | *bsi = gsi_last_bb (bb); |
| 464 | *insert_after = false; |
| 465 | } |
| 466 | |
| 467 | /* Helper for vect_set_loop_condition_partial_vectors. Generate definitions |
| 468 | for all the rgroup controls in RGC and return a control that is nonzero |
| 469 | when the loop needs to iterate. Add any new preheader statements to |
| 470 | PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond. |
| 471 | |
| 472 | RGC belongs to loop LOOP. The loop originally iterated NITERS |
| 473 | times and has been vectorized according to LOOP_VINFO. |
| 474 | |
| 475 | If NITERS_SKIP is nonnull, the first iteration of the vectorized loop |
| 476 | starts with NITERS_SKIP dummy iterations of the scalar loop before |
| 477 | the real work starts. The mask elements for these dummy iterations |
| 478 | must be 0, to ensure that the extra iterations do not have an effect. |
| 479 | |
| 480 | It is known that: |
| 481 | |
| 482 | NITERS * RGC->max_nscalars_per_iter * RGC->factor |
| 483 | |
| 484 | does not overflow. However, MIGHT_WRAP_P says whether an induction |
| 485 | variable that starts at 0 and has step: |
| 486 | |
| 487 | VF * RGC->max_nscalars_per_iter * RGC->factor |
| 488 | |
| 489 | might overflow before hitting a value above: |
| 490 | |
| 491 | (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor |
| 492 | |
| 493 | This means that we cannot guarantee that such an induction variable |
| 494 | would ever hit a value that produces a set of all-false masks or zero |
| 495 | lengths for RGC. |
| 496 | |
| 497 | Note: the cost of the code generated by this function is modeled |
| 498 | by vect_estimate_min_profitable_iters, so changes here may need |
| 499 | corresponding changes there. */ |
| 500 | |
| 501 | static tree |
| 502 | vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, |
| 503 | gimple_seq *, |
| 504 | gimple_seq *, |
| 505 | gimple_stmt_iterator loop_cond_gsi, |
| 506 | rgroup_controls *rgc, tree niters, |
| 507 | tree niters_skip, bool might_wrap_p, |
| 508 | tree *iv_step, tree *compare_step) |
| 509 | { |
| 510 | tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); |
| 511 | tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); |
| 512 | bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); |
| 513 | |
| 514 | tree ctrl_type = rgc->type; |
| 515 | unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor; |
| 516 | poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (node: ctrl_type) * rgc->factor; |
| 517 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
| 518 | tree length_limit = NULL_TREE; |
| 519 | /* For length, we need length_limit to ensure length in range. */ |
| 520 | if (!use_masks_p) |
| 521 | length_limit = build_int_cst (compare_type, nitems_per_ctrl); |
| 522 | |
| 523 | /* Calculate the maximum number of item values that the rgroup |
| 524 | handles in total, the number that it handles for each iteration |
| 525 | of the vector loop, and the number that it should skip during the |
| 526 | first iteration of the vector loop. */ |
| 527 | tree nitems_total = niters; |
| 528 | tree nitems_step = build_int_cst (iv_type, vf); |
| 529 | tree nitems_skip = niters_skip; |
| 530 | if (nitems_per_iter != 1) |
| 531 | { |
| 532 | /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that |
| 533 | these multiplications don't overflow. */ |
| 534 | tree compare_factor = build_int_cst (compare_type, nitems_per_iter); |
| 535 | tree iv_factor = build_int_cst (iv_type, nitems_per_iter); |
| 536 | nitems_total = gimple_build (seq: preheader_seq, code: MULT_EXPR, type: compare_type, |
| 537 | ops: nitems_total, ops: compare_factor); |
| 538 | nitems_step = gimple_build (seq: preheader_seq, code: MULT_EXPR, type: iv_type, |
| 539 | ops: nitems_step, ops: iv_factor); |
| 540 | if (nitems_skip) |
| 541 | nitems_skip = gimple_build (seq: preheader_seq, code: MULT_EXPR, type: compare_type, |
| 542 | ops: nitems_skip, ops: compare_factor); |
| 543 | } |
| 544 | |
| 545 | /* Create an induction variable that counts the number of items |
| 546 | processed. */ |
| 547 | tree index_before_incr, index_after_incr; |
| 548 | gimple_stmt_iterator incr_gsi; |
| 549 | bool insert_after; |
| 550 | edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo); |
| 551 | vect_iv_increment_position (loop_exit: exit_e, bsi: &incr_gsi, insert_after: &insert_after); |
| 552 | if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)) |
| 553 | { |
| 554 | /* Create an IV that counts down from niters_total and whose step |
| 555 | is the (variable) amount processed in the current iteration: |
| 556 | ... |
| 557 | _10 = (unsigned long) count_12(D); |
| 558 | ... |
| 559 | # ivtmp_9 = PHI <ivtmp_35(6), _10(5)> |
| 560 | _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>; |
| 561 | ... |
| 562 | vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0); |
| 563 | ... |
| 564 | ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4]; |
| 565 | ... |
| 566 | if (ivtmp_9 > POLY_INT_CST [4, 4]) |
| 567 | goto <bb 4>; [83.33%] |
| 568 | else |
| 569 | goto <bb 5>; [16.67%] |
| 570 | */ |
| 571 | nitems_total = gimple_convert (seq: preheader_seq, type: iv_type, op: nitems_total); |
| 572 | tree step = rgc->controls.length () == 1 ? rgc->controls[0] |
| 573 | : make_ssa_name (var: iv_type); |
| 574 | /* Create decrement IV. */ |
| 575 | if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) |
| 576 | { |
| 577 | create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi, |
| 578 | insert_after, &index_before_incr, &index_after_incr); |
| 579 | tree vectype = build_zero_cst (rgc->type); |
| 580 | tree len = gimple_build (seq: header_seq, code: IFN_SELECT_VL, type: iv_type, |
| 581 | ops: index_before_incr, ops: nitems_step, |
| 582 | ops: vectype); |
| 583 | gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len)); |
| 584 | } |
| 585 | else |
| 586 | { |
| 587 | create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop, |
| 588 | &incr_gsi, insert_after, &index_before_incr, |
| 589 | &index_after_incr); |
| 590 | gimple_seq_add_stmt (header_seq, |
| 591 | gimple_build_assign (step, MIN_EXPR, |
| 592 | index_before_incr, |
| 593 | nitems_step)); |
| 594 | } |
| 595 | *iv_step = step; |
| 596 | *compare_step = nitems_step; |
| 597 | return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr |
| 598 | : index_before_incr; |
| 599 | } |
| 600 | |
| 601 | /* Create increment IV. */ |
| 602 | create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE, |
| 603 | loop, &incr_gsi, insert_after, &index_before_incr, |
| 604 | &index_after_incr); |
| 605 | |
| 606 | tree zero_index = build_int_cst (compare_type, 0); |
| 607 | tree test_index, test_limit, first_limit; |
| 608 | gimple_stmt_iterator *test_gsi; |
| 609 | if (might_wrap_p) |
| 610 | { |
| 611 | /* In principle the loop should stop iterating once the incremented |
| 612 | IV reaches a value greater than or equal to: |
| 613 | |
| 614 | NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP |
| 615 | |
| 616 | However, there's no guarantee that this addition doesn't overflow |
| 617 | the comparison type, or that the IV hits a value above it before |
| 618 | wrapping around. We therefore adjust the limit down by one |
| 619 | IV step: |
| 620 | |
| 621 | (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP) |
| 622 | -[infinite-prec] NITEMS_STEP |
| 623 | |
| 624 | and compare the IV against this limit _before_ incrementing it. |
| 625 | Since the comparison type is unsigned, we actually want the |
| 626 | subtraction to saturate at zero: |
| 627 | |
| 628 | (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP) |
| 629 | -[sat] NITEMS_STEP |
| 630 | |
| 631 | And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as: |
| 632 | |
| 633 | NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP) |
| 634 | |
| 635 | where the rightmost subtraction can be done directly in |
| 636 | COMPARE_TYPE. */ |
| 637 | test_index = index_before_incr; |
| 638 | tree adjust = gimple_convert (seq: preheader_seq, type: compare_type, |
| 639 | op: nitems_step); |
| 640 | if (nitems_skip) |
| 641 | adjust = gimple_build (seq: preheader_seq, code: MINUS_EXPR, type: compare_type, |
| 642 | ops: adjust, ops: nitems_skip); |
| 643 | test_limit = gimple_build (seq: preheader_seq, code: MAX_EXPR, type: compare_type, |
| 644 | ops: nitems_total, ops: adjust); |
| 645 | test_limit = gimple_build (seq: preheader_seq, code: MINUS_EXPR, type: compare_type, |
| 646 | ops: test_limit, ops: adjust); |
| 647 | test_gsi = &incr_gsi; |
| 648 | |
| 649 | /* Get a safe limit for the first iteration. */ |
| 650 | if (nitems_skip) |
| 651 | { |
| 652 | /* The first vector iteration can handle at most NITEMS_STEP |
| 653 | items. NITEMS_STEP <= CONST_LIMIT, and adding |
| 654 | NITEMS_SKIP to that cannot overflow. */ |
| 655 | tree const_limit = build_int_cst (compare_type, |
| 656 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) |
| 657 | * nitems_per_iter); |
| 658 | first_limit = gimple_build (seq: preheader_seq, code: MIN_EXPR, type: compare_type, |
| 659 | ops: nitems_total, ops: const_limit); |
| 660 | first_limit = gimple_build (seq: preheader_seq, code: PLUS_EXPR, type: compare_type, |
| 661 | ops: first_limit, ops: nitems_skip); |
| 662 | } |
| 663 | else |
| 664 | /* For the first iteration it doesn't matter whether the IV hits |
| 665 | a value above NITEMS_TOTAL. That only matters for the latch |
| 666 | condition. */ |
| 667 | first_limit = nitems_total; |
| 668 | } |
| 669 | else |
| 670 | { |
| 671 | /* Test the incremented IV, which will always hit a value above |
| 672 | the bound before wrapping. */ |
| 673 | test_index = index_after_incr; |
| 674 | test_limit = nitems_total; |
| 675 | if (nitems_skip) |
| 676 | test_limit = gimple_build (seq: preheader_seq, code: PLUS_EXPR, type: compare_type, |
| 677 | ops: test_limit, ops: nitems_skip); |
| 678 | test_gsi = &loop_cond_gsi; |
| 679 | |
| 680 | first_limit = test_limit; |
| 681 | } |
| 682 | |
| 683 | /* Convert the IV value to the comparison type (either a no-op or |
| 684 | a demotion). */ |
| 685 | gimple_seq test_seq = NULL; |
| 686 | test_index = gimple_convert (seq: &test_seq, type: compare_type, op: test_index); |
| 687 | gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT); |
| 688 | |
| 689 | /* Provide a definition of each control in the group. */ |
| 690 | tree next_ctrl = NULL_TREE; |
| 691 | tree ctrl; |
| 692 | unsigned int i; |
| 693 | FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl) |
| 694 | { |
| 695 | /* Previous controls will cover BIAS items. This control covers the |
| 696 | next batch. */ |
| 697 | poly_uint64 bias = nitems_per_ctrl * i; |
| 698 | tree bias_tree = build_int_cst (compare_type, bias); |
| 699 | |
| 700 | /* See whether the first iteration of the vector loop is known |
| 701 | to have a full control. */ |
| 702 | poly_uint64 const_limit; |
| 703 | bool first_iteration_full |
| 704 | = (poly_int_tree_p (t: first_limit, value: &const_limit) |
| 705 | && known_ge (const_limit, (i + 1) * nitems_per_ctrl)); |
| 706 | |
| 707 | /* Rather than have a new IV that starts at BIAS and goes up to |
| 708 | TEST_LIMIT, prefer to use the same 0-based IV for each control |
| 709 | and adjust the bound down by BIAS. */ |
| 710 | tree this_test_limit = test_limit; |
| 711 | if (i != 0) |
| 712 | { |
| 713 | this_test_limit = gimple_build (seq: preheader_seq, code: MAX_EXPR, |
| 714 | type: compare_type, ops: this_test_limit, |
| 715 | ops: bias_tree); |
| 716 | this_test_limit = gimple_build (seq: preheader_seq, code: MINUS_EXPR, |
| 717 | type: compare_type, ops: this_test_limit, |
| 718 | ops: bias_tree); |
| 719 | } |
| 720 | |
| 721 | /* Create the initial control. First include all items that |
| 722 | are within the loop limit. */ |
| 723 | tree init_ctrl = NULL_TREE; |
| 724 | if (!first_iteration_full) |
| 725 | { |
| 726 | tree start, end; |
| 727 | if (first_limit == test_limit) |
| 728 | { |
| 729 | /* Use a natural test between zero (the initial IV value) |
| 730 | and the loop limit. The "else" block would be valid too, |
| 731 | but this choice can avoid the need to load BIAS_TREE into |
| 732 | a register. */ |
| 733 | start = zero_index; |
| 734 | end = this_test_limit; |
| 735 | } |
| 736 | else |
| 737 | { |
| 738 | /* FIRST_LIMIT is the maximum number of items handled by the |
| 739 | first iteration of the vector loop. Test the portion |
| 740 | associated with this control. */ |
| 741 | start = bias_tree; |
| 742 | end = first_limit; |
| 743 | } |
| 744 | |
| 745 | if (use_masks_p) |
| 746 | init_ctrl = vect_gen_while (preheader_seq, ctrl_type, |
| 747 | start, end, "max_mask" ); |
| 748 | else |
| 749 | { |
| 750 | init_ctrl = make_temp_ssa_name (type: compare_type, NULL, name: "max_len" ); |
| 751 | gimple_seq seq = vect_gen_len (init_ctrl, start, |
| 752 | end, length_limit); |
| 753 | gimple_seq_add_seq (preheader_seq, seq); |
| 754 | } |
| 755 | } |
| 756 | |
| 757 | /* Now AND out the bits that are within the number of skipped |
| 758 | items. */ |
| 759 | poly_uint64 const_skip; |
| 760 | if (nitems_skip |
| 761 | && !(poly_int_tree_p (t: nitems_skip, value: &const_skip) |
| 762 | && known_le (const_skip, bias))) |
| 763 | { |
| 764 | gcc_assert (use_masks_p); |
| 765 | tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type, |
| 766 | bias_tree, nitems_skip); |
| 767 | if (init_ctrl) |
| 768 | init_ctrl = gimple_build (seq: preheader_seq, code: BIT_AND_EXPR, type: ctrl_type, |
| 769 | ops: init_ctrl, ops: unskipped_mask); |
| 770 | else |
| 771 | init_ctrl = unskipped_mask; |
| 772 | } |
| 773 | |
| 774 | if (!init_ctrl) |
| 775 | { |
| 776 | /* First iteration is full. */ |
| 777 | if (use_masks_p) |
| 778 | init_ctrl = build_minus_one_cst (ctrl_type); |
| 779 | else |
| 780 | init_ctrl = length_limit; |
| 781 | } |
| 782 | |
| 783 | /* Get the control value for the next iteration of the loop. */ |
| 784 | if (use_masks_p) |
| 785 | { |
| 786 | gimple_seq stmts = NULL; |
| 787 | next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index, |
| 788 | this_test_limit, "next_mask" ); |
| 789 | gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT); |
| 790 | } |
| 791 | else |
| 792 | { |
| 793 | next_ctrl = make_temp_ssa_name (type: compare_type, NULL, name: "next_len" ); |
| 794 | gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit, |
| 795 | length_limit); |
| 796 | gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT); |
| 797 | } |
| 798 | |
| 799 | vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl); |
| 800 | } |
| 801 | |
| 802 | int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
| 803 | if (partial_load_bias != 0) |
| 804 | { |
| 805 | tree adjusted_len = rgc->bias_adjusted_ctrl; |
| 806 | gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR, |
| 807 | rgc->controls[0], |
| 808 | build_int_cst |
| 809 | (TREE_TYPE (rgc->controls[0]), |
| 810 | partial_load_bias)); |
| 811 | gimple_seq_add_stmt (header_seq, minus); |
| 812 | } |
| 813 | |
| 814 | return next_ctrl; |
| 815 | } |
| 816 | |
| 817 | /* Set up the iteration condition and rgroup controls for LOOP, given |
| 818 | that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized |
| 819 | loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is |
| 820 | the number of iterations of the original scalar loop that should be |
| 821 | handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as |
| 822 | for vect_set_loop_condition. |
| 823 | |
| 824 | Insert the branch-back condition before LOOP_COND_GSI and return the |
| 825 | final gcond. */ |
| 826 | |
| 827 | static gcond * |
| 828 | vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge, |
| 829 | loop_vec_info loop_vinfo, tree niters, |
| 830 | tree final_iv, bool niters_maybe_zero, |
| 831 | gimple_stmt_iterator loop_cond_gsi) |
| 832 | { |
| 833 | gimple_seq = NULL; |
| 834 | gimple_seq = NULL; |
| 835 | |
| 836 | bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); |
| 837 | tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); |
| 838 | unsigned int compare_precision = TYPE_PRECISION (compare_type); |
| 839 | tree orig_niters = niters; |
| 840 | |
| 841 | /* Type of the initial value of NITERS. */ |
| 842 | tree ni_actual_type = TREE_TYPE (niters); |
| 843 | unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type); |
| 844 | tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
| 845 | if (niters_skip) |
| 846 | niters_skip = gimple_convert (seq: &preheader_seq, type: compare_type, op: niters_skip); |
| 847 | |
| 848 | /* Convert NITERS to the same size as the compare. */ |
| 849 | if (compare_precision > ni_actual_precision |
| 850 | && niters_maybe_zero) |
| 851 | { |
| 852 | /* We know that there is always at least one iteration, so if the |
| 853 | count is zero then it must have wrapped. Cope with this by |
| 854 | subtracting 1 before the conversion and adding 1 to the result. */ |
| 855 | gcc_assert (TYPE_UNSIGNED (ni_actual_type)); |
| 856 | niters = gimple_build (seq: &preheader_seq, code: PLUS_EXPR, type: ni_actual_type, |
| 857 | ops: niters, ops: build_minus_one_cst (ni_actual_type)); |
| 858 | niters = gimple_convert (seq: &preheader_seq, type: compare_type, op: niters); |
| 859 | niters = gimple_build (seq: &preheader_seq, code: PLUS_EXPR, type: compare_type, |
| 860 | ops: niters, ops: build_one_cst (compare_type)); |
| 861 | } |
| 862 | else |
| 863 | niters = gimple_convert (seq: &preheader_seq, type: compare_type, op: niters); |
| 864 | |
| 865 | /* Iterate over all the rgroups and fill in their controls. We could use |
| 866 | the first control from any rgroup for the loop condition; here we |
| 867 | arbitrarily pick the last. */ |
| 868 | tree test_ctrl = NULL_TREE; |
| 869 | tree iv_step = NULL_TREE; |
| 870 | tree compare_step = NULL_TREE; |
| 871 | rgroup_controls *rgc; |
| 872 | rgroup_controls *iv_rgc = nullptr; |
| 873 | unsigned int i; |
| 874 | auto_vec<rgroup_controls> *controls = use_masks_p |
| 875 | ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec |
| 876 | : &LOOP_VINFO_LENS (loop_vinfo); |
| 877 | FOR_EACH_VEC_ELT (*controls, i, rgc) |
| 878 | if (!rgc->controls.is_empty ()) |
| 879 | { |
| 880 | /* First try using permutes. This adds a single vector |
| 881 | instruction to the loop for each mask, but needs no extra |
| 882 | loop invariants or IVs. */ |
| 883 | unsigned int nmasks = i + 1; |
| 884 | if (use_masks_p && (nmasks & 1) == 0) |
| 885 | { |
| 886 | rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1]; |
| 887 | if (!half_rgc->controls.is_empty () |
| 888 | && vect_maybe_permute_loop_masks (seq: &header_seq, dest_rgm: rgc, src_rgm: half_rgc)) |
| 889 | continue; |
| 890 | } |
| 891 | |
| 892 | if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) |
| 893 | || !iv_rgc |
| 894 | || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor |
| 895 | != rgc->max_nscalars_per_iter * rgc->factor)) |
| 896 | { |
| 897 | /* See whether zero-based IV would ever generate all-false masks |
| 898 | or zero length before wrapping around. */ |
| 899 | bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc); |
| 900 | |
| 901 | /* Set up all controls for this group. */ |
| 902 | test_ctrl |
| 903 | = vect_set_loop_controls_directly (loop, loop_vinfo, |
| 904 | preheader_seq: &preheader_seq, header_seq: &header_seq, |
| 905 | loop_cond_gsi, rgc, niters, |
| 906 | niters_skip, might_wrap_p, |
| 907 | iv_step: &iv_step, compare_step: &compare_step); |
| 908 | |
| 909 | iv_rgc = rgc; |
| 910 | } |
| 911 | |
| 912 | if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) |
| 913 | && rgc->controls.length () > 1) |
| 914 | { |
| 915 | /* vect_set_loop_controls_directly creates an IV whose step |
| 916 | is equal to the expected sum of RGC->controls. Use that |
| 917 | information to populate RGC->controls. */ |
| 918 | tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); |
| 919 | gcc_assert (iv_step); |
| 920 | vect_adjust_loop_lens_control (iv_type, seq: &header_seq, dest_rgm: rgc, step: iv_step); |
| 921 | } |
| 922 | } |
| 923 | |
| 924 | /* Emit all accumulated statements. */ |
| 925 | add_preheader_seq (loop, seq: preheader_seq); |
| 926 | add_header_seq (loop, seq: header_seq); |
| 927 | |
| 928 | /* Get a boolean result that tells us whether to iterate. */ |
| 929 | gcond *cond_stmt; |
| 930 | if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) |
| 931 | && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) |
| 932 | { |
| 933 | gcc_assert (compare_step); |
| 934 | tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR; |
| 935 | cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE, |
| 936 | NULL_TREE); |
| 937 | } |
| 938 | else |
| 939 | { |
| 940 | tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; |
| 941 | tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl)); |
| 942 | cond_stmt |
| 943 | = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE); |
| 944 | } |
| 945 | gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT); |
| 946 | |
| 947 | /* The loop iterates (NITERS - 1) / VF + 1 times. |
| 948 | Subtract one from this to get the latch count. */ |
| 949 | tree step = build_int_cst (compare_type, |
| 950 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)); |
| 951 | tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters, |
| 952 | build_minus_one_cst (compare_type)); |
| 953 | loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type, |
| 954 | niters_minus_one, step); |
| 955 | |
| 956 | if (final_iv) |
| 957 | { |
| 958 | gassign *assign; |
| 959 | /* If vectorizing an inverted early break loop we have to restart the |
| 960 | scalar loop at niters - vf. This matches what we do in |
| 961 | vect_gen_vector_loop_niters_mult_vf for non-masked loops. */ |
| 962 | if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) |
| 963 | { |
| 964 | tree ftype = TREE_TYPE (orig_niters); |
| 965 | tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); |
| 966 | assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf); |
| 967 | } |
| 968 | else |
| 969 | assign = gimple_build_assign (final_iv, orig_niters); |
| 970 | gsi_insert_on_edge_immediate (exit_edge, assign); |
| 971 | } |
| 972 | |
| 973 | return cond_stmt; |
| 974 | } |
| 975 | |
| 976 | /* Set up the iteration condition and rgroup controls for LOOP in AVX512 |
| 977 | style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the |
| 978 | vectorized loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is |
| 979 | the number of iterations of the original scalar loop that should be |
| 980 | handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as |
| 981 | for vect_set_loop_condition. |
| 982 | |
| 983 | Insert the branch-back condition before LOOP_COND_GSI and return the |
| 984 | final gcond. */ |
| 985 | |
| 986 | static gcond * |
| 987 | vect_set_loop_condition_partial_vectors_avx512 (class loop *loop, |
| 988 | edge exit_edge, |
| 989 | loop_vec_info loop_vinfo, tree niters, |
| 990 | tree final_iv, |
| 991 | bool niters_maybe_zero, |
| 992 | gimple_stmt_iterator loop_cond_gsi) |
| 993 | { |
| 994 | tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
| 995 | tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); |
| 996 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
| 997 | tree orig_niters = niters; |
| 998 | gimple_seq = NULL; |
| 999 | |
| 1000 | /* Create an IV that counts down from niters and whose step |
| 1001 | is the number of iterations processed in the current iteration. |
| 1002 | Produce the controls with compares like the following. |
| 1003 | |
| 1004 | # iv_2 = PHI <niters, iv_3> |
| 1005 | rem_4 = MIN <iv_2, VF>; |
| 1006 | remv_6 = { rem_4, rem_4, rem_4, ... } |
| 1007 | mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv6; |
| 1008 | iv_3 = iv_2 - VF; |
| 1009 | if (iv_2 > VF) |
| 1010 | continue; |
| 1011 | |
| 1012 | Where the constant is built with elements at most VF - 1 and |
| 1013 | repetitions according to max_nscalars_per_iter which is guarnateed |
| 1014 | to be the same within a group. */ |
| 1015 | |
| 1016 | /* Convert NITERS to the determined IV type. */ |
| 1017 | if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters)) |
| 1018 | && niters_maybe_zero) |
| 1019 | { |
| 1020 | /* We know that there is always at least one iteration, so if the |
| 1021 | count is zero then it must have wrapped. Cope with this by |
| 1022 | subtracting 1 before the conversion and adding 1 to the result. */ |
| 1023 | gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters))); |
| 1024 | niters = gimple_build (seq: &preheader_seq, code: PLUS_EXPR, TREE_TYPE (niters), |
| 1025 | ops: niters, ops: build_minus_one_cst (TREE_TYPE (niters))); |
| 1026 | niters = gimple_convert (seq: &preheader_seq, type: iv_type, op: niters); |
| 1027 | niters = gimple_build (seq: &preheader_seq, code: PLUS_EXPR, type: iv_type, |
| 1028 | ops: niters, ops: build_one_cst (iv_type)); |
| 1029 | } |
| 1030 | else |
| 1031 | niters = gimple_convert (seq: &preheader_seq, type: iv_type, op: niters); |
| 1032 | |
| 1033 | /* Bias the initial value of the IV in case we need to skip iterations |
| 1034 | at the beginning. */ |
| 1035 | tree niters_adj = niters; |
| 1036 | if (niters_skip) |
| 1037 | { |
| 1038 | tree skip = gimple_convert (seq: &preheader_seq, type: iv_type, op: niters_skip); |
| 1039 | niters_adj = gimple_build (seq: &preheader_seq, code: PLUS_EXPR, |
| 1040 | type: iv_type, ops: niters, ops: skip); |
| 1041 | } |
| 1042 | |
| 1043 | /* The iteration step is the vectorization factor. */ |
| 1044 | tree iv_step = build_int_cst (iv_type, vf); |
| 1045 | |
| 1046 | /* Create the decrement IV. */ |
| 1047 | tree index_before_incr, index_after_incr; |
| 1048 | gimple_stmt_iterator incr_gsi; |
| 1049 | bool insert_after; |
| 1050 | vect_iv_increment_position (loop_exit: exit_edge, bsi: &incr_gsi, insert_after: &insert_after); |
| 1051 | create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop, |
| 1052 | &incr_gsi, insert_after, &index_before_incr, |
| 1053 | &index_after_incr); |
| 1054 | |
| 1055 | /* Iterate over all the rgroups and fill in their controls. */ |
| 1056 | for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec) |
| 1057 | { |
| 1058 | if (rgc.controls.is_empty ()) |
| 1059 | continue; |
| 1060 | |
| 1061 | tree ctrl_type = rgc.type; |
| 1062 | poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (node: ctrl_type); |
| 1063 | |
| 1064 | tree vectype = rgc.compare_type; |
| 1065 | |
| 1066 | /* index_after_incr is the IV specifying the remaining iterations in |
| 1067 | the next iteration. */ |
| 1068 | tree rem = index_after_incr; |
| 1069 | /* When the data type for the compare to produce the mask is |
| 1070 | smaller than the IV type we need to saturate. Saturate to |
| 1071 | the smallest possible value (IV_TYPE) so we only have to |
| 1072 | saturate once (CSE will catch redundant ones we add). */ |
| 1073 | if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type)) |
| 1074 | rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING, |
| 1075 | UNKNOWN_LOCATION, |
| 1076 | MIN_EXPR, TREE_TYPE (rem), rem, iv_step); |
| 1077 | rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING, |
| 1078 | UNKNOWN_LOCATION, TREE_TYPE (vectype), rem); |
| 1079 | |
| 1080 | /* Build a data vector composed of the remaining iterations. */ |
| 1081 | rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING, |
| 1082 | UNKNOWN_LOCATION, vectype, rem); |
| 1083 | |
| 1084 | /* Provide a definition of each vector in the control group. */ |
| 1085 | tree next_ctrl = NULL_TREE; |
| 1086 | tree first_rem = NULL_TREE; |
| 1087 | tree ctrl; |
| 1088 | unsigned int i; |
| 1089 | FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl) |
| 1090 | { |
| 1091 | /* Previous controls will cover BIAS items. This control covers the |
| 1092 | next batch. */ |
| 1093 | poly_uint64 bias = nitems_per_ctrl * i; |
| 1094 | |
| 1095 | /* Build the constant to compare the remaining iters against, |
| 1096 | this is sth like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately |
| 1097 | split into pieces. */ |
| 1098 | unsigned n = TYPE_VECTOR_SUBPARTS (node: ctrl_type).to_constant (); |
| 1099 | tree_vector_builder builder (vectype, n, 1); |
| 1100 | for (unsigned i = 0; i < n; ++i) |
| 1101 | { |
| 1102 | unsigned HOST_WIDE_INT val |
| 1103 | = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter; |
| 1104 | gcc_assert (val < vf.to_constant ()); |
| 1105 | builder.quick_push (obj: build_int_cst (TREE_TYPE (vectype), val)); |
| 1106 | } |
| 1107 | tree cmp_series = builder.build (); |
| 1108 | |
| 1109 | /* Create the initial control. First include all items that |
| 1110 | are within the loop limit. */ |
| 1111 | tree init_ctrl = NULL_TREE; |
| 1112 | poly_uint64 const_limit; |
| 1113 | /* See whether the first iteration of the vector loop is known |
| 1114 | to have a full control. */ |
| 1115 | if (poly_int_tree_p (t: niters, value: &const_limit) |
| 1116 | && known_ge (const_limit, (i + 1) * nitems_per_ctrl)) |
| 1117 | init_ctrl = build_minus_one_cst (ctrl_type); |
| 1118 | else |
| 1119 | { |
| 1120 | /* The remaining work items initially are niters. Saturate, |
| 1121 | splat and compare. */ |
| 1122 | if (!first_rem) |
| 1123 | { |
| 1124 | first_rem = niters; |
| 1125 | if (TYPE_PRECISION (TREE_TYPE (vectype)) |
| 1126 | < TYPE_PRECISION (iv_type)) |
| 1127 | first_rem = gimple_build (seq: &preheader_seq, |
| 1128 | code: MIN_EXPR, TREE_TYPE (first_rem), |
| 1129 | ops: first_rem, ops: iv_step); |
| 1130 | first_rem = gimple_convert (seq: &preheader_seq, TREE_TYPE (vectype), |
| 1131 | op: first_rem); |
| 1132 | first_rem = gimple_build_vector_from_val (seq: &preheader_seq, |
| 1133 | type: vectype, op: first_rem); |
| 1134 | } |
| 1135 | init_ctrl = gimple_build (seq: &preheader_seq, code: LT_EXPR, type: ctrl_type, |
| 1136 | ops: cmp_series, ops: first_rem); |
| 1137 | } |
| 1138 | |
| 1139 | /* Now AND out the bits that are within the number of skipped |
| 1140 | items. */ |
| 1141 | poly_uint64 const_skip; |
| 1142 | if (niters_skip |
| 1143 | && !(poly_int_tree_p (t: niters_skip, value: &const_skip) |
| 1144 | && known_le (const_skip, bias))) |
| 1145 | { |
| 1146 | /* For integer mode masks it's cheaper to shift out the bits |
| 1147 | since that avoids loading a constant. */ |
| 1148 | gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT); |
| 1149 | init_ctrl = gimple_build (seq: &preheader_seq, code: VIEW_CONVERT_EXPR, |
| 1150 | type: lang_hooks.types.type_for_mode |
| 1151 | (TYPE_MODE (ctrl_type), 1), |
| 1152 | ops: init_ctrl); |
| 1153 | /* ??? But when the shift amount isn't constant this requires |
| 1154 | a round-trip to GRPs. We could apply the bias to either |
| 1155 | side of the compare instead. */ |
| 1156 | tree shift = gimple_build (seq: &preheader_seq, code: MINUS_EXPR, |
| 1157 | TREE_TYPE (niters_skip), ops: niters_skip, |
| 1158 | ops: build_int_cst (TREE_TYPE (niters_skip), |
| 1159 | bias)); |
| 1160 | shift = gimple_build (seq: &preheader_seq, code: MULT_EXPR, |
| 1161 | TREE_TYPE (niters_skip), ops: shift, |
| 1162 | ops: build_int_cst (TREE_TYPE (niters_skip), |
| 1163 | rgc.max_nscalars_per_iter)); |
| 1164 | init_ctrl = gimple_build (seq: &preheader_seq, code: LSHIFT_EXPR, |
| 1165 | TREE_TYPE (init_ctrl), |
| 1166 | ops: init_ctrl, ops: shift); |
| 1167 | init_ctrl = gimple_build (seq: &preheader_seq, code: VIEW_CONVERT_EXPR, |
| 1168 | type: ctrl_type, ops: init_ctrl); |
| 1169 | } |
| 1170 | |
| 1171 | /* Get the control value for the next iteration of the loop. */ |
| 1172 | next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING, |
| 1173 | UNKNOWN_LOCATION, |
| 1174 | LT_EXPR, ctrl_type, cmp_series, rem); |
| 1175 | |
| 1176 | vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl); |
| 1177 | } |
| 1178 | } |
| 1179 | |
| 1180 | /* Emit all accumulated statements. */ |
| 1181 | add_preheader_seq (loop, seq: preheader_seq); |
| 1182 | |
| 1183 | /* Adjust the exit test using the decrementing IV. */ |
| 1184 | tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR; |
| 1185 | /* When we peel for alignment with niter_skip != 0 this can |
| 1186 | cause niter + niter_skip to wrap and since we are comparing the |
| 1187 | value before the decrement here we get a false early exit. |
| 1188 | We can't compare the value after decrement either because that |
| 1189 | decrement could wrap as well as we're not doing a saturating |
| 1190 | decrement. To avoid this situation we force a larger |
| 1191 | iv_type. */ |
| 1192 | gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step, |
| 1193 | NULL_TREE, NULL_TREE); |
| 1194 | gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT); |
| 1195 | |
| 1196 | /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times. |
| 1197 | Subtract one from this to get the latch count. */ |
| 1198 | tree niters_minus_one |
| 1199 | = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters, |
| 1200 | build_minus_one_cst (TREE_TYPE (orig_niters))); |
| 1201 | tree niters_adj2 = fold_convert (iv_type, niters_minus_one); |
| 1202 | if (niters_skip) |
| 1203 | niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one, |
| 1204 | fold_convert (iv_type, niters_skip)); |
| 1205 | loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type, |
| 1206 | niters_adj2, iv_step); |
| 1207 | |
| 1208 | if (final_iv) |
| 1209 | { |
| 1210 | gassign *assign; |
| 1211 | /* If vectorizing an inverted early break loop we have to restart the |
| 1212 | scalar loop at niters - vf. This matches what we do in |
| 1213 | vect_gen_vector_loop_niters_mult_vf for non-masked loops. */ |
| 1214 | if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) |
| 1215 | { |
| 1216 | tree ftype = TREE_TYPE (orig_niters); |
| 1217 | tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); |
| 1218 | assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf); |
| 1219 | } |
| 1220 | else |
| 1221 | assign = gimple_build_assign (final_iv, orig_niters); |
| 1222 | gsi_insert_on_edge_immediate (exit_edge, assign); |
| 1223 | } |
| 1224 | |
| 1225 | return cond_stmt; |
| 1226 | } |
| 1227 | |
| 1228 | |
| 1229 | /* Like vect_set_loop_condition, but handle the case in which the vector |
| 1230 | loop handles exactly VF scalars per iteration. */ |
| 1231 | |
| 1232 | static gcond * |
| 1233 | vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge, |
| 1234 | class loop *loop, tree niters, tree step, |
| 1235 | tree final_iv, bool niters_maybe_zero, |
| 1236 | gimple_stmt_iterator loop_cond_gsi) |
| 1237 | { |
| 1238 | tree indx_before_incr, indx_after_incr; |
| 1239 | gcond *cond_stmt; |
| 1240 | gcond *orig_cond; |
| 1241 | edge pe = loop_preheader_edge (loop); |
| 1242 | gimple_stmt_iterator incr_gsi; |
| 1243 | bool insert_after; |
| 1244 | enum tree_code code; |
| 1245 | tree niters_type = TREE_TYPE (niters); |
| 1246 | |
| 1247 | orig_cond = get_loop_exit_condition (exit_edge); |
| 1248 | gcc_assert (orig_cond); |
| 1249 | loop_cond_gsi = gsi_for_stmt (orig_cond); |
| 1250 | |
| 1251 | tree init, limit; |
| 1252 | if (!niters_maybe_zero && integer_onep (step)) |
| 1253 | { |
| 1254 | /* In this case we can use a simple 0-based IV: |
| 1255 | |
| 1256 | A: |
| 1257 | x = 0; |
| 1258 | do |
| 1259 | { |
| 1260 | ... |
| 1261 | x += 1; |
| 1262 | } |
| 1263 | while (x < NITERS); */ |
| 1264 | code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR; |
| 1265 | init = build_zero_cst (niters_type); |
| 1266 | limit = niters; |
| 1267 | } |
| 1268 | else |
| 1269 | { |
| 1270 | /* The following works for all values of NITERS except 0: |
| 1271 | |
| 1272 | B: |
| 1273 | x = 0; |
| 1274 | do |
| 1275 | { |
| 1276 | ... |
| 1277 | x += STEP; |
| 1278 | } |
| 1279 | while (x <= NITERS - STEP); |
| 1280 | |
| 1281 | so that the loop continues to iterate if x + STEP - 1 < NITERS |
| 1282 | but stops if x + STEP - 1 >= NITERS. |
| 1283 | |
| 1284 | However, if NITERS is zero, x never hits a value above NITERS - STEP |
| 1285 | before wrapping around. There are two obvious ways of dealing with |
| 1286 | this: |
| 1287 | |
| 1288 | - start at STEP - 1 and compare x before incrementing it |
| 1289 | - start at -1 and compare x after incrementing it |
| 1290 | |
| 1291 | The latter is simpler and is what we use. The loop in this case |
| 1292 | looks like: |
| 1293 | |
| 1294 | C: |
| 1295 | x = -1; |
| 1296 | do |
| 1297 | { |
| 1298 | ... |
| 1299 | x += STEP; |
| 1300 | } |
| 1301 | while (x < NITERS - STEP); |
| 1302 | |
| 1303 | In both cases the loop limit is NITERS - STEP. */ |
| 1304 | gimple_seq seq = NULL; |
| 1305 | limit = force_gimple_operand (niters, &seq, true, NULL_TREE); |
| 1306 | limit = gimple_build (seq: &seq, code: MINUS_EXPR, TREE_TYPE (limit), ops: limit, ops: step); |
| 1307 | if (seq) |
| 1308 | { |
| 1309 | basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); |
| 1310 | gcc_assert (!new_bb); |
| 1311 | } |
| 1312 | if (niters_maybe_zero) |
| 1313 | { |
| 1314 | /* Case C. */ |
| 1315 | code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR; |
| 1316 | init = build_all_ones_cst (niters_type); |
| 1317 | } |
| 1318 | else |
| 1319 | { |
| 1320 | /* Case B. */ |
| 1321 | code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR; |
| 1322 | init = build_zero_cst (niters_type); |
| 1323 | } |
| 1324 | } |
| 1325 | |
| 1326 | vect_iv_increment_position (loop_exit: exit_edge, bsi: &incr_gsi, insert_after: &insert_after); |
| 1327 | create_iv (init, PLUS_EXPR, step, NULL_TREE, loop, |
| 1328 | &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr); |
| 1329 | indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr, |
| 1330 | true, NULL_TREE, true, |
| 1331 | GSI_SAME_STMT); |
| 1332 | limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE, |
| 1333 | true, GSI_SAME_STMT); |
| 1334 | |
| 1335 | cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE, |
| 1336 | NULL_TREE); |
| 1337 | |
| 1338 | gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT); |
| 1339 | |
| 1340 | /* Record the number of latch iterations. */ |
| 1341 | if (limit == niters) |
| 1342 | /* Case A: the loop iterates NITERS times. Subtract one to get the |
| 1343 | latch count. */ |
| 1344 | loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters, |
| 1345 | build_int_cst (niters_type, 1)); |
| 1346 | else |
| 1347 | /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times. |
| 1348 | Subtract one from this to get the latch count. */ |
| 1349 | loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type, |
| 1350 | limit, step); |
| 1351 | |
| 1352 | if (final_iv) |
| 1353 | { |
| 1354 | gassign *assign; |
| 1355 | gcc_assert (single_pred_p (exit_edge->dest)); |
| 1356 | tree phi_dest |
| 1357 | = integer_zerop (init) ? final_iv : copy_ssa_name (var: indx_after_incr); |
| 1358 | /* Make sure to maintain LC SSA form here and elide the subtraction |
| 1359 | if the value is zero. */ |
| 1360 | gphi *phi = create_phi_node (phi_dest, exit_edge->dest); |
| 1361 | add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION); |
| 1362 | if (!integer_zerop (init)) |
| 1363 | { |
| 1364 | assign = gimple_build_assign (final_iv, MINUS_EXPR, |
| 1365 | phi_dest, init); |
| 1366 | gimple_stmt_iterator gsi = gsi_after_labels (bb: exit_edge->dest); |
| 1367 | gsi_insert_before (&gsi, assign, GSI_SAME_STMT); |
| 1368 | } |
| 1369 | } |
| 1370 | |
| 1371 | return cond_stmt; |
| 1372 | } |
| 1373 | |
| 1374 | /* If we're using fully-masked loops, make LOOP iterate: |
| 1375 | |
| 1376 | N == (NITERS - 1) / STEP + 1 |
| 1377 | |
| 1378 | times. When NITERS is zero, this is equivalent to making the loop |
| 1379 | execute (1 << M) / STEP times, where M is the precision of NITERS. |
| 1380 | NITERS_MAYBE_ZERO is true if this last case might occur. |
| 1381 | |
| 1382 | If we're not using fully-masked loops, make LOOP iterate: |
| 1383 | |
| 1384 | N == (NITERS - STEP) / STEP + 1 |
| 1385 | |
| 1386 | times, where NITERS is known to be outside the range [1, STEP - 1]. |
| 1387 | This is equivalent to making the loop execute NITERS / STEP times |
| 1388 | when NITERS is nonzero and (1 << M) / STEP times otherwise. |
| 1389 | NITERS_MAYBE_ZERO again indicates whether this last case might occur. |
| 1390 | |
| 1391 | If FINAL_IV is nonnull, it is an SSA name that should be set to |
| 1392 | N * STEP on exit from the loop. |
| 1393 | |
| 1394 | Assumption: the exit-condition of LOOP is the last stmt in the loop. */ |
| 1395 | |
| 1396 | void |
| 1397 | vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo, |
| 1398 | tree niters, tree step, tree final_iv, |
| 1399 | bool niters_maybe_zero) |
| 1400 | { |
| 1401 | gcond *cond_stmt; |
| 1402 | gcond *orig_cond = get_loop_exit_condition (loop_e); |
| 1403 | gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond); |
| 1404 | |
| 1405 | if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
| 1406 | { |
| 1407 | if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512) |
| 1408 | cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, exit_edge: loop_e, |
| 1409 | loop_vinfo, |
| 1410 | niters, final_iv, |
| 1411 | niters_maybe_zero, |
| 1412 | loop_cond_gsi); |
| 1413 | else |
| 1414 | cond_stmt = vect_set_loop_condition_partial_vectors (loop, exit_edge: loop_e, |
| 1415 | loop_vinfo, |
| 1416 | niters, final_iv, |
| 1417 | niters_maybe_zero, |
| 1418 | loop_cond_gsi); |
| 1419 | } |
| 1420 | else |
| 1421 | cond_stmt = vect_set_loop_condition_normal (loop_vinfo, exit_edge: loop_e, loop, |
| 1422 | niters, |
| 1423 | step, final_iv, |
| 1424 | niters_maybe_zero, |
| 1425 | loop_cond_gsi); |
| 1426 | |
| 1427 | /* Remove old loop exit test. */ |
| 1428 | stmt_vec_info orig_cond_info; |
| 1429 | if (loop_vinfo |
| 1430 | && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond))) |
| 1431 | loop_vinfo->remove_stmt (orig_cond_info); |
| 1432 | else |
| 1433 | gsi_remove (&loop_cond_gsi, true); |
| 1434 | |
| 1435 | if (dump_enabled_p ()) |
| 1436 | dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G" , |
| 1437 | (gimple *) cond_stmt); |
| 1438 | } |
| 1439 | |
| 1440 | /* Get the virtual operand live on E. The precondition on this is valid |
| 1441 | immediate dominators and an actual virtual definition dominating E. */ |
| 1442 | /* ??? Costly band-aid. For the use in question we can populate a |
| 1443 | live-on-exit/end-of-BB virtual operand when copying stmts. */ |
| 1444 | |
| 1445 | static tree |
| 1446 | get_live_virtual_operand_on_edge (edge e) |
| 1447 | { |
| 1448 | basic_block bb = e->src; |
| 1449 | do |
| 1450 | { |
| 1451 | for (auto gsi = gsi_last_bb (bb); !gsi_end_p (i: gsi); gsi_prev (i: &gsi)) |
| 1452 | { |
| 1453 | gimple *stmt = gsi_stmt (i: gsi); |
| 1454 | if (gimple_vdef (g: stmt)) |
| 1455 | return gimple_vdef (g: stmt); |
| 1456 | if (gimple_vuse (g: stmt)) |
| 1457 | return gimple_vuse (g: stmt); |
| 1458 | } |
| 1459 | if (gphi *vphi = get_virtual_phi (bb)) |
| 1460 | return gimple_phi_result (gs: vphi); |
| 1461 | bb = get_immediate_dominator (CDI_DOMINATORS, bb); |
| 1462 | } |
| 1463 | while (1); |
| 1464 | } |
| 1465 | |
| 1466 | /* Given LOOP this function generates a new copy of it and puts it |
| 1467 | on E which is either the entry or exit of LOOP. If SCALAR_LOOP is |
| 1468 | non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the |
| 1469 | basic blocks from SCALAR_LOOP instead of LOOP, but to either the |
| 1470 | entry or exit of LOOP. If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a |
| 1471 | continuation. This is correct for cases where one loop continues from the |
| 1472 | other like in the vectorizer, but not true for uses in e.g. loop distribution |
| 1473 | where the contents of the loop body are split but the iteration space of both |
| 1474 | copies remains the same. |
| 1475 | |
| 1476 | If UPDATED_DOMS is not NULL it is update with the list of basic blocks whoms |
| 1477 | dominators were updated during the peeling. When doing early break vectorization |
| 1478 | then LOOP_VINFO needs to be provided and is used to keep track of any newly created |
| 1479 | memory references that need to be updated should we decide to vectorize. */ |
| 1480 | |
| 1481 | class loop * |
| 1482 | slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, |
| 1483 | class loop *scalar_loop, |
| 1484 | edge scalar_exit, edge e, edge *new_e, |
| 1485 | bool flow_loops, |
| 1486 | vec<basic_block> *updated_doms, |
| 1487 | bool uncounted_p, bool create_main_e) |
| 1488 | { |
| 1489 | class loop *new_loop; |
| 1490 | basic_block *new_bbs, *bbs, *pbbs; |
| 1491 | bool at_exit; |
| 1492 | bool was_imm_dom; |
| 1493 | basic_block exit_dest; |
| 1494 | edge exit, new_exit; |
| 1495 | bool duplicate_outer_loop = false; |
| 1496 | |
| 1497 | exit = loop_exit; |
| 1498 | at_exit = (e == exit); |
| 1499 | if (!at_exit && e != loop_preheader_edge (loop)) |
| 1500 | return NULL; |
| 1501 | |
| 1502 | if (scalar_loop == NULL) |
| 1503 | { |
| 1504 | scalar_loop = loop; |
| 1505 | scalar_exit = loop_exit; |
| 1506 | } |
| 1507 | else if (scalar_loop == loop) |
| 1508 | scalar_exit = loop_exit; |
| 1509 | else |
| 1510 | { |
| 1511 | /* Loop has been version, match exits up using the aux index. */ |
| 1512 | for (edge exit : get_loop_exit_edges (scalar_loop)) |
| 1513 | if (exit->aux == loop_exit->aux) |
| 1514 | { |
| 1515 | scalar_exit = exit; |
| 1516 | break; |
| 1517 | } |
| 1518 | |
| 1519 | gcc_assert (scalar_exit); |
| 1520 | } |
| 1521 | |
| 1522 | bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); |
| 1523 | pbbs = bbs + 1; |
| 1524 | get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes); |
| 1525 | /* Allow duplication of outer loops. */ |
| 1526 | if (scalar_loop->inner) |
| 1527 | duplicate_outer_loop = true; |
| 1528 | |
| 1529 | /* Generate new loop structure. */ |
| 1530 | new_loop = duplicate_loop (scalar_loop, loop_outer (loop: scalar_loop)); |
| 1531 | duplicate_subloops (scalar_loop, new_loop); |
| 1532 | |
| 1533 | exit_dest = exit->dest; |
| 1534 | was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, |
| 1535 | exit_dest) == exit->src ? |
| 1536 | true : false); |
| 1537 | |
| 1538 | /* Also copy the pre-header, this avoids jumping through hoops to |
| 1539 | duplicate the loop entry PHI arguments. Create an empty |
| 1540 | pre-header unconditionally for this. */ |
| 1541 | basic_block = split_edge (loop_preheader_edge (scalar_loop)); |
| 1542 | edge entry_e = single_pred_edge (bb: preheader); |
| 1543 | bbs[0] = preheader; |
| 1544 | new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1); |
| 1545 | |
| 1546 | copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs, |
| 1547 | &scalar_exit, 1, &new_exit, NULL, |
| 1548 | at_exit ? loop->latch : e->src, true); |
| 1549 | exit = loop_exit; |
| 1550 | basic_block = new_bbs[0]; |
| 1551 | |
| 1552 | gcc_assert (new_exit); |
| 1553 | |
| 1554 | /* Record the new loop exit information. new_loop doesn't have SCEV data and |
| 1555 | so we must initialize the exit information. */ |
| 1556 | if (new_e) |
| 1557 | *new_e = new_exit; |
| 1558 | |
| 1559 | /* Before installing PHI arguments make sure that the edges |
| 1560 | into them match that of the scalar loop we analyzed. This |
| 1561 | makes sure the SLP tree matches up between the main vectorized |
| 1562 | loop and the epilogue vectorized copies. */ |
| 1563 | if (single_succ_edge (bb: preheader)->dest_idx |
| 1564 | != single_succ_edge (bb: new_bbs[0])->dest_idx) |
| 1565 | { |
| 1566 | basic_block swap_bb = new_bbs[1]; |
| 1567 | gcc_assert (EDGE_COUNT (swap_bb->preds) == 2); |
| 1568 | std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1)); |
| 1569 | EDGE_PRED (swap_bb, 0)->dest_idx = 0; |
| 1570 | EDGE_PRED (swap_bb, 1)->dest_idx = 1; |
| 1571 | } |
| 1572 | if (duplicate_outer_loop) |
| 1573 | { |
| 1574 | class loop *new_inner_loop = get_loop_copy (scalar_loop->inner); |
| 1575 | if (loop_preheader_edge (scalar_loop)->dest_idx |
| 1576 | != loop_preheader_edge (new_inner_loop)->dest_idx) |
| 1577 | { |
| 1578 | basic_block swap_bb = new_inner_loop->header; |
| 1579 | gcc_assert (EDGE_COUNT (swap_bb->preds) == 2); |
| 1580 | std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1)); |
| 1581 | EDGE_PRED (swap_bb, 0)->dest_idx = 0; |
| 1582 | EDGE_PRED (swap_bb, 1)->dest_idx = 1; |
| 1583 | } |
| 1584 | } |
| 1585 | |
| 1586 | add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL); |
| 1587 | |
| 1588 | /* Skip new preheader since it's deleted if copy loop is added at entry. */ |
| 1589 | for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++) |
| 1590 | rename_variables_in_bb (bb: new_bbs[i], rename_from_outer_loop: duplicate_outer_loop); |
| 1591 | |
| 1592 | /* Rename the exit uses. */ |
| 1593 | for (edge exit : get_loop_exit_edges (new_loop)) |
| 1594 | for (auto gsi = gsi_start_phis (exit->dest); |
| 1595 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
| 1596 | { |
| 1597 | tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit); |
| 1598 | rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit)); |
| 1599 | if (MAY_HAVE_DEBUG_BIND_STMTS) |
| 1600 | adjust_debug_stmts (from: orig_def, PHI_RESULT (gsi.phi ()), bb: exit->dest); |
| 1601 | } |
| 1602 | |
| 1603 | auto loop_exits = get_loop_exit_edges (loop); |
| 1604 | bool multiple_exits_p = loop_exits.length () > 1; |
| 1605 | auto_vec<basic_block> doms; |
| 1606 | |
| 1607 | if (at_exit) /* Add the loop copy at exit. */ |
| 1608 | { |
| 1609 | if (scalar_loop != loop && new_exit->dest != exit_dest) |
| 1610 | { |
| 1611 | new_exit = redirect_edge_and_branch (new_exit, exit_dest); |
| 1612 | flush_pending_stmts (new_exit); |
| 1613 | } |
| 1614 | |
| 1615 | bool need_virtual_phi = get_virtual_phi (loop->header); |
| 1616 | |
| 1617 | /* For the main loop exit preserve the LC PHI nodes. For vectorization |
| 1618 | we need them to continue or finalize reductions. Since we do not |
| 1619 | copy the loop exit blocks we have to materialize PHIs at the |
| 1620 | new destination before redirecting edges. */ |
| 1621 | for (auto gsi_from = gsi_start_phis (loop_exit->dest); |
| 1622 | !gsi_end_p (i: gsi_from); gsi_next (i: &gsi_from)) |
| 1623 | { |
| 1624 | tree res = gimple_phi_result (gs: *gsi_from); |
| 1625 | create_phi_node (copy_ssa_name (var: res), new_preheader); |
| 1626 | } |
| 1627 | edge e = redirect_edge_and_branch (loop_exit, new_preheader); |
| 1628 | gcc_assert (e == loop_exit); |
| 1629 | flush_pending_stmts (loop_exit); |
| 1630 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop_exit->src); |
| 1631 | |
| 1632 | /* If we ended up choosing an exit leading to a path not using memory |
| 1633 | we can end up without a virtual LC PHI. Create it when it is |
| 1634 | needed because of the epilog loop continuation. */ |
| 1635 | if (need_virtual_phi && !get_virtual_phi (loop_exit->dest)) |
| 1636 | { |
| 1637 | tree = gimple_phi_result (gs: get_virtual_phi (loop->header)); |
| 1638 | gphi *vphi = create_phi_node (copy_ssa_name (var: header_def), |
| 1639 | new_preheader); |
| 1640 | add_phi_arg (vphi, get_live_virtual_operand_on_edge (e: loop_exit), |
| 1641 | loop_exit, UNKNOWN_LOCATION); |
| 1642 | } |
| 1643 | |
| 1644 | bool multiple_exits_p = loop_exits.length () > 1; |
| 1645 | basic_block main_loop_exit_block = new_preheader; |
| 1646 | basic_block alt_loop_exit_block = NULL; |
| 1647 | /* Create the CFG for multiple exits. |
| 1648 | | loop_exit | alt1 | altN |
| 1649 | v v ... v |
| 1650 | main_loop_exit_block: alt_loop_exit_block: |
| 1651 | | / |
| 1652 | v v |
| 1653 | new_preheader: |
| 1654 | where in the new preheader we need merge PHIs for |
| 1655 | the continuation values into the epilogue header. |
| 1656 | Do not bother with exit PHIs for the early exits but |
| 1657 | their live virtual operand. We'll fix up things below. */ |
| 1658 | if (multiple_exits_p || uncounted_p) |
| 1659 | { |
| 1660 | edge loop_e = single_succ_edge (bb: new_preheader); |
| 1661 | new_preheader = split_edge (loop_e); |
| 1662 | |
| 1663 | gphi *vphi = NULL; |
| 1664 | alt_loop_exit_block = new_preheader; |
| 1665 | for (auto exit : loop_exits) |
| 1666 | if (exit != loop_exit) |
| 1667 | { |
| 1668 | tree vphi_def = NULL_TREE; |
| 1669 | if (gphi *evphi = get_virtual_phi (exit->dest)) |
| 1670 | vphi_def = gimple_phi_arg_def_from_edge (gs: evphi, e: exit); |
| 1671 | edge res = redirect_edge_and_branch (exit, alt_loop_exit_block); |
| 1672 | gcc_assert (res == exit); |
| 1673 | redirect_edge_var_map_clear (exit); |
| 1674 | if (alt_loop_exit_block == new_preheader) |
| 1675 | alt_loop_exit_block = split_edge (exit); |
| 1676 | if (!need_virtual_phi) |
| 1677 | continue; |
| 1678 | /* When the edge has no virtual LC PHI get at the live |
| 1679 | virtual operand by other means. */ |
| 1680 | if (!vphi_def) |
| 1681 | vphi_def = get_live_virtual_operand_on_edge (e: exit); |
| 1682 | if (!vphi) |
| 1683 | vphi = create_phi_node (copy_ssa_name (var: vphi_def), |
| 1684 | alt_loop_exit_block); |
| 1685 | else |
| 1686 | /* Edge redirection might re-allocate the PHI node |
| 1687 | so we have to rediscover it. */ |
| 1688 | vphi = get_virtual_phi (alt_loop_exit_block); |
| 1689 | add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION); |
| 1690 | } |
| 1691 | |
| 1692 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, |
| 1693 | loop->header); |
| 1694 | |
| 1695 | /* Fix up the profile counts of the new exit blocks. |
| 1696 | main_loop_exit_block was created by duplicating the |
| 1697 | preheader, so needs its count scaling according to the main |
| 1698 | exit edge's probability. The remaining count from the |
| 1699 | preheader goes to the alt_loop_exit_block, since all |
| 1700 | alternative exits have been redirected there. */ |
| 1701 | main_loop_exit_block->count = loop_exit->count (); |
| 1702 | alt_loop_exit_block->count |
| 1703 | = preheader->count - main_loop_exit_block->count; |
| 1704 | } |
| 1705 | |
| 1706 | /* Adjust the epilog loop PHI entry values to continue iteration. |
| 1707 | This adds remaining necessary LC PHI nodes to the main exit |
| 1708 | and creates merge PHIs when we have multiple exits with |
| 1709 | their appropriate continuation. */ |
| 1710 | if (flow_loops) |
| 1711 | { |
| 1712 | edge loop_entry = single_succ_edge (bb: new_preheader); |
| 1713 | bool peeled_iters = (uncounted_p |
| 1714 | || single_pred (bb: loop->latch) != loop_exit->src); |
| 1715 | |
| 1716 | /* Record the new SSA names in the cache so that we can skip |
| 1717 | materializing them again when we fill in the rest of the LC SSA |
| 1718 | variables. */ |
| 1719 | hash_map <tree, tree> new_phi_args; |
| 1720 | for (auto psi = gsi_start_phis (main_loop_exit_block); |
| 1721 | !gsi_end_p (i: psi); gsi_next (i: &psi)) |
| 1722 | { |
| 1723 | gphi *phi = *psi; |
| 1724 | tree new_arg = gimple_phi_arg_def_from_edge (gs: phi, e: loop_exit); |
| 1725 | if (TREE_CODE (new_arg) != SSA_NAME) |
| 1726 | continue; |
| 1727 | |
| 1728 | /* If the loop doesn't have a virtual def then only possibly keep |
| 1729 | the epilog LC PHI for it and avoid creating new defs. */ |
| 1730 | if (virtual_operand_p (op: new_arg) && !need_virtual_phi) |
| 1731 | { |
| 1732 | auto gsi = gsi_for_stmt (phi); |
| 1733 | remove_phi_node (&gsi, true); |
| 1734 | continue; |
| 1735 | } |
| 1736 | |
| 1737 | /* If we decided not to remove the PHI node we should also not |
| 1738 | rematerialize it later on. */ |
| 1739 | new_phi_args.put (k: new_arg, v: gimple_phi_result (gs: phi)); |
| 1740 | } |
| 1741 | |
| 1742 | /* Create the merge PHI nodes in new_preheader and populate the |
| 1743 | arguments for the exits. */ |
| 1744 | if (multiple_exits_p || uncounted_p) |
| 1745 | { |
| 1746 | for (auto gsi_from = gsi_start_phis (loop->header), |
| 1747 | gsi_to = gsi_start_phis (new_loop->header); |
| 1748 | !gsi_end_p (i: gsi_from) && !gsi_end_p (i: gsi_to); |
| 1749 | gsi_next (i: &gsi_from), gsi_next (i: &gsi_to)) |
| 1750 | { |
| 1751 | gimple *from_phi = gsi_stmt (i: gsi_from); |
| 1752 | gimple *to_phi = gsi_stmt (i: gsi_to); |
| 1753 | |
| 1754 | /* When the vector loop is peeled then we need to use the |
| 1755 | value at start of the loop, otherwise the main loop exit |
| 1756 | should use the final iter value. */ |
| 1757 | tree new_arg; |
| 1758 | if (peeled_iters) |
| 1759 | new_arg = gimple_phi_result (gs: from_phi); |
| 1760 | else |
| 1761 | new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, |
| 1762 | loop_latch_edge (loop)); |
| 1763 | |
| 1764 | /* Check if we've already created a new phi node during edge |
| 1765 | redirection and re-use it if so. Otherwise create a |
| 1766 | LC PHI node to feed the merge PHI. */ |
| 1767 | tree *res; |
| 1768 | if (virtual_operand_p (op: new_arg)) |
| 1769 | { |
| 1770 | /* Use the existing virtual LC SSA from exit block. */ |
| 1771 | gphi *vphi = get_virtual_phi (main_loop_exit_block); |
| 1772 | new_arg = gimple_phi_result (gs: vphi); |
| 1773 | } |
| 1774 | else if ((res = new_phi_args.get (k: new_arg))) |
| 1775 | new_arg = *res; |
| 1776 | else |
| 1777 | { |
| 1778 | /* Create the LC PHI node for the exit. */ |
| 1779 | tree new_def = copy_ssa_name (var: new_arg); |
| 1780 | gphi *lc_phi |
| 1781 | = create_phi_node (new_def, main_loop_exit_block); |
| 1782 | SET_PHI_ARG_DEF (lc_phi, 0, new_arg); |
| 1783 | new_arg = new_def; |
| 1784 | } |
| 1785 | |
| 1786 | /* Create the PHI node in the merge block merging the |
| 1787 | main and early exit values. */ |
| 1788 | tree new_res = copy_ssa_name (var: gimple_phi_result (gs: from_phi)); |
| 1789 | gphi *lcssa_phi = create_phi_node (new_res, new_preheader); |
| 1790 | edge main_e = single_succ_edge (bb: main_loop_exit_block); |
| 1791 | SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, main_e, new_arg); |
| 1792 | |
| 1793 | /* And adjust the epilog entry value. */ |
| 1794 | adjust_phi_and_debug_stmts (update_phi: to_phi, e: loop_entry, new_def: new_res); |
| 1795 | } |
| 1796 | } |
| 1797 | |
| 1798 | if (multiple_exits_p) |
| 1799 | { |
| 1800 | /* After creating the merge PHIs handle the early exits those |
| 1801 | should use the values at the start of the loop. */ |
| 1802 | for (auto gsi_from = gsi_start_phis (loop->header), |
| 1803 | gsi_to = gsi_start_phis (new_preheader); |
| 1804 | !gsi_end_p (i: gsi_from) && !gsi_end_p (i: gsi_to); |
| 1805 | gsi_next (i: &gsi_from), gsi_next (i: &gsi_to)) |
| 1806 | { |
| 1807 | gimple *from_phi = gsi_stmt (i: gsi_from); |
| 1808 | gimple *to_phi = gsi_stmt (i: gsi_to); |
| 1809 | |
| 1810 | /* Now update the virtual PHI nodes with the right value. */ |
| 1811 | tree alt_arg = gimple_phi_result (gs: from_phi); |
| 1812 | if (virtual_operand_p (op: alt_arg)) |
| 1813 | { |
| 1814 | gphi *vphi = get_virtual_phi (alt_loop_exit_block); |
| 1815 | alt_arg = gimple_phi_result (gs: vphi); |
| 1816 | } |
| 1817 | /* For other live args we didn't create LC PHI nodes. |
| 1818 | Do so here. */ |
| 1819 | else |
| 1820 | { |
| 1821 | tree alt_def = copy_ssa_name (var: alt_arg); |
| 1822 | gphi *lc_phi |
| 1823 | = create_phi_node (alt_def, alt_loop_exit_block); |
| 1824 | for (unsigned i = 0; i < gimple_phi_num_args (gs: lc_phi); |
| 1825 | ++i) |
| 1826 | SET_PHI_ARG_DEF (lc_phi, i, alt_arg); |
| 1827 | alt_arg = alt_def; |
| 1828 | } |
| 1829 | edge alt_e = single_succ_edge (bb: alt_loop_exit_block); |
| 1830 | SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg); |
| 1831 | } |
| 1832 | } |
| 1833 | /* For the single exit case only create the missing LC PHI nodes |
| 1834 | for the continuation of the loop IVs that are not also already |
| 1835 | reductions and thus had LC PHI nodes on the exit already. */ |
| 1836 | if (!multiple_exits_p && !uncounted_p) |
| 1837 | { |
| 1838 | for (auto gsi_from = gsi_start_phis (loop->header), |
| 1839 | gsi_to = gsi_start_phis (new_loop->header); |
| 1840 | !gsi_end_p (i: gsi_from) && !gsi_end_p (i: gsi_to); |
| 1841 | gsi_next (i: &gsi_from), gsi_next (i: &gsi_to)) |
| 1842 | { |
| 1843 | gimple *from_phi = gsi_stmt (i: gsi_from); |
| 1844 | gimple *to_phi = gsi_stmt (i: gsi_to); |
| 1845 | tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, |
| 1846 | loop_latch_edge (loop)); |
| 1847 | |
| 1848 | /* Check if we've already created a new phi node during edge |
| 1849 | redirection. If we have, only propagate the value |
| 1850 | downwards. */ |
| 1851 | if (tree *res = new_phi_args.get (k: new_arg)) |
| 1852 | { |
| 1853 | adjust_phi_and_debug_stmts (update_phi: to_phi, e: loop_entry, new_def: *res); |
| 1854 | continue; |
| 1855 | } |
| 1856 | |
| 1857 | tree new_res = copy_ssa_name (var: gimple_phi_result (gs: from_phi)); |
| 1858 | gphi *lcssa_phi = create_phi_node (new_res, new_preheader); |
| 1859 | SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, loop_exit, new_arg); |
| 1860 | adjust_phi_and_debug_stmts (update_phi: to_phi, e: loop_entry, new_def: new_res); |
| 1861 | } |
| 1862 | } |
| 1863 | } |
| 1864 | |
| 1865 | if (was_imm_dom || duplicate_outer_loop) |
| 1866 | set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src); |
| 1867 | |
| 1868 | /* And remove the non-necessary forwarder again. Keep the other |
| 1869 | one so we have a proper pre-header for the loop at the exit edge. */ |
| 1870 | redirect_edge_pred (single_succ_edge (bb: preheader), |
| 1871 | single_pred (bb: preheader)); |
| 1872 | delete_basic_block (preheader); |
| 1873 | set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header, |
| 1874 | loop_preheader_edge (scalar_loop)->src); |
| 1875 | |
| 1876 | /* Finally after wiring the new epilogue we need to update its main exit |
| 1877 | to the original function exit we recorded. Other exits are already |
| 1878 | correct. */ |
| 1879 | if (multiple_exits_p || uncounted_p) |
| 1880 | { |
| 1881 | class loop *update_loop = new_loop; |
| 1882 | doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); |
| 1883 | for (unsigned i = 0; i < doms.length (); ++i) |
| 1884 | if (flow_bb_inside_loop_p (loop, doms[i])) |
| 1885 | doms.unordered_remove (ix: i); |
| 1886 | |
| 1887 | for (edge e : get_loop_exit_edges (update_loop)) |
| 1888 | { |
| 1889 | edge ex; |
| 1890 | edge_iterator ei; |
| 1891 | FOR_EACH_EDGE (ex, ei, e->dest->succs) |
| 1892 | { |
| 1893 | /* Find the first non-fallthrough block as fall-throughs can't |
| 1894 | dominate other blocks. */ |
| 1895 | if (single_succ_p (bb: ex->dest)) |
| 1896 | { |
| 1897 | doms.safe_push (obj: ex->dest); |
| 1898 | ex = single_succ_edge (bb: ex->dest); |
| 1899 | } |
| 1900 | doms.safe_push (obj: ex->dest); |
| 1901 | } |
| 1902 | doms.safe_push (obj: e->dest); |
| 1903 | } |
| 1904 | |
| 1905 | iterate_fix_dominators (CDI_DOMINATORS, doms, false); |
| 1906 | if (updated_doms) |
| 1907 | updated_doms->safe_splice (src: doms); |
| 1908 | } |
| 1909 | } |
| 1910 | else /* Add the copy at entry. */ |
| 1911 | { |
| 1912 | /* Copy the current loop LC PHI nodes between the original loop exit |
| 1913 | block and the new loop header. This allows us to later split the |
| 1914 | preheader block and still find the right LC nodes. */ |
| 1915 | if (flow_loops) |
| 1916 | for (auto gsi_from = gsi_start_phis (new_loop->header), |
| 1917 | gsi_to = gsi_start_phis (loop->header); |
| 1918 | !gsi_end_p (i: gsi_from) && !gsi_end_p (i: gsi_to); |
| 1919 | gsi_next (i: &gsi_from), gsi_next (i: &gsi_to)) |
| 1920 | { |
| 1921 | gimple *from_phi = gsi_stmt (i: gsi_from); |
| 1922 | gimple *to_phi = gsi_stmt (i: gsi_to); |
| 1923 | tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, |
| 1924 | loop_latch_edge (new_loop)); |
| 1925 | adjust_phi_and_debug_stmts (update_phi: to_phi, e: loop_preheader_edge (loop), |
| 1926 | new_def: new_arg); |
| 1927 | } |
| 1928 | |
| 1929 | if (scalar_loop != loop) |
| 1930 | { |
| 1931 | /* Remove the non-necessary forwarder of scalar_loop again. */ |
| 1932 | redirect_edge_pred (single_succ_edge (bb: preheader), |
| 1933 | single_pred (bb: preheader)); |
| 1934 | delete_basic_block (preheader); |
| 1935 | set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header, |
| 1936 | loop_preheader_edge (scalar_loop)->src); |
| 1937 | preheader = split_edge (loop_preheader_edge (loop)); |
| 1938 | entry_e = single_pred_edge (bb: preheader); |
| 1939 | } |
| 1940 | |
| 1941 | redirect_edge_and_branch_force (entry_e, new_preheader); |
| 1942 | flush_pending_stmts (entry_e); |
| 1943 | set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src); |
| 1944 | |
| 1945 | |
| 1946 | /* `vect_set_loop_condition' replaces the condition in the main exit of |
| 1947 | loop. For counted loops, this is the IV counting exit, so in the case |
| 1948 | of the prolog loop, we are replacing the old IV counting exit limit of |
| 1949 | total loop niters for the new limit of the prolog niters, as desired. |
| 1950 | For uncounted loops, we don't have an IV-counting exit to replace, so |
| 1951 | we add a dummy exit to be consumed by `vect_set_loop_condition' later |
| 1952 | on. */ |
| 1953 | if (create_main_e) |
| 1954 | { |
| 1955 | edge to_latch_e = single_pred_edge (bb: new_loop->latch); |
| 1956 | bool latch_is_false = to_latch_e->flags & EDGE_FALSE_VALUE ? true |
| 1957 | : false; |
| 1958 | |
| 1959 | /* Add new bb for duplicate exit. */ |
| 1960 | basic_block bbcond = split_edge (to_latch_e); |
| 1961 | gimple_stmt_iterator a = gsi_last_bb (bb: bbcond); |
| 1962 | |
| 1963 | /* Fix flags for the edge leading to the latch. */ |
| 1964 | to_latch_e = find_edge (bbcond, new_loop->latch); |
| 1965 | to_latch_e->flags &= ~EDGE_FALLTHRU; |
| 1966 | to_latch_e->flags |= latch_is_false ? EDGE_FALSE_VALUE |
| 1967 | : EDGE_TRUE_VALUE; |
| 1968 | |
| 1969 | /* Build the condition. */ |
| 1970 | tree cone = build_int_cst (sizetype, 1); |
| 1971 | tree czero = build_int_cst (sizetype, 0); |
| 1972 | gcond *cond_copy = gimple_build_cond (NE_EXPR, cone, czero, NULL_TREE, |
| 1973 | NULL_TREE); |
| 1974 | |
| 1975 | gsi_insert_after (&a, cond_copy, GSI_NEW_STMT); |
| 1976 | |
| 1977 | /* Add edge for exiting the loop via new condition. */ |
| 1978 | edge dup_exit = make_edge (bbcond, new_exit->dest, latch_is_false |
| 1979 | ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE); |
| 1980 | |
| 1981 | profile_probability probability = profile_probability::even (); |
| 1982 | to_latch_e->probability = dup_exit->probability = probability; |
| 1983 | |
| 1984 | set_immediate_dominator (CDI_DOMINATORS, dup_exit->src, |
| 1985 | new_exit->src); |
| 1986 | new_exit = dup_exit; |
| 1987 | *new_e = new_exit; |
| 1988 | } |
| 1989 | |
| 1990 | redirect_edge_and_branch_force (new_exit, preheader); |
| 1991 | flush_pending_stmts (new_exit); |
| 1992 | set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src); |
| 1993 | |
| 1994 | /* And remove the non-necessary forwarder again. Keep the other |
| 1995 | one so we have a proper pre-header for the loop at the exit edge. */ |
| 1996 | redirect_edge_pred (single_succ_edge (bb: new_preheader), |
| 1997 | single_pred (bb: new_preheader)); |
| 1998 | delete_basic_block (new_preheader); |
| 1999 | set_immediate_dominator (CDI_DOMINATORS, new_loop->header, |
| 2000 | loop_preheader_edge (new_loop)->src); |
| 2001 | |
| 2002 | /* Update dominators for multiple exits. */ |
| 2003 | if (multiple_exits_p || create_main_e) |
| 2004 | { |
| 2005 | for (edge alt_e : loop_exits) |
| 2006 | { |
| 2007 | if ((alt_e == loop_exit) && !create_main_e) |
| 2008 | continue; |
| 2009 | basic_block old_dom |
| 2010 | = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest); |
| 2011 | if (flow_bb_inside_loop_p (loop, old_dom)) |
| 2012 | { |
| 2013 | auto_vec<basic_block, 8> queue; |
| 2014 | for (auto son = first_dom_son (CDI_DOMINATORS, old_dom); |
| 2015 | son; son = next_dom_son (CDI_DOMINATORS, son)) |
| 2016 | if (!flow_bb_inside_loop_p (loop, son)) |
| 2017 | queue.safe_push (obj: son); |
| 2018 | for (auto son : queue) |
| 2019 | set_immediate_dominator (CDI_DOMINATORS, |
| 2020 | son, get_bb_copy (old_dom)); |
| 2021 | } |
| 2022 | } |
| 2023 | } |
| 2024 | |
| 2025 | /* When loop_exit != scalar_exit due to if-conversion loop versioning, |
| 2026 | the `scalar_exit' now has two incoming edges, one from the if-converted |
| 2027 | and one from the peeled prolog loop. It is therefore dominated by a |
| 2028 | common block between these. Update its dominator accordingly. */ |
| 2029 | if (create_main_e && loop_exit != scalar_exit) |
| 2030 | set_immediate_dominator (CDI_DOMINATORS, scalar_exit->dest, |
| 2031 | recompute_dominator (CDI_DOMINATORS, |
| 2032 | scalar_exit->dest)); |
| 2033 | } |
| 2034 | |
| 2035 | free (ptr: new_bbs); |
| 2036 | free (ptr: bbs); |
| 2037 | |
| 2038 | checking_verify_dominators (dir: CDI_DOMINATORS); |
| 2039 | |
| 2040 | return new_loop; |
| 2041 | } |
| 2042 | |
| 2043 | |
| 2044 | /* Given the condition expression COND, put it as the last statement of |
| 2045 | GUARD_BB; set both edges' probability; set dominator of GUARD_TO to |
| 2046 | DOM_BB; return the skip edge. GUARD_TO is the target basic block to |
| 2047 | skip the loop. PROBABILITY is the skip edge's probability. Mark the |
| 2048 | new edge as irreducible if IRREDUCIBLE_P is true. */ |
| 2049 | |
static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       basic_block guard_to, basic_block dom_bb,
		       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  /* The existing fallthru successor of GUARD_BB becomes the "enter the
     loop" edge, taken when COND is false.  Clear EDGE_FALLTHRU since the
     block will end in a conditional jump after this.  */
  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (bb: guard_bb);

  /* COND may not yet be a valid GIMPLE condition; gimplify it and append
     any statements that produces to the end of GUARD_BB.  */
  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
				 is_gimple_condexpr_for_cond, NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  /* Re-fetch the iterator; the insertions above may have changed the last
     statement of the block.  */
  gsi = gsi_last_bb (bb: guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.
     This is the "skip the loop" edge, taken when COND is true.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  /* The enter edge takes the complement of the skip probability.  */
  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS: if it now points
     straight at a loop header, insert a forwarder block as preheader.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}
| 2090 | |
| 2091 | |
| 2092 | /* This function verifies that the following restrictions apply to LOOP: |
| 2093 | (1) it consists of exactly 2 basic blocks - header, and an empty latch |
| 2094 | for innermost loop and 5 basic blocks for outer-loop. |
| 2095 | (2) it is single entry, single exit |
| 2096 | (3) its exit condition is the last stmt in the header |
| 2097 | (4) E is the entry/exit edge of LOOP. |
| 2098 | */ |
| 2099 | |
| 2100 | bool |
| 2101 | slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e, |
| 2102 | const_edge e) |
| 2103 | { |
| 2104 | edge entry_e = loop_preheader_edge (loop); |
| 2105 | gcond *orig_cond = get_loop_exit_condition (exit_e); |
| 2106 | gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (bb: exit_e->src); |
| 2107 | |
| 2108 | /* All loops have an outer scope; the only case loop->outer is NULL is for |
| 2109 | the function itself. */ |
| 2110 | if (!loop_outer (loop) |
| 2111 | || !empty_block_p (loop->latch) |
| 2112 | || !exit_e |
| 2113 | /* Verify that new loop exit condition can be trivially modified. */ |
| 2114 | || (!orig_cond || orig_cond != gsi_stmt (i: loop_exit_gsi)) |
| 2115 | || (e != exit_e && e != entry_e)) |
| 2116 | return false; |
| 2117 | |
| 2118 | basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes); |
| 2119 | get_loop_body_with_size (loop, bbs, loop->num_nodes); |
| 2120 | bool ret = can_copy_bbs_p (bbs, loop->num_nodes); |
| 2121 | free (ptr: bbs); |
| 2122 | return ret; |
| 2123 | } |
| 2124 | |
| 2125 | /* Function find_loop_location. |
| 2126 | |
| 2127 | Extract the location of the loop in the source code. |
| 2128 | If the loop is not well formed for vectorization, an estimated |
| 2129 | location is calculated. |
| 2130 | Return the loop location if succeed and NULL if not. */ |
| 2131 | |
| 2132 | dump_user_location_t |
| 2133 | find_loop_location (class loop *loop) |
| 2134 | { |
| 2135 | gimple *stmt = NULL; |
| 2136 | basic_block bb; |
| 2137 | gimple_stmt_iterator si; |
| 2138 | |
| 2139 | if (!loop) |
| 2140 | return dump_user_location_t (); |
| 2141 | |
| 2142 | /* For the root of the loop tree return the function location. */ |
| 2143 | if (!loop_outer (loop)) |
| 2144 | return dump_user_location_t::from_function_decl (cfun->decl); |
| 2145 | |
| 2146 | if (loops_state_satisfies_p (flags: LOOPS_HAVE_RECORDED_EXITS)) |
| 2147 | { |
| 2148 | /* We only care about the loop location, so use any exit with location |
| 2149 | information. */ |
| 2150 | for (edge e : get_loop_exit_edges (loop)) |
| 2151 | { |
| 2152 | stmt = get_loop_exit_condition (e); |
| 2153 | |
| 2154 | if (stmt |
| 2155 | && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) |
| 2156 | return stmt; |
| 2157 | } |
| 2158 | } |
| 2159 | |
| 2160 | /* If we got here the loop is probably not "well formed", |
| 2161 | try to estimate the loop location */ |
| 2162 | |
| 2163 | if (!loop->header) |
| 2164 | return dump_user_location_t (); |
| 2165 | |
| 2166 | bb = loop->header; |
| 2167 | |
| 2168 | for (si = gsi_start_bb (bb); !gsi_end_p (i: si); gsi_next (i: &si)) |
| 2169 | { |
| 2170 | stmt = gsi_stmt (i: si); |
| 2171 | if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION) |
| 2172 | return stmt; |
| 2173 | } |
| 2174 | |
| 2175 | return dump_user_location_t (); |
| 2176 | } |
| 2177 | |
| 2178 | /* Return true if the phi described by STMT_INFO defines an IV of the |
| 2179 | loop to be vectorized. */ |
| 2180 | |
| 2181 | static bool |
| 2182 | iv_phi_p (stmt_vec_info stmt_info) |
| 2183 | { |
| 2184 | gphi *phi = as_a <gphi *> (p: stmt_info->stmt); |
| 2185 | if (virtual_operand_p (PHI_RESULT (phi))) |
| 2186 | return false; |
| 2187 | |
| 2188 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
| 2189 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
| 2190 | return false; |
| 2191 | |
| 2192 | return true; |
| 2193 | } |
| 2194 | |
| 2195 | /* Return true if vectorizer can peel for nonlinear iv. */ |
| 2196 | static bool |
| 2197 | vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo, |
| 2198 | stmt_vec_info stmt_info) |
| 2199 | { |
| 2200 | enum vect_induction_op_type induction_type |
| 2201 | = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info); |
| 2202 | tree niters_skip; |
| 2203 | /* Init_expr will be update by vect_update_ivs_after_vectorizer, |
| 2204 | if niters or vf is unkown: |
| 2205 | For shift, when shift mount >= precision, there would be UD. |
| 2206 | For mult, don't known how to generate |
| 2207 | init_expr * pow (step, niters) for variable niters. |
| 2208 | For neg unknown niters are ok, since niters of vectorized main loop |
| 2209 | will always be multiple of 2. |
| 2210 | See also PR113163, PR114196 and PR114485. */ |
| 2211 | if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant () |
| 2212 | || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
| 2213 | || (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
| 2214 | && induction_type != vect_step_op_neg)) |
| 2215 | { |
| 2216 | if (dump_enabled_p ()) |
| 2217 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| 2218 | "Peeling for epilogue is not supported" |
| 2219 | " for this nonlinear induction" |
| 2220 | " when iteration count is unknown or" |
| 2221 | " when using partial vectorization.\n" ); |
| 2222 | return false; |
| 2223 | } |
| 2224 | |
| 2225 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) |
| 2226 | && induction_type == vect_step_op_mul) |
| 2227 | { |
| 2228 | if (dump_enabled_p ()) |
| 2229 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| 2230 | "Peeling for is not supported for nonlinear mult" |
| 2231 | " induction using partial vectorization.\n" ); |
| 2232 | return false; |
| 2233 | } |
| 2234 | |
| 2235 | /* Avoid compile time hog on vect_peel_nonlinear_iv_init. */ |
| 2236 | if (induction_type == vect_step_op_mul) |
| 2237 | { |
| 2238 | tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); |
| 2239 | tree type = TREE_TYPE (step_expr); |
| 2240 | |
| 2241 | if (wi::exact_log2 (wi::to_wide (t: step_expr)) == -1 |
| 2242 | && LOOP_VINFO_INT_NITERS(loop_vinfo) >= TYPE_PRECISION (type)) |
| 2243 | { |
| 2244 | if (dump_enabled_p ()) |
| 2245 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| 2246 | "Avoid compile time hog on" |
| 2247 | " vect_peel_nonlinear_iv_init" |
| 2248 | " for nonlinear induction vec_step_op_mul" |
| 2249 | " when iteration count is too big.\n" ); |
| 2250 | return false; |
| 2251 | } |
| 2252 | } |
| 2253 | |
| 2254 | /* Also doens't support peel for neg when niter is variable. |
| 2255 | ??? generate something like niter_expr & 1 ? init_expr : -init_expr? */ |
| 2256 | niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
| 2257 | if ((niters_skip != NULL_TREE |
| 2258 | && (TREE_CODE (niters_skip) != INTEGER_CST |
| 2259 | || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0)) |
| 2260 | || (!vect_use_loop_mask_for_alignment_p (loop_vinfo) |
| 2261 | && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)) |
| 2262 | { |
| 2263 | if (dump_enabled_p ()) |
| 2264 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| 2265 | "Peeling for alignement is not supported" |
| 2266 | " for nonlinear induction when niters_skip" |
| 2267 | " is not constant.\n" ); |
| 2268 | return false; |
| 2269 | } |
| 2270 | |
| 2271 | return true; |
| 2272 | } |
| 2273 | |
| 2274 | /* Function vect_can_advance_ivs_p |
| 2275 | |
| 2276 | In case the number of iterations that LOOP iterates is unknown at compile |
| 2277 | time, an epilog loop will be generated, and the loop induction variables |
| 2278 | (IVs) will be "advanced" to the value they are supposed to take just before |
| 2279 | the epilog loop. Here we check that the access function of the loop IVs |
| 2280 | and the expression that represents the loop bound are simple enough. |
| 2281 | These restrictions will be relaxed in the future. */ |
| 2282 | |
bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n" );
  for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
    {
      tree evolution_part;
      enum vect_induction_op_type induction_type;

      gphi *phi = gsi.phi ();
      stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G" ,
			 phi_info->stmt);

      /* Skip virtual phi's.  The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

	 Skip reduction phis.  */
      if (!iv_phi_p (stmt_info: phi_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n" );
	  continue;
	}

      /* Nonlinear inductions (anything other than a simple add step) have
	 their own peeling constraints; defer to the dedicated check and
	 skip the linear-IV tests below.  */
      induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
      if (induction_type != vect_step_op_add)
	{
	  if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, stmt_info: phi_info))
	    return false;

	  continue;
	}

      /* Analyze the evolution function.  */

      evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
      if (evolution_part == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION,
			 "No access function or evolution.\n" );
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are not invariants in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution not invariant in loop.\n" );
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (expr: evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution is chrec.\n" );
	  return false;
	}
    }

  return true;
}
| 2362 | |
| 2363 | |
| 2364 | /* Function vect_update_ivs_after_vectorizer. |
| 2365 | |
| 2366 | "Advance" the induction variables of LOOP to the value they should take |
| 2367 | after the execution of LOOP. This is currently necessary because the |
| 2368 | vectorizer does not handle induction variables that are used after the |
| 2369 | loop. Such a situation occurs when the last iterations of LOOP are |
| 2370 | peeled, because: |
| 2371 | 1. We introduced new uses after LOOP for IVs that were not originally used |
| 2372 | after LOOP: the IVs of LOOP are now used by an epilog loop. |
| 2373 | 2. LOOP is going to be vectorized; this means that it will iterate N/VF |
| 2374 | times, whereas the loop IVs should be bumped N times. |
| 2375 | |
| 2376 | Input: |
| 2377 | - LOOP - a loop that is going to be vectorized. The last few iterations |
| 2378 | of LOOP were peeled. |
| 2379 | - NITERS - the number of iterations that LOOP executes (before it is |
| 2380 | vectorized). i.e, the number of times the ivs should be bumped. |
| 2381 | - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path |
| 2382 | coming out from LOOP on which there are uses of the LOOP ivs |
| 2383 | (this is the path from LOOP->exit to epilog_loop->preheader). |
| 2384 | |
| 2385 | The new definitions of the ivs are placed in LOOP->exit. |
| 2386 | The phi args associated with the edge UPDATE_E in the bb |
| 2387 | UPDATE_E->dest are updated accordingly. |
| 2388 | |
| 2389 | - EARLY_EXIT_P - Indicates whether the exit is an early exit rather than |
| 2390 | the main latch exit. |
| 2391 | |
| 2392 | Assumption 1: Like the rest of the vectorizer, this function assumes |
| 2393 | a single loop exit that has a single predecessor. |
| 2394 | |
| 2395 | Assumption 2: The phi nodes in the LOOP header and in update_bb are |
| 2396 | organized in the same order. |
| 2397 | |
| 2398 | Assumption 3: The access function of the ivs is simple enough (see |
| 2399 | vect_can_advance_ivs_p). This assumption will be relaxed in the future. |
| 2400 | |
| 2401 | Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path |
| 2402 | coming out of LOOP on which the ivs of LOOP are used (this is the path |
| 2403 | that leads to the epilog loop; other paths skip the epilog loop). This |
| 2404 | path starts with the edge UPDATE_E, and its destination (denoted update_bb) |
| 2405 | needs to have its phis updated. |
| 2406 | */ |
| 2407 | |
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
				  tree niters, edge update_e,
				  bool early_exit_p)
{
  gphi_iterator gsi, gsi1;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = update_e->src;
  /* Check to see if this is an empty loop pre-header block.  If it exists
     we need to use the edge from that block -> loop header for updates but
     must use the original exit_bb to add any new adjustment because there
     can be a skip_epilog edge bypassing the epilog and so the loop pre-header
     too.  */
  if (empty_block_p (update_bb) && single_succ_p (bb: update_bb))
    {
      update_e = single_succ_edge (bb: update_bb);
      update_bb = update_e->dest;
    }
  gimple_stmt_iterator last_gsi = gsi_last_bb (bb: exit_bb);

  /* Walk the loop-header PHIs and UPDATE_BB's PHIs in lockstep; per
     assumption 2 in the header comment they are in the same order.  */
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (i: gsi) && !gsi_end_p (i: gsi1);
       gsi_next (i: &gsi), gsi_next (i: &gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_update_ivs_after_vectorizer: phi: %G" ,
			 (gimple *) phi);

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (stmt_info: phi_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n" );
	  continue;
	}

      type = TREE_TYPE (gimple_phi_result (phi));
      /* Unshare so folding below cannot corrupt the recorded evolution.  */
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a
	 polynomial of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      gimple_seq stmts = NULL;
      enum vect_induction_op_type induction_type
	= STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);

      if (induction_type == vect_step_op_add)
	{
	  /* Linear IV: the value after NITERS iterations is
	     INIT + NITERS * STEP, computed in the step's type and
	     converted back (via pointer arithmetic for pointer IVs).  */
	  tree stype = TREE_TYPE (step_expr);
	  off = fold_build2 (MULT_EXPR, stype,
			     fold_convert (stype, niters), step_expr);

	  if (POINTER_TYPE_P (type))
	    ni = fold_build_pointer_plus (init_expr, off);
	  else
	    ni = fold_convert (type,
			       fold_build2 (PLUS_EXPR, stype,
					    fold_convert (stype, init_expr),
					    off));
	}
      /* Don't bother calling vect_peel_nonlinear_iv_init.  */
      else if (induction_type == vect_step_op_neg)
	ni = init_expr;
      else
	ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
					  niters, step_expr,
					  induction_type, early_exit_p);

      var = create_tmp_var (type, "tmp" );

      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);

      /* Exit_bb shouldn't be empty, but we also can't insert after a
	 control statement.  */
      if (!gsi_end_p (i: last_gsi) && !is_ctrl_stmt (gsi_stmt (i: last_gsi)))
	{
	  gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
	  gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
	}
      else
	{
	  gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
	  gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
	}

      /* Update the PHI argument on the requested edge.  */
      adjust_phi_and_debug_stmts (update_phi: phi1, e: update_e, new_def: ni_name);
    }
}
| 2512 | |
| 2513 | /* Return a gimple value containing the misalignment (measured in vector |
| 2514 | elements) for the loop described by LOOP_VINFO, i.e. how many elements |
| 2515 | it is away from a perfectly aligned address. Add any new statements |
| 2516 | to SEQ. */ |
| 2517 | |
static tree
get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
{
  dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
  unsigned HOST_WIDE_INT target_align_c;
  tree target_align_minus_1;

  /* For a negative-step access, base the computation on the address
     (1 - nunits) elements from the recorded base instead of the base
     itself.  */
  bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
					size_zero_node) < 0;
  tree offset = (negative
		 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
			     * TREE_INT_CST_LOW
				 (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
		 : size_zero_node);
  tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
							  stmt_info, seq,
							  offset);
  tree type = unsigned_type_for (TREE_TYPE (start_addr));
  /* Build TARGET_ALIGN - 1; when the alignment is not a compile-time
     constant the subtraction must be built as a tree expression.  */
  if (target_align.is_constant (const_value: &target_align_c))
    target_align_minus_1 = build_int_cst (type, target_align_c - 1);
  else
    {
      tree vla = build_int_cst (type, target_align);
      target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla,
					  build_int_cst (type, 1));
    }

  HOST_WIDE_INT elem_size
    = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
  /* The division below is done as a right shift, so the element size
     must be a power of two here.  */
  tree elem_size_log = build_int_cst (type, exact_log2 (x: elem_size));

  /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
  tree int_start_addr = fold_convert (type, start_addr);
  tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
					target_align_minus_1);

  /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
  tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
					elem_size_log);

  return misalign_in_elems;
}
| 2564 | |
| 2565 | /* Function vect_gen_prolog_loop_niters |
| 2566 | |
| 2567 | Generate the number of iterations which should be peeled as prolog for the |
| 2568 | loop represented by LOOP_VINFO. It is calculated as the misalignment of |
| 2569 | DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). |
| 2570 | As a result, after the execution of this loop, the data reference DR will |
| 2571 | refer to an aligned location. The following computation is generated: |
| 2572 | |
| 2573 | If the misalignment of DR is known at compile time: |
| 2574 | addr_mis = int mis = DR_MISALIGNMENT (dr); |
| 2575 | Else, compute address misalignment in bytes: |
| 2576 | addr_mis = addr & (target_align - 1) |
| 2577 | |
| 2578 | prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step |
| 2579 | |
| 2580 | (elem_size = element type size; an element is the scalar element whose type |
| 2581 | is the inner type of the vectype) |
| 2582 | |
| 2583 | The computations will be emitted at the end of BB. We also compute and |
| 2584 | store upper bound (included) of the result in BOUND. |
| 2585 | |
| 2586 | When the step of the data-ref in the loop is not 1 (as in interleaved data |
| 2587 | and SLP), the number of iterations of the prolog must be divided by the step |
| 2588 | (which is equal to the size of interleaved group). |
| 2589 | |
| 2590 | The above formulas assume that VF == number of elements in the vector. This |
| 2591 | may not hold when there are multiple-types in the loop. |
| 2592 | In this case, for some data-references in the loop the VF does not represent |
| 2593 | the number of elements that fit in the vector. Therefore, instead of VF we |
| 2594 | use TYPE_VECTOR_SUBPARTS. */ |
| 2595 | |
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
			     basic_block bb, poly_int64 *bound)
{
  dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
  tree niters_type
    = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo) ? sizetype
						 : TREE_TYPE (LOOP_VINFO_NITERS
							      (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);

  /* If the peeling amount was fixed at analysis time, just use the
     recorded constant; no runtime computation is needed.  */
  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "known peeling = %d.\n" , npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      /* Runtime case: derive the prolog count from the misalignment of
	 the unaligned data reference (see function header comment).  */
      tree misalign_in_elems = get_misalign_in_elems (seq: &stmts, loop_vinfo);
      tree type = TREE_TYPE (misalign_in_elems);
      HOST_WIDE_INT elem_size
	= int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      /* We only do prolog peeling if the target alignment is known at compile
	 time.  */
      poly_uint64 align_in_elems =
	exact_div (a: target_align, b: elem_size);
      tree align_in_elems_minus_1 =
	build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
				 & (align_in_elems - 1)).
	 For a negative step the roles are swapped since the access walks
	 downwards through memory.  */
      bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
					    size_zero_node) < 0;
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
			     align_in_elems_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
			     misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "niters for prolog loop: %T\n" , iters);

  var = create_tmp_var (niters_type, "prolog_loop_niters" );
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  /* Emit all generated statements at the end of BB.  */
  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (i: gsi))
	gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
| 2672 | |
| 2673 | |
| 2674 | /* Function vect_update_init_of_dr |
| 2675 | |
| 2676 | If CODE is PLUS, the vector loop starts NITERS iterations after the |
| 2677 | scalar one, otherwise CODE is MINUS and the vector loop starts NITERS |
| 2678 | iterations before the scalar one (using masking to skip inactive |
| 2679 | elements). This function updates the information recorded in DR to |
| 2680 | account for the difference. Specifically, it updates the OFFSET |
| 2681 | field of DR_INFO. */ |
| 2682 | |
| 2683 | static void |
| 2684 | vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code) |
| 2685 | { |
| 2686 | struct data_reference *dr = dr_info->dr; |
| 2687 | tree offset = dr_info->offset; |
| 2688 | if (!offset) |
| 2689 | offset = build_zero_cst (sizetype); |
| 2690 | |
| 2691 | niters = fold_build2 (MULT_EXPR, sizetype, |
| 2692 | fold_convert (sizetype, niters), |
| 2693 | fold_convert (sizetype, DR_STEP (dr))); |
| 2694 | offset = fold_build2 (code, sizetype, |
| 2695 | fold_convert (sizetype, offset), niters); |
| 2696 | dr_info->offset = offset; |
| 2697 | } |
| 2698 | |
| 2699 | |
| 2700 | /* Function vect_update_inits_of_drs |
| 2701 | |
| 2702 | Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO. |
| 2703 | CODE and NITERS are as for vect_update_inits_of_dr. */ |
| 2704 | |
| 2705 | void |
| 2706 | vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters, |
| 2707 | tree_code code) |
| 2708 | { |
| 2709 | unsigned int i; |
| 2710 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
| 2711 | struct data_reference *dr; |
| 2712 | |
| 2713 | DUMP_VECT_SCOPE ("vect_update_inits_of_dr" ); |
| 2714 | |
| 2715 | /* Adjust niters to sizetype. We used to insert the stmts on loop preheader |
| 2716 | here, but since we might use these niters to update the epilogues niters |
| 2717 | and data references we can't insert them here as this definition might not |
| 2718 | always dominate its uses. */ |
| 2719 | if (!types_compatible_p (sizetype, TREE_TYPE (niters))) |
| 2720 | niters = fold_convert (sizetype, niters); |
| 2721 | |
| 2722 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
| 2723 | { |
| 2724 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
| 2725 | if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt) |
| 2726 | && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt)) |
| 2727 | vect_update_init_of_dr (dr_info, niters, code); |
| 2728 | } |
| 2729 | } |
| 2730 | |
| 2731 | /* For the information recorded in LOOP_VINFO prepare the loop for peeling |
| 2732 | by masking. This involves calculating the number of iterations to |
| 2733 | be peeled and then aligning all memory references appropriately. */ |
| 2734 | |
void
vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
{
  tree misalign_in_elems;
  tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));

  gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));

  /* From the information recorded in LOOP_VINFO get the number of iterations
     that need to be skipped via masking.  */
  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Known peeling amount: mask off VF - npeel leading elements.  */
      poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
			     - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
      misalign_in_elems = build_int_cst (type, misalign);
    }
  else
    {
      /* Otherwise compute the misalignment at runtime, emitting the
	 computation on the loop preheader edge.  */
      gimple_seq seq1 = NULL, seq2 = NULL;
      misalign_in_elems = get_misalign_in_elems (seq: &seq1, loop_vinfo);
      misalign_in_elems = fold_convert (type, misalign_in_elems);
      misalign_in_elems = force_gimple_operand (misalign_in_elems,
						&seq2, true, NULL_TREE);
      gimple_seq_add_seq (&seq1, seq2);
      if (seq1)
	{
	  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
	  /* The preheader edge must not need splitting here.  */
	  gcc_assert (!new_bb);
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "misalignment for fully-masked loop: %T\n" ,
		     misalign_in_elems);

  LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;

  /* The vector loop starts MISALIGN_IN_ELEMS iterations before the scalar
     one (with the leading elements masked off), so shift the recorded
     data-reference offsets back by that amount.  */
  vect_update_inits_of_drs (loop_vinfo, niters: misalign_in_elems, code: MINUS_EXPR);
}
| 2776 | |
| 2777 | /* This function builds ni_name = number of iterations. Statements |
| 2778 | are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set |
| 2779 | it to TRUE if new ssa_var is generated. */ |
| 2780 | |
| 2781 | tree |
| 2782 | vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p) |
| 2783 | { |
| 2784 | if (LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)) |
| 2785 | return NULL_TREE; |
| 2786 | tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); |
| 2787 | if (TREE_CODE (ni) == INTEGER_CST) |
| 2788 | return ni; |
| 2789 | else |
| 2790 | { |
| 2791 | tree ni_name, var; |
| 2792 | gimple_seq stmts = NULL; |
| 2793 | edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
| 2794 | |
| 2795 | var = create_tmp_var (TREE_TYPE (ni), "niters" ); |
| 2796 | ni_name = force_gimple_operand (ni, &stmts, false, var); |
| 2797 | if (stmts) |
| 2798 | { |
| 2799 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
| 2800 | if (new_var_p != NULL) |
| 2801 | *new_var_p = true; |
| 2802 | } |
| 2803 | |
| 2804 | return ni_name; |
| 2805 | } |
| 2806 | } |
| 2807 | |
| 2808 | /* Calculate the number of iterations above which vectorized loop will be |
| 2809 | preferred than scalar loop. NITERS_PROLOG is the number of iterations |
| 2810 | of prolog loop. If it's integer const, the integer number is also passed |
| 2811 | in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the |
| 2812 | number of iterations of the prolog loop. BOUND_EPILOG is the corresponding |
| 2813 | value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the |
| 2814 | threshold below which the scalar (rather than vectorized) loop will be |
| 2815 | executed. This function stores the upper bound (inclusive) of the result |
| 2816 | in BOUND_SCALAR. */ |
| 2817 | |
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
			     poly_int64 bound_prolog, poly_int64 bound_epilog,
			     int th, poly_uint64 *bound_scalar,
			     bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  /* Baseline: the scalar loop runs the prolog plus epilog iterations.  */
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
			     build_int_cst (type, bound_epilog));

  *bound_scalar = bound_prolog + bound_epilog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
	 compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
	{
	  *bound_scalar = upper_bound (a: int_niters_prolog + bound_epilog, b: th);
	  return build_int_cst (type, *bound_scalar);
	}
      /* Peeling an unknown number of times.  Note that both BOUND_PROLOG
	 and BOUND_EPILOG are inclusive upper bounds.  */
      if (known_ge (th, bound_prolog + bound_epilog))
	{
	  /* The threshold dominates any possible peeling, so it is the
	     scalar bound.  */
	  *bound_scalar = th;
	  return build_int_cst (type, th);
	}
      /* Need to do runtime comparison.  */
      else if (maybe_gt (th, bound_epilog))
	{
	  *bound_scalar = upper_bound (a: *bound_scalar, b: th);
	  return fold_build2 (MAX_EXPR, type,
			      build_int_cst (type, th), niters);
	}
    }
  return niters;
}
| 2857 | |
| 2858 | /* NITERS is the number of times that the original scalar loop executes |
| 2859 | after peeling. Work out the maximum number of iterations N that can |
| 2860 | be handled by the vectorized form of the loop and then either: |
| 2861 | |
| 2862 | a) set *STEP_VECTOR_PTR to the vectorization factor and generate: |
| 2863 | |
| 2864 | niters_vector = N |
| 2865 | |
| 2866 | b) set *STEP_VECTOR_PTR to one and generate: |
| 2867 | |
| 2868 | niters_vector = N / vf |
| 2869 | |
| 2870 | In both cases, store niters_vector in *NITERS_VECTOR_PTR and add |
| 2871 | any new statements on the loop preheader edge. NITERS_NO_OVERFLOW |
| 2872 | is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). |
| 2873 | |
| 2874 | Case (a) is used for LOOP_VINFO_USING_PARTIAL_VECTORS_P or if VF is |
| 2875 | variable. As stated above, NITERS_VECTOR then equals the number |
| 2876 | of scalar iterations and vect_set_loop_condition will handle the |
| 2877 | step. As opposed to (b) we don't know anything about NITER_VECTOR's |
| 2878 | range here. |
| 2879 | */ |
| 2880 | |
void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
			     tree *niters_vector_ptr, tree *step_vector_ptr,
			     bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector, step_vector;
  tree type = niters ? TREE_TYPE (niters) : sizetype;
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
				  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
	{
	  var = create_tmp_var (type, "ni_gap" );
	  gimple *stmts = NULL;
	  ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
					       true, var);
	  gsi_insert_seq_on_edge_immediate (pe, stmts);
	}
    }
  else
    ni_minus_gap = niters;

  /* To silence some unexpected warnings, simply initialize to 0.  */
  unsigned HOST_WIDE_INT const_vf = 0;
  if (vf.is_constant (const_value: &const_vf)
      && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    {
      /* Case (b) from the header comment: constant VF, full vectors.  */
      /* Create: niters / vf, which is equivalent to niters >> log2(vf) when
	 vf is a power of two, and when not we approximate using a
	 truncating division.  */
      /* If it's known that niters == number of latch executions + 1 doesn't
	 overflow, we can generate niters / vf; otherwise we generate
	 (niters - vf) / vf + 1 by using the fact that we know ratio
	 will be at least one.  */
      tree var_vf = build_int_cst (type, const_vf);
      if (niters_no_overflow)
	niters_vector = fold_build2 (TRUNC_DIV_EXPR, type, ni_minus_gap,
				     var_vf);
      else
	niters_vector
	  = fold_build2 (PLUS_EXPR, type,
			 fold_build2 (TRUNC_DIV_EXPR, type,
				      fold_build2 (MINUS_EXPR, type,
						   ni_minus_gap,
						   var_vf),
				      var_vf),
			 build_int_cst (type, 1));
      step_vector = build_one_cst (type);
    }
  else
    {
      /* Case (a): the count stays in scalar iterations and the step is
	 VF; vect_set_loop_condition handles the stepping.  */
      niters_vector = ni_minus_gap;
      step_vector = build_int_cst (type, vf);
    }

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd" );
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
	 we set range information to make niters analyzer's life easier.
	 Note the number of latch iteration value can be TYPE_MAX_VALUE so
	 we have to represent the vector niter TYPE_MAX_VALUE + 1 / vf.  */
      if (stmts != NULL
	  && integer_onep (step_vector))
	{
	  if (niters_no_overflow)
	    {
	      /* Range [1, TYPE_MAX / vf].  */
	      int_range<1> vr (type,
			       wi::one (TYPE_PRECISION (type)),
			       wi::div_trunc (x: wi::max_value
						(TYPE_PRECISION (type),
						 TYPE_SIGN (type)),
					      y: const_vf,
					      TYPE_SIGN (type)));
	      set_range_info (niters_vector, vr);
	    }
	  /* For VF == 1 the vector IV might also overflow so we cannot
	     assert a minimum value of 1.  */
	  else if (const_vf > 1)
	    {
	      /* Range [1, ((TYPE_MAX - (vf - 1)) >> log2 (vf)) + 1],
		 matching the (niters - vf) / vf + 1 form above.  */
	      int_range<1> vr (type,
			       wi::one (TYPE_PRECISION (type)),
			       wi::rshift (x: wi::max_value (TYPE_PRECISION (type),
							   TYPE_SIGN (type))
					   - (const_vf - 1),
					   y: exact_log2 (x: const_vf),
					   TYPE_SIGN (type))
			       + 1);
	      set_range_info (niters_vector, vr);
	    }
	}
    }
  *niters_vector_ptr = niters_vector;
  *step_vector_ptr = step_vector;

  return;
}
| 2988 | |
| 2989 | /* Given NITERS_VECTOR which is the number of iterations for vectorized |
| 2990 | loop specified by LOOP_VINFO after vectorization, compute the number |
| 2991 | of iterations before vectorization (niters_vector * vf) and store it |
| 2992 | to NITERS_VECTOR_MULT_VF_PTR. */ |
| 2993 | |
static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
				     tree niters_vector,
				     tree *niters_vector_mult_vf_ptr)
{
  /* We should be using a step_vector of VF if VF is variable.  */
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
  tree type = TREE_TYPE (niters_vector);
  tree tree_vf = build_int_cst (type, vf);
  basic_block exit_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (MULT_EXPR, type,
					    niters_vector, tree_vf);

  /* If we've peeled a vector iteration then subtract one full vector
     iteration.  */
  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
					 niters_vector_mult_vf, tree_vf);

  /* Gimplify the product if needed, emitting any new statements at the
     start of the main exit block.  */
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf" );
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
						    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (bb: exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
| 3026 | |
| 3027 | /* Function slpeel_add_loop_guard adds guard skipping from the beginning |
| 3028 | of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE |
| 3029 | are two pred edges of the merge point before UPDATE_LOOP. The two loops |
| 3030 | appear like below: |
| 3031 | |
| 3032 | guard_bb: |
| 3033 | if (cond) |
| 3034 | goto merge_bb; |
| 3035 | else |
| 3036 | goto skip_loop; |
| 3037 | |
| 3038 | skip_loop: |
| 3039 | header_a: |
| 3040 | i_1 = PHI<i_0, i_2>; |
| 3041 | ... |
| 3042 | i_2 = i_1 + 1; |
| 3043 | if (cond_a) |
| 3044 | goto latch_a; |
| 3045 | else |
| 3046 | goto exit_a; |
| 3047 | latch_a: |
| 3048 | goto header_a; |
| 3049 | |
| 3050 | exit_a: |
| 3051 | i_5 = PHI<i_2>; |
| 3052 | |
| 3053 | merge_bb: |
| 3054 | ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point. |
| 3055 | |
| 3056 | update_loop: |
| 3057 | header_b: |
| 3058 | i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x. |
| 3059 | ... |
| 3060 | i_4 = i_3 + 1; |
| 3061 | if (cond_b) |
| 3062 | goto latch_b; |
| 3063 | else |
| 3064 | goto exit_bb; |
| 3065 | latch_b: |
| 3066 | goto header_b; |
| 3067 | |
| 3068 | exit_bb: |
| 3069 | |
| 3070 | This function creates PHI nodes at merge_bb and replaces the use of i_5 |
| 3071 | in the update_loop's PHI node with the result of new PHI result. */ |
| 3072 | |
| 3073 | static void |
| 3074 | slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop, |
| 3075 | class loop *update_loop, |
| 3076 | edge guard_edge, edge merge_edge) |
| 3077 | { |
| 3078 | location_t merge_loc, guard_loc; |
| 3079 | edge orig_e = loop_preheader_edge (skip_loop); |
| 3080 | edge update_e = loop_preheader_edge (update_loop); |
| 3081 | gphi_iterator gsi_orig, gsi_update; |
| 3082 | |
| 3083 | for ((gsi_orig = gsi_start_phis (skip_loop->header), |
| 3084 | gsi_update = gsi_start_phis (update_loop->header)); |
| 3085 | !gsi_end_p (i: gsi_orig) && !gsi_end_p (i: gsi_update); |
| 3086 | gsi_next (i: &gsi_orig), gsi_next (i: &gsi_update)) |
| 3087 | { |
| 3088 | gphi *orig_phi = gsi_orig.phi (); |
| 3089 | gphi *update_phi = gsi_update.phi (); |
| 3090 | |
| 3091 | /* Generate new phi node at merge bb of the guard. */ |
| 3092 | tree new_res = copy_ssa_name (PHI_RESULT (orig_phi)); |
| 3093 | gphi *new_phi = create_phi_node (new_res, guard_edge->dest); |
| 3094 | |
| 3095 | /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the |
| 3096 | args in NEW_PHI for these edges. */ |
| 3097 | tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e); |
| 3098 | tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e); |
| 3099 | merge_loc = gimple_phi_arg_location_from_edge (phi: update_phi, e: update_e); |
| 3100 | guard_loc = gimple_phi_arg_location_from_edge (phi: orig_phi, e: orig_e); |
| 3101 | add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc); |
| 3102 | add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc); |
| 3103 | |
| 3104 | /* Update phi in UPDATE_PHI. */ |
| 3105 | adjust_phi_and_debug_stmts (update_phi, e: update_e, new_def: new_res); |
| 3106 | } |
| 3107 | } |
| 3108 | |
| 3109 | /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped. |
| 3110 | Return a value that equals: |
| 3111 | |
| 3112 | - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and |
| 3113 | - SKIP_VALUE when the main loop is skipped. */ |
| 3114 | |
| 3115 | tree |
| 3116 | vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value, |
| 3117 | tree skip_value) |
| 3118 | { |
| 3119 | gcc_assert (loop_vinfo->main_loop_edge); |
| 3120 | |
| 3121 | tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value)); |
| 3122 | basic_block bb = loop_vinfo->main_loop_edge->dest; |
| 3123 | gphi *new_phi = create_phi_node (phi_result, bb); |
| 3124 | add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge, |
| 3125 | UNKNOWN_LOCATION); |
| 3126 | add_phi_arg (new_phi, skip_value, |
| 3127 | loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION); |
| 3128 | return phi_result; |
| 3129 | } |
| 3130 | |
| 3131 | /* Function vect_do_peeling. |
| 3132 | |
| 3133 | Input: |
| 3134 | - LOOP_VINFO: Represent a loop to be vectorized, which looks like: |
| 3135 | |
| 3136 | preheader: |
| 3137 | LOOP: |
| 3138 | header_bb: |
| 3139 | loop_body |
| 3140 | if (exit_loop_cond) goto exit_bb |
| 3141 | else goto header_bb |
| 3142 | exit_bb: |
| 3143 | |
| 3144 | - NITERS: The number of iterations of the loop. |
| 3145 | - NITERSM1: The number of iterations of the loop's latch. |
| 3146 | - NITERS_NO_OVERFLOW: No overflow in computing NITERS. |
| 3147 | - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if |
| 3148 | CHECK_PROFITABILITY is true. |
| 3149 | Output: |
| 3150 | - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should |
| 3151 | iterate after vectorization; see vect_set_loop_condition for details. |
| 3152 | - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that |
| 3153 | should be set to the number of scalar iterations handled by the |
| 3154 | vector loop. The SSA name is only used on exit from the loop. |
| 3155 | |
| 3156 | This function peels prolog and epilog from the loop, adds guards skipping |
| 3157 | PROLOG and EPILOG for various conditions. As a result, the changed CFG |
| 3158 | would look like: |
| 3159 | |
| 3160 | guard_bb_1: |
| 3161 | if (prefer_scalar_loop) goto merge_bb_1 |
| 3162 | else goto guard_bb_2 |
| 3163 | |
| 3164 | guard_bb_2: |
| 3165 | if (skip_prolog) goto merge_bb_2 |
| 3166 | else goto prolog_preheader |
| 3167 | |
| 3168 | prolog_preheader: |
| 3169 | PROLOG: |
| 3170 | prolog_header_bb: |
| 3171 | prolog_body |
| 3172 | if (exit_prolog_cond) goto prolog_exit_bb |
| 3173 | else goto prolog_header_bb |
| 3174 | prolog_exit_bb: |
| 3175 | |
| 3176 | merge_bb_2: |
| 3177 | |
| 3178 | vector_preheader: |
| 3179 | VECTOR LOOP: |
| 3180 | vector_header_bb: |
| 3181 | vector_body |
| 3182 | if (exit_vector_cond) goto vector_exit_bb |
| 3183 | else goto vector_header_bb |
| 3184 | vector_exit_bb: |
| 3185 | |
| 3186 | guard_bb_3: |
| 3187 | if (skip_epilog) goto merge_bb_3 |
| 3188 | else goto epilog_preheader |
| 3189 | |
| 3190 | merge_bb_1: |
| 3191 | |
| 3192 | epilog_preheader: |
| 3193 | EPILOG: |
| 3194 | epilog_header_bb: |
| 3195 | epilog_body |
| 3196 | if (exit_epilog_cond) goto merge_bb_3 |
| 3197 | else goto epilog_header_bb |
| 3198 | |
| 3199 | merge_bb_3: |
| 3200 | |
| 3201 | Note this function peels prolog and epilog only if it's necessary, |
| 3202 | as well as guards. |
| 3203 | This function returns the epilogue loop if a decision was made to vectorize |
| 3204 | it, otherwise NULL. |
| 3205 | |
| 3206 | The analysis resulting in this epilogue loop's loop_vec_info was performed |
| 3207 | in the same vect_analyze_loop call as the main loop's. At that time |
| 3208 | vect_analyze_loop constructs a list of accepted loop_vec_info's for lower |
| 3209 | vectorization factors than the main loop. This list is chained in the |
| 3210 | loop's loop_vec_info in the 'epilogue_vinfo' member. When we decide to |
| 3211 | vectorize the epilogue loop for a lower vectorization factor, the |
| 3212 | loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop. |
| 3213 | This is later used to vectorize the epilogue. |
| 3214 | The reason the loop_vec_info needs updating is that it was |
| 3215 | constructed based on the original main loop, and the epilogue loop is a |
| 3216 | copy of this loop, so all links pointing to statements in the original loop |
| 3217 | need updating. Furthermore, these loop_vec_infos share the |
| 3218 | data_reference's records, which will also need to be updated. |
| 3219 | |
| 3220 | TODO: Guard for prefer_scalar_loop should be emitted along with |
| 3221 | versioning conditions if loop versioning is needed. */ |
| 3222 | |
| 3223 | |
| 3224 | class loop * |
| 3225 | vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, |
| 3226 | tree *niters_vector, tree *step_vector, |
| 3227 | tree *niters_vector_mult_vf_var, int th, |
| 3228 | bool check_profitability, bool niters_no_overflow, |
| 3229 | tree *advance) |
| 3230 | { |
| 3231 | edge e, guard_e; |
| 3232 | tree type = niters ? TREE_TYPE (niters) : sizetype; |
| 3233 | tree guard_cond; |
| 3234 | basic_block guard_bb, guard_to; |
| 3235 | profile_probability prob_prolog, prob_vector, prob_epilog; |
| 3236 | int estimated_vf; |
| 3237 | int prolog_peeling = 0; |
| 3238 | bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL; |
| 3239 | bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo); |
| 3240 | |
| 3241 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
| 3242 | prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
| 3243 | |
| 3244 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
| 3245 | poly_uint64 bound_epilog = 0; |
| 3246 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
| 3247 | && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) |
| 3248 | bound_epilog += vf - 1; |
| 3249 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
| 3250 | bound_epilog += 1; |
| 3251 | |
| 3252 | /* For early breaks the scalar loop needs to execute at most VF times |
| 3253 | to find the element that caused the break. */ |
| 3254 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
| 3255 | bound_epilog = vf; |
| 3256 | |
| 3257 | bool epilog_peeling = maybe_ne (a: bound_epilog, b: 0U); |
| 3258 | poly_uint64 bound_scalar = bound_epilog; |
| 3259 | |
| 3260 | if (!prolog_peeling && !epilog_peeling) |
| 3261 | return NULL; |
| 3262 | |
| 3263 | /* Before doing any peeling make sure to reset debug binds outside of |
| 3264 | the loop referring to defs not in LC SSA. */ |
| 3265 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
| 3266 | for (unsigned i = 0; i < loop->num_nodes; ++i) |
| 3267 | { |
| 3268 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
| 3269 | imm_use_iterator ui; |
| 3270 | gimple *use_stmt; |
| 3271 | for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); |
| 3272 | gsi_next (i: &gsi)) |
| 3273 | { |
| 3274 | FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ())) |
| 3275 | if (gimple_debug_bind_p (s: use_stmt) |
| 3276 | && loop != gimple_bb (g: use_stmt)->loop_father |
| 3277 | && !flow_loop_nested_p (loop, |
| 3278 | gimple_bb (g: use_stmt)->loop_father)) |
| 3279 | { |
| 3280 | gimple_debug_bind_reset_value (dbg: use_stmt); |
| 3281 | update_stmt (s: use_stmt); |
| 3282 | } |
| 3283 | } |
| 3284 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); |
| 3285 | gsi_next (i: &gsi)) |
| 3286 | { |
| 3287 | ssa_op_iter op_iter; |
| 3288 | def_operand_p def_p; |
| 3289 | FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF) |
| 3290 | FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p)) |
| 3291 | if (gimple_debug_bind_p (s: use_stmt) |
| 3292 | && loop != gimple_bb (g: use_stmt)->loop_father |
| 3293 | && !flow_loop_nested_p (loop, |
| 3294 | gimple_bb (g: use_stmt)->loop_father)) |
| 3295 | { |
| 3296 | gimple_debug_bind_reset_value (dbg: use_stmt); |
| 3297 | update_stmt (s: use_stmt); |
| 3298 | } |
| 3299 | } |
| 3300 | } |
| 3301 | |
| 3302 | prob_vector = profile_probability::guessed_always ().apply_scale (num: 9, den: 10); |
| 3303 | estimated_vf = vect_vf_for_cost (loop_vinfo); |
| 3304 | if (estimated_vf == 2) |
| 3305 | estimated_vf = 3; |
| 3306 | prob_prolog = prob_epilog = profile_probability::guessed_always () |
| 3307 | .apply_scale (num: estimated_vf - 1, den: estimated_vf); |
| 3308 | |
| 3309 | class loop *prolog = NULL, *epilog = NULL; |
| 3310 | class loop *first_loop = loop; |
| 3311 | bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP; |
| 3312 | |
| 3313 | /* SSA form needs to be up-to-date since we are going to manually |
| 3314 | update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all |
| 3315 | update SSA state after that, so we have to make sure to not lose any |
| 3316 | pending update needs. */ |
| 3317 | gcc_assert (!need_ssa_update_p (cfun)); |
| 3318 | |
| 3319 | /* If we're vectorizing an epilogue loop, we have ensured that the |
| 3320 | virtual operand is in SSA form throughout the vectorized main loop. |
| 3321 | Normally it is possible to trace the updated |
| 3322 | vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses |
| 3323 | back to scalar-stmt vuses, meaning that the effect of the SSA update |
| 3324 | remains local to the main loop. However, there are rare cases in |
| 3325 | which the vectorized loop should have vdefs even when the original scalar |
| 3326 | loop didn't. For example, vectorizing a load with IFN_LOAD_LANES |
| 3327 | introduces clobbers of the temporary vector array, which in turn |
| 3328 | needs new vdefs. If the scalar loop doesn't write to memory, these |
| 3329 | new vdefs will be the only ones in the vector loop. |
     We currently defer the virtual SSA form update and the creation of
     a virtual PHI for this case, so we do not have to make sure the
     newly introduced virtual def is in LCSSA form.  */
| 3333 | |
| 3334 | if (MAY_HAVE_DEBUG_BIND_STMTS) |
| 3335 | { |
| 3336 | gcc_assert (!adjust_vec.exists ()); |
| 3337 | adjust_vec.create (nelems: 32); |
| 3338 | } |
| 3339 | initialize_original_copy_tables (); |
| 3340 | |
| 3341 | /* Record the anchor bb at which the guard should be placed if the scalar |
| 3342 | loop might be preferred. */ |
| 3343 | basic_block anchor = loop_preheader_edge (loop)->src; |
| 3344 | |
| 3345 | /* Generate the number of iterations for the prolog loop. We do this here |
| 3346 | so that we can also get the upper bound on the number of iterations. */ |
| 3347 | tree niters_prolog; |
| 3348 | poly_int64 bound_prolog = 0; |
| 3349 | if (prolog_peeling) |
| 3350 | { |
| 3351 | niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, bb: anchor, |
| 3352 | bound: &bound_prolog); |
      /* If alignment peeling is known, we will always execute prolog.  */
| 3354 | if (TREE_CODE (niters_prolog) == INTEGER_CST) |
| 3355 | prob_prolog = profile_probability::always (); |
| 3356 | } |
| 3357 | else |
| 3358 | niters_prolog = build_int_cst (type, 0); |
| 3359 | |
| 3360 | loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo; |
| 3361 | tree niters_vector_mult_vf = NULL_TREE; |
| 3362 | /* Saving NITERs before the loop, as this may be changed by prologue. */ |
| 3363 | tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo); |
| 3364 | edge update_e = NULL, skip_e = NULL; |
| 3365 | unsigned int lowest_vf = constant_lower_bound (a: vf); |
| 3366 | /* Prolog loop may be skipped. */ |
| 3367 | bool skip_prolog = (prolog_peeling != 0); |
| 3368 | /* Skip this loop to epilog when there are not enough iterations to enter this |
| 3369 | vectorized loop. If true we should perform runtime checks on the NITERS |
| 3370 | to check whether we should skip the current vectorized loop. If we know |
| 3371 | the number of scalar iterations we may choose to add a runtime check if |
| 3372 | this number "maybe" smaller than the number of iterations required |
| 3373 | when we know the number of scalar iterations may potentially |
| 3374 | be smaller than the number of iterations required to enter this loop, for |
| 3375 | this we use the upper bounds on the prolog and epilog peeling. When we |
| 3376 | don't know the number of iterations and don't require versioning it is |
| 3377 | because we have asserted that there are enough scalar iterations to enter |
| 3378 | the main loop, so this skip is not necessary. When we are versioning then |
| 3379 | we only add such a skip if we have chosen to vectorize the epilogue. */ |
| 3380 | bool skip_vector = false; |
| 3381 | if (!uncounted_p) |
| 3382 | skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
| 3383 | ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), |
| 3384 | b: bound_prolog + bound_epilog) |
| 3385 | : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo) |
| 3386 | || vect_epilogues)); |
| 3387 | |
| 3388 | /* Epilog loop must be executed if the number of iterations for epilog |
| 3389 | loop is known at compile time, otherwise we need to add a check at |
| 3390 | the end of vector loop and skip to the end of epilog loop. */ |
| 3391 | bool skip_epilog = (prolog_peeling < 0 |
| 3392 | || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
| 3393 | || !vf.is_constant ()); |
| 3394 | /* PEELING_FOR_GAPS and peeling for early breaks are special because epilog |
| 3395 | loop must be executed. */ |
| 3396 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
| 3397 | || LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
| 3398 | skip_epilog = false; |
| 3399 | |
| 3400 | class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); |
| 3401 | auto_vec<profile_count> original_counts; |
| 3402 | basic_block *original_bbs = NULL; |
| 3403 | |
| 3404 | if (skip_vector) |
| 3405 | { |
| 3406 | split_edge (loop_preheader_edge (loop)); |
| 3407 | |
| 3408 | if (epilog_peeling && (vect_epilogues || scalar_loop == NULL)) |
| 3409 | { |
| 3410 | original_bbs = get_loop_body (loop); |
| 3411 | for (unsigned int i = 0; i < loop->num_nodes; i++) |
| 3412 | original_counts.safe_push(obj: original_bbs[i]->count); |
| 3413 | } |
| 3414 | |
| 3415 | /* Due to the order in which we peel prolog and epilog, we first |
| 3416 | propagate probability to the whole loop. The purpose is to |
| 3417 | avoid adjusting probabilities of both prolog and vector loops |
| 3418 | separately. Note in this case, the probability of epilog loop |
| 3419 | needs to be scaled back later. */ |
| 3420 | basic_block bb_before_loop = loop_preheader_edge (loop)->src; |
| 3421 | if (prob_vector.initialized_p ()) |
| 3422 | { |
| 3423 | scale_bbs_frequencies (&bb_before_loop, 1, prob_vector); |
| 3424 | scale_loop_profile (loop, prob_vector, -1); |
| 3425 | } |
| 3426 | } |
| 3427 | |
| 3428 | if (vect_epilogues) |
| 3429 | { |
| 3430 | /* Make sure to set the epilogue's epilogue scalar loop, such that we can |
| 3431 | use the original scalar loop as remaining epilogue if necessary. */ |
| 3432 | LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo) |
| 3433 | = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); |
| 3434 | LOOP_VINFO_SCALAR_MAIN_EXIT (epilogue_vinfo) |
| 3435 | = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo); |
| 3436 | } |
| 3437 | |
| 3438 | if (prolog_peeling) |
| 3439 | { |
| 3440 | e = loop_preheader_edge (loop); |
| 3441 | edge exit_e = LOOP_VINFO_MAIN_EXIT (loop_vinfo); |
| 3442 | gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e) |
| 3443 | && (!LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo) |
| 3444 | || uncounted_p)); |
| 3445 | |
| 3446 | /* Peel prolog and put it on preheader edge of loop. */ |
| 3447 | edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo); |
| 3448 | edge prolog_e = NULL; |
| 3449 | prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loop_exit: exit_e, |
| 3450 | scalar_loop, scalar_exit: scalar_e, |
| 3451 | e, new_e: &prolog_e, flow_loops: true, NULL, |
| 3452 | uncounted_p: false, create_main_e: uncounted_p); |
| 3453 | |
| 3454 | gcc_assert (prolog); |
| 3455 | prolog->force_vectorize = false; |
| 3456 | |
| 3457 | /* Assign hierarchical discriminators to distinguish prolog loop. */ |
| 3458 | gimple *prolog_last = last_nondebug_stmt (prolog->header); |
| 3459 | location_t prolog_loc |
| 3460 | = prolog_last ? gimple_location (g: prolog_last) : UNKNOWN_LOCATION; |
| 3461 | if (prolog_loc != UNKNOWN_LOCATION) |
| 3462 | { |
| 3463 | unsigned int prolog_copyid = allocate_copyid_base (loc: prolog_loc, stride: 1); |
| 3464 | assign_discriminators_to_loop (loop: prolog, multiplicity_factor: 0, copyid: prolog_copyid); |
| 3465 | } |
| 3466 | first_loop = prolog; |
| 3467 | reset_original_copy_tables (); |
| 3468 | |
| 3469 | /* Update the number of iterations for prolog loop. */ |
| 3470 | tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog)); |
| 3471 | vect_set_loop_condition (loop: prolog, loop_e: prolog_e, NULL, niters: niters_prolog, |
| 3472 | step: step_prolog, NULL_TREE, niters_maybe_zero: false); |
| 3473 | |
| 3474 | /* Skip the prolog loop. */ |
| 3475 | if (skip_prolog) |
| 3476 | { |
| 3477 | guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, |
| 3478 | niters_prolog, build_int_cst (type, 0)); |
| 3479 | guard_bb = loop_preheader_edge (prolog)->src; |
| 3480 | basic_block bb_after_prolog = loop_preheader_edge (loop)->src; |
| 3481 | guard_to = split_edge (loop_preheader_edge (loop)); |
| 3482 | guard_e = slpeel_add_loop_guard (guard_bb, cond: guard_cond, |
| 3483 | guard_to, dom_bb: guard_bb, |
| 3484 | probability: prob_prolog.invert (), |
| 3485 | irreducible_p: irred_flag); |
| 3486 | for (edge alt_e : get_loop_exit_edges (prolog)) |
| 3487 | { |
| 3488 | if (alt_e == prolog_e) |
| 3489 | continue; |
| 3490 | basic_block old_dom |
| 3491 | = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest); |
| 3492 | if (flow_bb_inside_loop_p (prolog, old_dom)) |
| 3493 | { |
| 3494 | auto_vec<basic_block, 8> queue; |
| 3495 | for (auto son = first_dom_son (CDI_DOMINATORS, old_dom); |
| 3496 | son; son = next_dom_son (CDI_DOMINATORS, son)) |
| 3497 | if (!flow_bb_inside_loop_p (prolog, son)) |
| 3498 | queue.safe_push (obj: son); |
| 3499 | for (auto son : queue) |
| 3500 | set_immediate_dominator (CDI_DOMINATORS, son, guard_bb); |
| 3501 | } |
| 3502 | } |
| 3503 | |
| 3504 | e = EDGE_PRED (guard_to, 0); |
| 3505 | e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); |
| 3506 | slpeel_update_phi_nodes_for_guard1 (skip_loop: prolog, update_loop: loop, guard_edge: guard_e, merge_edge: e); |
| 3507 | |
| 3508 | scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog); |
| 3509 | scale_loop_profile (prolog, prob_prolog, |
| 3510 | estimated_poly_value (x: bound_prolog) - 1); |
| 3511 | } |
| 3512 | |
| 3513 | /* Update init address of DRs. */ |
| 3514 | vect_update_inits_of_drs (loop_vinfo, niters: niters_prolog, code: PLUS_EXPR); |
| 3515 | if (!uncounted_p) |
| 3516 | { |
| 3517 | /* Update niters for vector loop. */ |
| 3518 | LOOP_VINFO_NITERS (loop_vinfo) |
| 3519 | = fold_build2 (MINUS_EXPR, type, niters, niters_prolog); |
| 3520 | LOOP_VINFO_NITERSM1 (loop_vinfo) |
| 3521 | = fold_build2 (MINUS_EXPR, type, |
| 3522 | LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog); |
| 3523 | } |
| 3524 | bool new_var_p = false; |
| 3525 | niters = vect_build_loop_niters (loop_vinfo, new_var_p: &new_var_p); |
| 3526 | /* It's guaranteed that vector loop bound before vectorization is at |
| 3527 | least VF, so set range information for newly generated var. */ |
| 3528 | if (new_var_p) |
| 3529 | { |
| 3530 | int_range<1> vr (type, |
| 3531 | wi::to_wide (t: build_int_cst (type, lowest_vf)), |
| 3532 | wi::to_wide (TYPE_MAX_VALUE (type))); |
| 3533 | set_range_info (niters, vr); |
| 3534 | } |
| 3535 | |
| 3536 | /* Prolog iterates at most bound_prolog times, latch iterates at |
| 3537 | most bound_prolog - 1 times. */ |
| 3538 | if (bound_prolog.is_constant ()) |
| 3539 | record_niter_bound (prolog, bound_prolog.to_constant () - 1, false, |
| 3540 | true); |
| 3541 | delete_update_ssa (); |
| 3542 | adjust_vec_debug_stmts (); |
| 3543 | scev_reset (); |
| 3544 | } |
| 3545 | basic_block bb_before_epilog = NULL; |
| 3546 | |
| 3547 | if (epilog_peeling) |
| 3548 | { |
| 3549 | e = LOOP_VINFO_MAIN_EXIT (loop_vinfo); |
| 3550 | gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e)); |
| 3551 | |
| 3552 | /* Peel epilog and put it on exit edge of loop. If we are vectorizing |
| 3553 | said epilog then we should use a copy of the main loop as a starting |
| 3554 | point. This loop may have already had some preliminary transformations |
| 3555 | to allow for more optimal vectorization, for example if-conversion. |
| 3556 | If we are not vectorizing the epilog then we should use the scalar loop |
| 3557 | as the transformations mentioned above make less or no sense when not |
| 3558 | vectorizing. */ |
| 3559 | edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo); |
| 3560 | epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; |
| 3561 | edge epilog_e = vect_epilogues ? e : scalar_e; |
| 3562 | edge new_epilog_e = NULL; |
| 3563 | auto_vec<basic_block> doms; |
| 3564 | epilog |
| 3565 | = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loop_exit: e, scalar_loop: epilog, scalar_exit: epilog_e, e, |
| 3566 | new_e: &new_epilog_e, flow_loops: true, updated_doms: &doms, |
| 3567 | uncounted_p); |
| 3568 | |
| 3569 | LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e; |
| 3570 | gcc_assert (epilog); |
| 3571 | gcc_assert (new_epilog_e); |
| 3572 | epilog->force_vectorize = false; |
| 3573 | bb_before_epilog = loop_preheader_edge (epilog)->src; |
| 3574 | |
| 3575 | /* Assign hierarchical discriminators to distinguish epilog loop. |
| 3576 | Only assign if it's a scalar epilog. If it will be vectorized |
| 3577 | (vect_epilogues), discriminators will be assigned. |
| 3578 | Use dynamic copy_id allocation instead of hardcoded constants. */ |
| 3579 | if (!vect_epilogues) |
| 3580 | { |
| 3581 | gimple *epilog_last = last_nondebug_stmt (epilog->header); |
| 3582 | location_t epilog_loc |
| 3583 | = epilog_last ? gimple_location (g: epilog_last) : UNKNOWN_LOCATION; |
| 3584 | if (epilog_loc != UNKNOWN_LOCATION) |
| 3585 | { |
| 3586 | unsigned int epilog_copyid = allocate_copyid_base (loc: epilog_loc, stride: 1); |
| 3587 | assign_discriminators_to_loop (loop: epilog, multiplicity_factor: 0, copyid: epilog_copyid); |
| 3588 | } |
| 3589 | } |
| 3590 | |
| 3591 | /* Scalar version loop may be preferred. In this case, add guard |
| 3592 | and skip to epilog. Note this only happens when the number of |
| 3593 | iterations of loop is unknown at compile time, otherwise this |
| 3594 | won't be vectorized. */ |
| 3595 | if (skip_vector) |
| 3596 | { |
| 3597 | /* Additional epilogue iteration is peeled if gap exists. */ |
| 3598 | tree t = vect_gen_scalar_loop_niters (niters_prolog, int_niters_prolog: prolog_peeling, |
| 3599 | bound_prolog, bound_epilog, |
| 3600 | th, bound_scalar: &bound_scalar, |
| 3601 | check_profitability); |
| 3602 | /* Build guard against NITERSM1 since NITERS may overflow. */ |
| 3603 | guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t); |
| 3604 | guard_bb = anchor; |
| 3605 | guard_to = split_edge (loop_preheader_edge (epilog)); |
| 3606 | guard_e = slpeel_add_loop_guard (guard_bb, cond: guard_cond, |
| 3607 | guard_to, dom_bb: guard_bb, |
| 3608 | probability: prob_vector.invert (), |
| 3609 | irreducible_p: irred_flag); |
| 3610 | skip_e = guard_e; |
| 3611 | e = EDGE_PRED (guard_to, 0); |
| 3612 | e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); |
| 3613 | |
| 3614 | /* Handle any remaining dominator updates needed after |
| 3615 | inserting the loop skip edge above. */ |
| 3616 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) |
| 3617 | && prolog_peeling) |
| 3618 | { |
| 3619 | /* Adding a skip edge to skip a loop with multiple exits |
| 3620 | means the dominator of the join blocks for all exits shifts |
| 3621 | from the prolog skip guard to the loop skip guard. */ |
| 3622 | auto prolog_skip_bb |
| 3623 | = single_pred (bb: loop_preheader_edge (prolog)->src); |
| 3624 | auto needs_update |
| 3625 | = get_dominated_by (CDI_DOMINATORS, prolog_skip_bb); |
| 3626 | |
| 3627 | /* Update everything except for the immediate children of |
| 3628 | the prolog skip block (the prolog and vector preheaders). |
| 3629 | Those should remain dominated by the prolog skip block itself, |
| 3630 | since the loop guard edge goes to the epilogue. */ |
| 3631 | for (auto bb : needs_update) |
| 3632 | if (bb != EDGE_SUCC (prolog_skip_bb, 0)->dest |
| 3633 | && bb != EDGE_SUCC (prolog_skip_bb, 1)->dest) |
| 3634 | set_immediate_dominator (CDI_DOMINATORS, bb, guard_bb); |
| 3635 | } |
| 3636 | |
| 3637 | slpeel_update_phi_nodes_for_guard1 (skip_loop: first_loop, update_loop: epilog, guard_edge: guard_e, merge_edge: e); |
| 3638 | |
| 3639 | /* Simply propagate profile info from guard_bb to guard_to which is |
| 3640 | a merge point of control flow. */ |
| 3641 | profile_count old_count = guard_to->count; |
| 3642 | guard_to->count = guard_bb->count; |
| 3643 | |
| 3644 | /* Restore the counts of the epilog loop if we didn't use the scalar loop. */ |
| 3645 | if (vect_epilogues || scalar_loop == NULL) |
| 3646 | { |
| 3647 | gcc_assert(epilog->num_nodes == loop->num_nodes); |
| 3648 | basic_block *bbs = get_loop_body (epilog); |
| 3649 | for (unsigned int i = 0; i < epilog->num_nodes; i++) |
| 3650 | { |
| 3651 | gcc_assert(get_bb_original (bbs[i]) == original_bbs[i]); |
| 3652 | bbs[i]->count = original_counts[i]; |
| 3653 | } |
| 3654 | free (ptr: bbs); |
| 3655 | free (ptr: original_bbs); |
| 3656 | } |
| 3657 | else if (old_count.nonzero_p ()) |
| 3658 | scale_loop_profile (epilog, guard_to->count.probability_in (overall: old_count), -1); |
| 3659 | |
| 3660 | /* Only need to handle basic block before epilog loop if it's not |
| 3661 | the guard_bb, which is the case when skip_vector is true. */ |
| 3662 | if (guard_bb != bb_before_epilog && single_pred_p (bb: bb_before_epilog)) |
| 3663 | bb_before_epilog->count = single_pred_edge (bb: bb_before_epilog)->count (); |
| 3664 | bb_before_epilog = loop_preheader_edge (epilog)->src; |
| 3665 | } |
| 3666 | |
| 3667 | if (!uncounted_p) |
| 3668 | { |
| 3669 | /* If loop is peeled for non-zero constant times, now niters refers to |
| 3670 | orig_niters - prolog_peeling, it won't overflow even the |
| 3671 | orig_niters overflows. */ |
| 3672 | niters_no_overflow |= (prolog_peeling > 0); |
| 3673 | vect_gen_vector_loop_niters (loop_vinfo, niters, |
| 3674 | niters_vector_ptr: niters_vector, step_vector_ptr: step_vector, |
| 3675 | niters_no_overflow); |
| 3676 | if (!integer_onep (*step_vector)) |
| 3677 | { |
	      /* On exit from the loop we will have an easy way of calculating
| 3679 | NITERS_VECTOR / STEP * STEP. Install a dummy definition |
| 3680 | until then. */ |
| 3681 | niters_vector_mult_vf |
| 3682 | = make_ssa_name (TREE_TYPE (*niters_vector)); |
| 3683 | SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop (); |
| 3684 | *niters_vector_mult_vf_var = niters_vector_mult_vf; |
| 3685 | } |
| 3686 | else |
| 3687 | vect_gen_vector_loop_niters_mult_vf (loop_vinfo, niters_vector: *niters_vector, |
| 3688 | niters_vector_mult_vf_ptr: &niters_vector_mult_vf); |
| 3689 | /* Update IVs of original loop as if they were advanced by |
| 3690 | niters_vector_mult_vf steps. */ |
| 3691 | gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); |
| 3692 | update_e = skip_vector ? e : loop_preheader_edge (epilog); |
| 3693 | } |
| 3694 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
| 3695 | update_e = single_succ_edge (LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest); |
| 3696 | |
| 3697 | /* If we have a peeled vector iteration we will never skip the epilog loop |
| 3698 | and we can simplify the cfg a lot by not doing the edge split. */ |
| 3699 | if (skip_epilog |
| 3700 | || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) |
| 3701 | && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))) |
| 3702 | { |
| 3703 | guard_cond = fold_build2 (EQ_EXPR, boolean_type_node, |
| 3704 | niters, niters_vector_mult_vf); |
| 3705 | |
| 3706 | guard_bb = LOOP_VINFO_MAIN_EXIT (loop_vinfo)->dest; |
| 3707 | edge epilog_e = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo); |
| 3708 | guard_to = epilog_e->dest; |
| 3709 | guard_e = slpeel_add_loop_guard (guard_bb, cond: guard_cond, guard_to, |
| 3710 | dom_bb: skip_vector ? anchor : guard_bb, |
| 3711 | probability: prob_epilog.invert (), |
| 3712 | irreducible_p: irred_flag); |
| 3713 | |
| 3714 | doms.safe_push (obj: guard_to); |
| 3715 | if (vect_epilogues) |
| 3716 | epilogue_vinfo->skip_this_loop_edge = guard_e; |
| 3717 | edge main_iv = LOOP_VINFO_MAIN_EXIT (loop_vinfo); |
| 3718 | gphi_iterator gsi2 = gsi_start_phis (main_iv->dest); |
| 3719 | for (gphi_iterator gsi = gsi_start_phis (guard_to); |
| 3720 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
| 3721 | { |
| 3722 | /* We are expecting all of the PHIs we have on epilog_e |
| 3723 | to be also on the main loop exit. But sometimes |
| 3724 | a stray virtual definition can appear at epilog_e |
| 3725 | which we can then take as the same on all exits, |
| 3726 | we've removed the LC SSA PHI on the main exit before |
| 3727 | so we wouldn't need to create a loop PHI for it. */ |
| 3728 | if (virtual_operand_p (op: gimple_phi_result (gs: *gsi)) |
| 3729 | && (gsi_end_p (i: gsi2) |
| 3730 | || !virtual_operand_p (op: gimple_phi_result (gs: *gsi2)))) |
| 3731 | add_phi_arg (*gsi, |
| 3732 | gimple_phi_arg_def_from_edge (gs: *gsi, e: epilog_e), |
| 3733 | guard_e, UNKNOWN_LOCATION); |
| 3734 | else |
| 3735 | { |
| 3736 | add_phi_arg (*gsi, gimple_phi_result (gs: *gsi2), guard_e, |
| 3737 | UNKNOWN_LOCATION); |
| 3738 | gsi_next (i: &gsi2); |
| 3739 | } |
| 3740 | } |
| 3741 | |
| 3742 | /* Only need to handle basic block before epilog loop if it's not |
| 3743 | the guard_bb, which is the case when skip_vector is true. */ |
| 3744 | if (guard_bb != bb_before_epilog) |
| 3745 | { |
| 3746 | prob_epilog = prob_vector * prob_epilog + prob_vector.invert (); |
| 3747 | |
| 3748 | scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog); |
| 3749 | } |
| 3750 | scale_loop_profile (epilog, prob_epilog, -1); |
| 3751 | } |
| 3752 | |
| 3753 | /* If we have a peeled vector iteration, all exits are the same, leave it |
| 3754 | and so the main exit needs to be treated the same as the alternative |
| 3755 | exits in that we leave their updates to vectorizable_live_operations. |
| 3756 | */ |
| 3757 | tree vector_iters_vf = niters_vector_mult_vf; |
| 3758 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
| 3759 | { |
| 3760 | tree tmp_niters_vf |
| 3761 | = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo)); |
| 3762 | |
| 3763 | if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo) |
| 3764 | && get_loop_exit_edges (loop).length () == 1)) |
| 3765 | { |
| 3766 | basic_block exit_bb = NULL; |
| 3767 | edge update_e = NULL; |
| 3768 | |
| 3769 | /* Identify the early exit merge block. I wish we had stored |
| 3770 | this. */ |
| 3771 | for (auto e : get_loop_exit_edges (loop)) |
| 3772 | if (e != LOOP_VINFO_MAIN_EXIT (loop_vinfo)) |
| 3773 | { |
| 3774 | exit_bb = e->dest; |
| 3775 | update_e = single_succ_edge (bb: exit_bb); |
| 3776 | break; |
| 3777 | } |
| 3778 | vect_update_ivs_after_vectorizer (loop_vinfo, niters: tmp_niters_vf, |
| 3779 | update_e, early_exit_p: true); |
| 3780 | } |
| 3781 | if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) |
| 3782 | vector_iters_vf = tmp_niters_vf; |
| 3783 | |
| 3784 | LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf; |
| 3785 | } |
| 3786 | |
| 3787 | bool recalculate_peel_niters_init |
| 3788 | = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo); |
| 3789 | vect_update_ivs_after_vectorizer (loop_vinfo, niters: vector_iters_vf, |
| 3790 | update_e, |
| 3791 | early_exit_p: recalculate_peel_niters_init); |
| 3792 | |
| 3793 | /* Recalculate the dominators after adding the guard edge. */ |
| 3794 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
| 3795 | iterate_fix_dominators (CDI_DOMINATORS, doms, false); |
| 3796 | |
| 3797 | /* When we do not have a loop-around edge to the epilog we know |
| 3798 | the vector loop covered at least VF scalar iterations unless |
| 3799 | we have early breaks. |
| 3800 | Update any known upper bound with this knowledge. */ |
| 3801 | if (! skip_vector |
| 3802 | && ! LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
| 3803 | { |
| 3804 | if (epilog->any_upper_bound) |
| 3805 | epilog->nb_iterations_upper_bound -= lowest_vf; |
| 3806 | if (epilog->any_likely_upper_bound) |
| 3807 | epilog->nb_iterations_likely_upper_bound -= lowest_vf; |
| 3808 | if (epilog->any_estimate) |
| 3809 | epilog->nb_iterations_estimate -= lowest_vf; |
| 3810 | } |
| 3811 | |
| 3812 | unsigned HOST_WIDE_INT bound; |
| 3813 | if (bound_scalar.is_constant (const_value: &bound)) |
| 3814 | { |
| 3815 | gcc_assert (bound != 0); |
| 3816 | /* Adjust the upper bound by the extra peeled vector iteration if we |
| 3817 | are an epilogue of an peeled vect loop and not VLA. For VLA the |
| 3818 | loop bounds are unknown. */ |
| 3819 | if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo) |
| 3820 | && vf.is_constant ()) |
| 3821 | bound += vf.to_constant (); |
| 3822 | /* -1 to convert loop iterations to latch iterations. */ |
| 3823 | record_niter_bound (epilog, bound - 1, false, true); |
| 3824 | scale_loop_profile (epilog, profile_probability::always (), |
| 3825 | bound - 1); |
| 3826 | } |
| 3827 | |
| 3828 | delete_update_ssa (); |
| 3829 | adjust_vec_debug_stmts (); |
| 3830 | scev_reset (); |
| 3831 | } |
| 3832 | |
| 3833 | if (vect_epilogues) |
| 3834 | { |
| 3835 | epilog->aux = epilogue_vinfo; |
| 3836 | LOOP_VINFO_LOOP (epilogue_vinfo) = epilog; |
| 3837 | LOOP_VINFO_MAIN_EXIT (epilogue_vinfo) |
| 3838 | = LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo); |
| 3839 | |
| 3840 | loop_constraint_clear (loop: epilog, LOOP_C_INFINITE); |
| 3841 | |
| 3842 | /* We now must calculate the number of NITERS performed by the previous |
| 3843 | loop and EPILOGUE_NITERS to be performed by the epilogue. */ |
| 3844 | tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf), |
| 3845 | niters_prolog, niters_vector_mult_vf); |
| 3846 | |
| 3847 | /* If skip_vector we may skip the previous loop, we insert a phi-node to |
| 3848 | determine whether we are coming from the previous vectorized loop |
| 3849 | using the update_e edge or the skip_vector basic block using the |
| 3850 | skip_e edge. */ |
| 3851 | if (skip_vector) |
| 3852 | { |
| 3853 | gcc_assert (update_e != NULL && skip_e != NULL); |
| 3854 | gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)), |
| 3855 | update_e->dest); |
| 3856 | tree new_ssa = make_ssa_name (TREE_TYPE (niters)); |
| 3857 | gimple *stmt = gimple_build_assign (new_ssa, niters); |
| 3858 | gimple_stmt_iterator gsi; |
| 3859 | if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME |
| 3860 | && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL) |
| 3861 | { |
| 3862 | gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf)); |
| 3863 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
| 3864 | } |
| 3865 | else |
| 3866 | { |
| 3867 | gsi = gsi_last_bb (bb: update_e->src); |
| 3868 | gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); |
| 3869 | } |
| 3870 | |
| 3871 | niters = new_ssa; |
| 3872 | add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION); |
| 3873 | add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e, |
| 3874 | UNKNOWN_LOCATION); |
| 3875 | niters = PHI_RESULT (new_phi); |
| 3876 | epilogue_vinfo->main_loop_edge = update_e; |
| 3877 | epilogue_vinfo->skip_main_loop_edge = skip_e; |
| 3878 | } |
| 3879 | |
| 3880 | /* Set ADVANCE to the number of iterations performed by the previous |
| 3881 | loop and its prologue. */ |
| 3882 | *advance = niters; |
| 3883 | |
| 3884 | /* Subtract the number of iterations performed by the vectorized loop |
| 3885 | from the number of total iterations. */ |
| 3886 | tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), |
| 3887 | before_loop_niters, |
| 3888 | niters); |
| 3889 | |
| 3890 | LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters; |
| 3891 | LOOP_VINFO_NITERSM1 (epilogue_vinfo) |
| 3892 | = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters), |
| 3893 | epilogue_niters, |
| 3894 | build_one_cst (TREE_TYPE (epilogue_niters))); |
| 3895 | } |
| 3896 | |
| 3897 | adjust_vec.release (); |
| 3898 | free_original_copy_tables (); |
| 3899 | |
| 3900 | return vect_epilogues ? epilog : NULL; |
| 3901 | } |
| 3902 | |
| 3903 | /* Function vect_create_cond_for_niters_checks. |
| 3904 | |
| 3905 | Create a conditional expression that represents the run-time checks for |
| 3906 | loop's niter. The loop is guaranteed to terminate if the run-time |
| 3907 | checks hold. |
| 3908 | |
| 3909 | Input: |
| 3910 | COND_EXPR - input conditional expression. New conditions will be chained |
| 3911 | with logical AND operation. If it is NULL, then the function |
| 3912 | is used to return the number of alias checks. |
| 3913 | LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs |
| 3914 | to be checked. |
| 3915 | |
| 3916 | Output: |
| 3917 | COND_EXPR - conditional expression. |
| 3918 | |
| 3919 | The returned COND_EXPR is the conditional expression to be used in the |
| 3920 | if statement that controls which version of the loop gets executed at |
| 3921 | runtime. */ |
| 3922 | |
| 3923 | static void |
| 3924 | vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr) |
| 3925 | { |
| 3926 | tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo); |
| 3927 | |
| 3928 | if (*cond_expr) |
| 3929 | *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
| 3930 | *cond_expr, part_cond_expr); |
| 3931 | else |
| 3932 | *cond_expr = part_cond_expr; |
| 3933 | } |
| 3934 | |
| 3935 | /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR |
| 3936 | and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */ |
| 3937 | |
| 3938 | static void |
| 3939 | chain_cond_expr (tree *cond_expr, tree part_cond_expr) |
| 3940 | { |
| 3941 | if (*cond_expr) |
| 3942 | *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
| 3943 | *cond_expr, part_cond_expr); |
| 3944 | else |
| 3945 | *cond_expr = part_cond_expr; |
| 3946 | } |
| 3947 | |
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of data references (array element references) whose alignment must be
   checked at runtime.

   Input:
   COND_EXPR - input conditional expression.  New conditions will be chained
	       with logical AND operation.
   LOOP_VINFO - three fields of the loop information are used.
	       LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
	       LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
	       LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT indicates which check applies.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
			 expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   Based on the boolean value of LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT, we decide
   which type of check should be applied and create two different expressions
   accordingly.
   1) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is false, we see if all data refs
      to be checked are already aligned to an alignment boundary.  We create
      an expression of "(a_1 | a_2 | a_3 | ... | a_n) & mask", where "a_i" is
      the address of i'th data reference.
   2) When LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we see if all data refs
      can be aligned to a boundary after a certain amount of peeling, in other
      words, their addresses have the same bottom bits according to the mask.
      We create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask",
      where "a_i" is the address of i'th data reference.

   Both algorithms make two assumptions:
   1) The number of bytes "n" in a vector is a power of 2.
   2) An address "a" is aligned if a%n is zero and that this
      test can be done as a&(n-1) == 0.  For example, for 16
      byte vectors the test is a&0xf == 0.  */

static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr,
				   gimple_seq *cond_expr_stmt_list)
{
  const vec<stmt_vec_info> &may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  stmt_vec_info stmt_info;
  poly_uint64 mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  char tmp_name[30];
  /* Running OR of all terms built so far (addresses or XORs).  */
  tree or_tmp_name = NULL_TREE;
  /* Address of the previous data ref, used to form pairwise XORs in the
     mutual-alignment variant.  */
  tree prev_addr_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple *and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* A zero mask would make the check trivially true; it must never be
     requested.  */
  gcc_assert (known_ne (mask, 0U));

  /* Addresses are manipulated as pointer-sized signed integers.  */
  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* If LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true, we should have at least two
     datarefs to check the mutual alignment.  */
  gcc_assert (may_misalign_stmts.length () > 1
	      || !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo));

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree xor_tmp_name;
      tree new_or_tmp_name;
      gimple *addr_stmt, *or_stmt, *xor_stmt;
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      bool negative = tree_int_cst_compare
	(DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
      /* For a negative step the first vector access starts at the far end
	 of the object, so offset the base back by (subparts - 1) elements
	 to get the address actually loaded first.  */
      tree offset = negative
	? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
		    * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
	: size_zero_node;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
	vect_create_addr_base_for_vector_ref (loop_vinfo,
					      stmt_info, &new_stmt_list,
					      offset);
      if (new_stmt_list != NULL)
	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (s: tmp_name, format: "addr2int%d" , i);
      addr_tmp_name = make_temp_ssa_name (type: int_ptrsize_type, NULL, name: tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      if (LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo))
	{
	  /* Create "((a_1 ^ a_2) | (a_2 ^ a_3) | ... | (a_n-1 ^ a_n)) & mask"
	     to check mutual alignment.  */
	  if (prev_addr_tmp_name != NULL_TREE)
	    {
	      /* XOR the current address with the previous one; low bits of
		 the result are zero iff the two share bottom bits.  */
	      sprintf (s: tmp_name, format: "xorptrs%d_%d" , i - 1, i);
	      xor_tmp_name = make_temp_ssa_name (type: int_ptrsize_type, NULL,
						 name: tmp_name);
	      xor_stmt = gimple_build_assign (xor_tmp_name, BIT_XOR_EXPR,
					      prev_addr_tmp_name,
					      addr_tmp_name);
	      gimple_seq_add_stmt (cond_expr_stmt_list, xor_stmt);
	      if (or_tmp_name == NULL_TREE)
		{
		  /* Create the 1st XOR when the 2nd data ref is seen.  */
		  or_tmp_name = xor_tmp_name;
		}
	      else
		{
		  /* Create: or_tmp = or_tmp | new_xor_tmp.  */
		  sprintf (s: tmp_name, format: "orxors%d" , i - 1);
		  new_or_tmp_name = make_temp_ssa_name (type: int_ptrsize_type, NULL,
							name: tmp_name);
		  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
						 or_tmp_name, xor_tmp_name);
		  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
		  or_tmp_name = new_or_tmp_name;
		}
	    }
	  prev_addr_tmp_name = addr_tmp_name;
	}
      else
	{
	  /* Create: "(a_1 | a_2 | a_3 | ... | a_n) & mask" to check if all
	     addresses are already aligned.  */
	  if (or_tmp_name != NULL_TREE)
	    {
	      /* Create: or_tmp = or_tmp | addr_tmp.  */
	      sprintf (s: tmp_name, format: "orptrs%d" , i);
	      new_or_tmp_name = make_temp_ssa_name (type: int_ptrsize_type, NULL,
						    name: tmp_name);
	      or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
					     or_tmp_name, addr_tmp_name);
	      gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
	      or_tmp_name = new_or_tmp_name;
	    }
	  else
	    /* First data ref seeds the accumulator.  */
	    or_tmp_name = addr_tmp_name;
	}

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp_name = make_temp_ssa_name (type: int_ptrsize_type, NULL, name: "andmask" );

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
				  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
				and_tmp_name, ptrsize_zero);
  chain_cond_expr (cond_expr, part_cond_expr);
}
| 4114 | |
| 4115 | /* Function vect_create_cond_for_vla_spec_read. |
| 4116 | |
| 4117 | Create a conditional expression that represents the run-time checks with |
| 4118 | max speculative read amount in VLA modes. We check two things: |
| 4119 | 1) if the max speculative read amount exceeds the min page size |
| 4120 | 2) if the VF is power-of-2 - done by checking the max read amount instead |
| 4121 | |
| 4122 | Input: |
| 4123 | COND_EXPR - input conditional expression. New conditions will be chained |
| 4124 | with logical AND operation. |
| 4125 | LOOP_VINFO - field LOOP_VINFO_MAX_SPEC_READ_AMOUNT contains the max |
| 4126 | possible speculative read amount in VLA modes. |
| 4127 | |
| 4128 | Output: |
| 4129 | COND_EXPR - conditional expression. |
| 4130 | |
| 4131 | The returned COND_EXPR is the conditional expression to be used in the |
| 4132 | if statement that controls which version of the loop gets executed at |
| 4133 | runtime. */ |
| 4134 | |
| 4135 | static void |
| 4136 | vect_create_cond_for_vla_spec_read (loop_vec_info loop_vinfo, tree *cond_expr) |
| 4137 | { |
| 4138 | poly_uint64 read_amount_poly = LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo); |
| 4139 | tree amount = build_int_cst (long_unsigned_type_node, read_amount_poly); |
| 4140 | |
| 4141 | /* Both the read amount and the VF must be variants, and the read amount must |
| 4142 | be a constant power-of-2 multiple of the VF. */ |
| 4143 | unsigned HOST_WIDE_INT multiple; |
| 4144 | gcc_assert (!read_amount_poly.is_constant () |
| 4145 | && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant () |
| 4146 | && constant_multiple_p (read_amount_poly, |
| 4147 | LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
| 4148 | &multiple) |
| 4149 | && pow2p_hwi (multiple)); |
| 4150 | |
| 4151 | tree cst_ul_zero = build_int_cstu (long_unsigned_type_node, 0U); |
| 4152 | tree cst_ul_one = build_int_cstu (long_unsigned_type_node, 1U); |
| 4153 | tree cst_ul_pagesize = build_int_cstu (long_unsigned_type_node, |
| 4154 | (unsigned long) param_min_pagesize); |
| 4155 | |
| 4156 | /* Create an expression of "amount & (amount - 1) == 0". */ |
| 4157 | tree amount_m1 = fold_build2 (MINUS_EXPR, long_unsigned_type_node, |
| 4158 | amount, cst_ul_one); |
| 4159 | tree amount_and_expr = fold_build2 (BIT_AND_EXPR, long_unsigned_type_node, |
| 4160 | amount, amount_m1); |
| 4161 | tree powof2_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node, |
| 4162 | amount_and_expr, cst_ul_zero); |
| 4163 | chain_cond_expr (cond_expr, part_cond_expr: powof2_cond_expr); |
| 4164 | |
| 4165 | /* Create an expression of "amount <= cst_ul_pagesize". */ |
| 4166 | tree pagesize_cond_expr = fold_build2 (LE_EXPR, boolean_type_node, |
| 4167 | amount, cst_ul_pagesize); |
| 4168 | chain_cond_expr (cond_expr, part_cond_expr: pagesize_cond_expr); |
| 4169 | } |
| 4170 | |
| 4171 | /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>, |
| 4172 | create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn). |
| 4173 | Set *COND_EXPR to a tree that is true when both the original *COND_EXPR |
| 4174 | and this new condition are true. Treat a null *COND_EXPR as "true". */ |
| 4175 | |
| 4176 | static void |
| 4177 | vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) |
| 4178 | { |
| 4179 | const vec<vec_object_pair> &pairs |
| 4180 | = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); |
| 4181 | unsigned int i; |
| 4182 | vec_object_pair *pair; |
| 4183 | FOR_EACH_VEC_ELT (pairs, i, pair) |
| 4184 | { |
| 4185 | tree addr1 = build_fold_addr_expr (pair->first); |
| 4186 | tree addr2 = build_fold_addr_expr (pair->second); |
| 4187 | tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node, |
| 4188 | addr1, addr2); |
| 4189 | chain_cond_expr (cond_expr, part_cond_expr); |
| 4190 | } |
| 4191 | } |
| 4192 | |
| 4193 | /* Create an expression that is true when all lower-bound conditions for |
| 4194 | the vectorized loop are met. Chain this condition with *COND_EXPR. */ |
| 4195 | |
| 4196 | static void |
| 4197 | vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr) |
| 4198 | { |
| 4199 | const vec<vec_lower_bound> &lower_bounds |
| 4200 | = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); |
| 4201 | for (unsigned int i = 0; i < lower_bounds.length (); ++i) |
| 4202 | { |
| 4203 | tree expr = lower_bounds[i].expr; |
| 4204 | tree type = unsigned_type_for (TREE_TYPE (expr)); |
| 4205 | expr = fold_convert (type, expr); |
| 4206 | poly_uint64 bound = lower_bounds[i].min_value; |
| 4207 | if (!lower_bounds[i].unsigned_p) |
| 4208 | { |
| 4209 | expr = fold_build2 (PLUS_EXPR, type, expr, |
| 4210 | build_int_cstu (type, bound - 1)); |
| 4211 | bound += bound - 1; |
| 4212 | } |
| 4213 | tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr, |
| 4214 | build_int_cstu (type, bound)); |
| 4215 | chain_cond_expr (cond_expr, part_cond_expr); |
| 4216 | } |
| 4217 | } |
| 4218 | |
| 4219 | /* Function vect_create_cond_for_alias_checks. |
| 4220 | |
| 4221 | Create a conditional expression that represents the run-time checks for |
| 4222 | overlapping of address ranges represented by a list of data references |
| 4223 | relations passed as input. |
| 4224 | |
| 4225 | Input: |
| 4226 | COND_EXPR - input conditional expression. New conditions will be chained |
| 4227 | with logical AND operation. If it is NULL, then the function |
| 4228 | is used to return the number of alias checks. |
| 4229 | LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs |
| 4230 | to be checked. |
| 4231 | |
| 4232 | Output: |
| 4233 | COND_EXPR - conditional expression. |
| 4234 | |
| 4235 | The returned COND_EXPR is the conditional expression to be used in the if |
| 4236 | statement that controls which version of the loop gets executed at runtime. |
| 4237 | */ |
| 4238 | |
| 4239 | void |
| 4240 | vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr) |
| 4241 | { |
| 4242 | const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs = |
| 4243 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); |
| 4244 | |
| 4245 | if (comp_alias_ddrs.is_empty ()) |
| 4246 | return; |
| 4247 | |
| 4248 | create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo), |
| 4249 | &comp_alias_ddrs, cond_expr); |
| 4250 | if (dump_enabled_p ()) |
| 4251 | dump_printf_loc (MSG_NOTE, vect_location, |
| 4252 | "created %u versioning for alias checks.\n" , |
| 4253 | comp_alias_ddrs.length ()); |
| 4254 | } |
| 4255 | |
| 4256 | |
| 4257 | /* Function vect_loop_versioning. |
| 4258 | |
| 4259 | If the loop has data references that may or may not be aligned or/and |
| 4260 | has data reference relations whose independence was not proven then |
| 4261 | two versions of the loop need to be generated, one which is vectorized |
| 4262 | and one which isn't. A test is then generated to control which of the |
| 4263 | loops is executed. The test checks for the alignment of all of the |
| 4264 | data references that may or may not be aligned. An additional |
| 4265 | sequence of runtime tests is generated for each pairs of DDRs whose |
| 4266 | independence was not proven. The vectorized version of loop is |
| 4267 | executed only if both alias and alignment tests are passed. |
| 4268 | |
| 4269 | The test generated to check which version of loop is executed |
| 4270 | is modified to also check for profitability as indicated by the |
| 4271 | cost model threshold TH. |
| 4272 | |
| 4273 | The versioning precondition(s) are placed in *COND_EXPR and |
| 4274 | *COND_EXPR_STMT_LIST. */ |
| 4275 | |
| 4276 | class loop * |
| 4277 | vect_loop_versioning (loop_vec_info loop_vinfo, |
| 4278 | gimple *loop_vectorized_call) |
| 4279 | { |
| 4280 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop; |
| 4281 | class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); |
| 4282 | basic_block condition_bb; |
| 4283 | gphi_iterator gsi; |
| 4284 | gimple_stmt_iterator cond_exp_gsi; |
| 4285 | basic_block merge_bb; |
| 4286 | basic_block new_exit_bb; |
| 4287 | edge new_exit_e, e; |
| 4288 | gphi *orig_phi, *new_phi; |
| 4289 | tree cond_expr = NULL_TREE; |
| 4290 | gimple_seq cond_expr_stmt_list = NULL; |
| 4291 | tree arg; |
| 4292 | profile_probability prob = profile_probability::likely (); |
| 4293 | gimple_seq gimplify_stmt_list = NULL; |
| 4294 | tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo); |
| 4295 | bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); |
| 4296 | bool version_spec_read = LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (loop_vinfo); |
| 4297 | bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); |
| 4298 | bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); |
| 4299 | poly_uint64 versioning_threshold |
| 4300 | = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); |
| 4301 | tree version_simd_if_cond |
| 4302 | = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo); |
| 4303 | unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
| 4304 | bool uncounted_p = LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo); |
| 4305 | |
| 4306 | if (!uncounted_p && vect_apply_runtime_profitability_check_p (loop_vinfo) |
| 4307 | && !ordered_p (a: th, b: versioning_threshold)) |
| 4308 | cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, |
| 4309 | build_int_cst (TREE_TYPE (scalar_loop_iters), |
| 4310 | th - 1)); |
| 4311 | if (!uncounted_p && maybe_ne (a: versioning_threshold, b: 0U)) |
| 4312 | { |
| 4313 | tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, |
| 4314 | build_int_cst (TREE_TYPE (scalar_loop_iters), |
| 4315 | versioning_threshold - 1)); |
| 4316 | if (cond_expr) |
| 4317 | cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node, |
| 4318 | expr, cond_expr); |
| 4319 | else |
| 4320 | cond_expr = expr; |
| 4321 | } |
| 4322 | |
| 4323 | tree cost_name = NULL_TREE; |
| 4324 | profile_probability prob2 = profile_probability::always (); |
| 4325 | if (cond_expr |
| 4326 | && EXPR_P (cond_expr) |
| 4327 | && (version_niter |
| 4328 | || version_align |
| 4329 | || version_alias |
| 4330 | || version_simd_if_cond)) |
| 4331 | { |
| 4332 | cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), |
| 4333 | &cond_expr_stmt_list, |
| 4334 | is_gimple_val, NULL_TREE); |
| 4335 | /* Split prob () into two so that the overall probability of passing |
| 4336 | both the cost-model and versioning checks is the orig prob. */ |
| 4337 | prob2 = prob = prob.sqrt (); |
| 4338 | } |
| 4339 | |
| 4340 | if (version_niter) |
| 4341 | vect_create_cond_for_niters_checks (loop_vinfo, cond_expr: &cond_expr); |
| 4342 | |
| 4343 | if (cond_expr) |
| 4344 | { |
| 4345 | gimple_seq tem = NULL; |
| 4346 | cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), |
| 4347 | &tem, is_gimple_condexpr_for_cond, |
| 4348 | NULL_TREE); |
| 4349 | gimple_seq_add_seq (&cond_expr_stmt_list, tem); |
| 4350 | } |
| 4351 | |
| 4352 | if (version_align) |
| 4353 | vect_create_cond_for_align_checks (loop_vinfo, cond_expr: &cond_expr, |
| 4354 | cond_expr_stmt_list: &cond_expr_stmt_list); |
| 4355 | |
| 4356 | if (version_spec_read) |
| 4357 | vect_create_cond_for_vla_spec_read (loop_vinfo, cond_expr: &cond_expr); |
| 4358 | |
| 4359 | if (version_alias) |
| 4360 | { |
| 4361 | vect_create_cond_for_unequal_addrs (loop_vinfo, cond_expr: &cond_expr); |
| 4362 | vect_create_cond_for_lower_bounds (loop_vinfo, cond_expr: &cond_expr); |
| 4363 | vect_create_cond_for_alias_checks (loop_vinfo, cond_expr: &cond_expr); |
| 4364 | } |
| 4365 | |
| 4366 | if (version_simd_if_cond) |
| 4367 | { |
| 4368 | gcc_assert (dom_info_available_p (CDI_DOMINATORS)); |
| 4369 | if (flag_checking) |
| 4370 | if (basic_block bb |
| 4371 | = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond))) |
| 4372 | gcc_assert (bb != loop->header |
| 4373 | && dominated_by_p (CDI_DOMINATORS, loop->header, bb) |
| 4374 | && (scalar_loop == NULL |
| 4375 | || (bb != scalar_loop->header |
| 4376 | && dominated_by_p (CDI_DOMINATORS, |
| 4377 | scalar_loop->header, bb)))); |
| 4378 | tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond)); |
| 4379 | tree c = fold_build2 (NE_EXPR, boolean_type_node, |
| 4380 | version_simd_if_cond, zero); |
| 4381 | if (cond_expr) |
| 4382 | cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
| 4383 | c, cond_expr); |
| 4384 | else |
| 4385 | cond_expr = c; |
| 4386 | if (dump_enabled_p ()) |
| 4387 | dump_printf_loc (MSG_NOTE, vect_location, |
| 4388 | "created versioning for simd if condition check.\n" ); |
| 4389 | } |
| 4390 | |
| 4391 | cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr), |
| 4392 | &gimplify_stmt_list, |
| 4393 | is_gimple_condexpr_for_cond, NULL_TREE); |
| 4394 | gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list); |
| 4395 | |
| 4396 | /* Compute the outermost loop cond_expr and cond_expr_stmt_list are |
| 4397 | invariant in. */ |
| 4398 | class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr); |
| 4399 | for (gimple_stmt_iterator gsi = gsi_start (seq&: cond_expr_stmt_list); |
| 4400 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
| 4401 | { |
| 4402 | gimple *stmt = gsi_stmt (i: gsi); |
| 4403 | update_stmt (s: stmt); |
| 4404 | ssa_op_iter iter; |
| 4405 | use_operand_p use_p; |
| 4406 | basic_block def_bb; |
| 4407 | FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) |
| 4408 | if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p)))) |
| 4409 | && flow_bb_inside_loop_p (outermost, def_bb)) |
| 4410 | outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1); |
| 4411 | } |
| 4412 | |
| 4413 | /* Search for the outermost loop we can version. Avoid versioning of |
| 4414 | non-perfect nests but allow if-conversion versioned loops inside. */ |
| 4415 | class loop *loop_to_version = loop; |
| 4416 | if (flow_loop_nested_p (outermost, loop)) |
| 4417 | { |
| 4418 | if (dump_enabled_p ()) |
| 4419 | dump_printf_loc (MSG_NOTE, vect_location, |
| 4420 | "trying to apply versioning to outer loop %d\n" , |
| 4421 | outermost->num); |
| 4422 | if (outermost->num == 0) |
| 4423 | outermost = superloop_at_depth (loop, 1); |
| 4424 | /* And avoid applying versioning on non-perfect nests. */ |
| 4425 | while (loop_to_version != outermost |
| 4426 | && (e = single_exit (loop_outer (loop: loop_to_version))) |
| 4427 | && !(e->flags & EDGE_COMPLEX) |
| 4428 | && (!loop_outer (loop: loop_to_version)->inner->next |
| 4429 | || vect_loop_vectorized_call (loop_to_version)) |
| 4430 | && (!loop_outer (loop: loop_to_version)->inner->next |
| 4431 | || !loop_outer (loop: loop_to_version)->inner->next->next)) |
| 4432 | loop_to_version = loop_outer (loop: loop_to_version); |
| 4433 | } |
| 4434 | |
| 4435 | /* Apply versioning. If there is already a scalar version created by |
| 4436 | if-conversion re-use that. Note we cannot re-use the copy of |
| 4437 | an if-converted outer-loop when vectorizing the inner loop only. */ |
| 4438 | gcond *cond; |
| 4439 | if ((!loop_to_version->inner || loop == loop_to_version) |
| 4440 | && loop_vectorized_call) |
| 4441 | { |
| 4442 | gcc_assert (scalar_loop); |
| 4443 | condition_bb = gimple_bb (g: loop_vectorized_call); |
| 4444 | cond = as_a <gcond *> (p: *gsi_last_bb (bb: condition_bb)); |
| 4445 | gimple_cond_set_condition_from_tree (cond, cond_expr); |
| 4446 | update_stmt (s: cond); |
| 4447 | |
| 4448 | if (cond_expr_stmt_list) |
| 4449 | { |
| 4450 | cond_exp_gsi = gsi_for_stmt (loop_vectorized_call); |
| 4451 | gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, |
| 4452 | GSI_SAME_STMT); |
| 4453 | } |
| 4454 | |
| 4455 | /* if-conversion uses profile_probability::always () for both paths, |
| 4456 | reset the paths probabilities appropriately. */ |
| 4457 | edge te, fe; |
| 4458 | extract_true_false_edges_from_block (condition_bb, &te, &fe); |
| 4459 | te->probability = prob; |
| 4460 | fe->probability = prob.invert (); |
| 4461 | /* We can scale loops counts immediately but have to postpone |
| 4462 | scaling the scalar loop because we re-use it during peeling. |
| 4463 | |
| 4464 | Ifcvt duplicates loop preheader, loop body and produces an basic |
| 4465 | block after loop exit. We need to scale all that. */ |
| 4466 | basic_block = loop_preheader_edge (loop_to_version)->src; |
| 4467 | preheader->count = preheader->count.apply_probability (prob: prob * prob2); |
| 4468 | scale_loop_frequencies (loop_to_version, prob * prob2); |
| 4469 | /* When the loop has multiple exits then we can only version itself. |
| 4470 | This is denoted by loop_to_version == loop. In this case we can |
| 4471 | do the versioning by selecting the exit edge the vectorizer is |
| 4472 | currently using. */ |
| 4473 | edge exit_edge; |
| 4474 | if (loop_to_version == loop) |
| 4475 | exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo); |
| 4476 | else |
| 4477 | exit_edge = single_exit (loop_to_version); |
| 4478 | exit_edge->dest->count = preheader->count; |
| 4479 | LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert (); |
| 4480 | |
| 4481 | nloop = scalar_loop; |
| 4482 | if (dump_enabled_p ()) |
| 4483 | dump_printf_loc (MSG_NOTE, vect_location, |
| 4484 | "reusing %sloop version created by if conversion\n" , |
| 4485 | loop_to_version != loop ? "outer " : "" ); |
| 4486 | } |
| 4487 | else |
| 4488 | { |
| 4489 | if (loop_to_version != loop |
| 4490 | && dump_enabled_p ()) |
| 4491 | dump_printf_loc (MSG_NOTE, vect_location, |
| 4492 | "applying loop versioning to outer loop %d\n" , |
| 4493 | loop_to_version->num); |
| 4494 | |
| 4495 | unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx; |
| 4496 | |
| 4497 | initialize_original_copy_tables (); |
| 4498 | nloop = loop_version (loop_to_version, cond_expr, &condition_bb, |
| 4499 | prob * prob2, (prob * prob2).invert (), |
| 4500 | prob * prob2, (prob * prob2).invert (), |
| 4501 | true); |
| 4502 | |
| 4503 | /* If the PHI nodes in the loop header were reallocated, we need to fix up |
| 4504 | our internally stashed copies of those. */ |
| 4505 | if (loop_to_version == loop) |
| 4506 | for (auto gsi = gsi_start_phis (loop->header); |
| 4507 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
| 4508 | loop_vinfo->resync_stmt_addr (gsi.phi ()); |
| 4509 | |
| 4510 | /* We will later insert second conditional so overall outcome of |
| 4511 | both is prob * prob2. */ |
| 4512 | edge true_e, false_e; |
| 4513 | extract_true_false_edges_from_block (condition_bb, &true_e, &false_e); |
| 4514 | true_e->probability = prob; |
| 4515 | false_e->probability = prob.invert (); |
| 4516 | gcc_assert (nloop); |
| 4517 | nloop = get_loop_copy (loop); |
| 4518 | |
| 4519 | /* Assign hierarchical discriminators to distinguish loop versions. |
| 4520 | Only assign to the scalar version here; the vectorized version will |
| 4521 | get discriminators later during transformation/peeling. |
| 4522 | Use dynamic copy_id allocation instead of hardcoded constants. */ |
| 4523 | gimple *nloop_last = last_nondebug_stmt (nloop->header); |
| 4524 | location_t nloop_loc |
| 4525 | = nloop_last ? gimple_location (g: nloop_last) : UNKNOWN_LOCATION; |
| 4526 | if (nloop_loc != UNKNOWN_LOCATION) |
| 4527 | { |
| 4528 | unsigned int nloop_copyid = allocate_copyid_base (loc: nloop_loc, stride: 1); |
| 4529 | assign_discriminators_to_loop (loop: nloop, multiplicity_factor: 0, copyid: nloop_copyid); |
| 4530 | } |
| 4531 | /* For cycle vectorization with SLP we rely on the PHI arguments |
| 4532 | appearing in the same order as the SLP node operands which for the |
| 4533 | loop PHI nodes means the preheader edge dest index needs to remain |
| 4534 | the same for the analyzed loop which also becomes the vectorized one. |
| 4535 | Make it so in case the state after versioning differs by redirecting |
| 4536 | the first edge into the header to the same destination which moves |
| 4537 | it last. */ |
| 4538 | if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx) |
| 4539 | { |
| 4540 | edge e = EDGE_PRED (loop->header, 0); |
| 4541 | ssa_redirect_edge (e, e->dest); |
| 4542 | flush_pending_stmts (e); |
| 4543 | } |
| 4544 | gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx); |
| 4545 | |
| 4546 | /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will |
| 4547 | reap those otherwise; they also refer to the original |
| 4548 | loops. */ |
| 4549 | class loop *l = loop; |
| 4550 | while (gimple *call = vect_loop_vectorized_call (l)) |
| 4551 | { |
| 4552 | call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call))); |
| 4553 | fold_loop_internal_call (call, boolean_false_node); |
| 4554 | l = loop_outer (loop: l); |
| 4555 | } |
| 4556 | free_original_copy_tables (); |
| 4557 | |
| 4558 | if (cond_expr_stmt_list) |
| 4559 | { |
| 4560 | cond_exp_gsi = gsi_last_bb (bb: condition_bb); |
| 4561 | gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list, |
| 4562 | GSI_SAME_STMT); |
| 4563 | } |
| 4564 | |
| 4565 | /* Loop versioning violates an assumption we try to maintain during |
| 4566 | vectorization - that the loop exit block has a single predecessor. |
| 4567 | After versioning, the exit block of both loop versions is the same |
| 4568 | basic block (i.e. it has two predecessors). Just in order to simplify |
| 4569 | following transformations in the vectorizer, we fix this situation |
| 4570 | here by adding a new (empty) block on the exit-edge of the loop, |
| 4571 | with the proper loop-exit phis to maintain loop-closed-form. |
| 4572 | If loop versioning wasn't done from loop, but scalar_loop instead, |
| 4573 | merge_bb will have already just a single successor. */ |
| 4574 | |
| 4575 | /* When the loop has multiple exits then we can only version itself. |
| 4576 | This is denoted by loop_to_version == loop. In this case we can |
| 4577 | do the versioning by selecting the exit edge the vectorizer is |
| 4578 | currently using. */ |
| 4579 | edge exit_edge; |
| 4580 | if (loop_to_version == loop) |
| 4581 | exit_edge = LOOP_VINFO_MAIN_EXIT (loop_vinfo); |
| 4582 | else |
| 4583 | exit_edge = single_exit (loop_to_version); |
| 4584 | |
| 4585 | gcc_assert (exit_edge); |
| 4586 | merge_bb = exit_edge->dest; |
| 4587 | if (EDGE_COUNT (merge_bb->preds) >= 2) |
| 4588 | { |
| 4589 | gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2); |
| 4590 | new_exit_bb = split_edge (exit_edge); |
| 4591 | new_exit_e = exit_edge; |
| 4592 | e = EDGE_SUCC (new_exit_bb, 0); |
| 4593 | |
| 4594 | for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (i: gsi); |
| 4595 | gsi_next (i: &gsi)) |
| 4596 | { |
| 4597 | tree new_res; |
| 4598 | orig_phi = gsi.phi (); |
| 4599 | new_res = copy_ssa_name (PHI_RESULT (orig_phi)); |
| 4600 | new_phi = create_phi_node (new_res, new_exit_bb); |
| 4601 | arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); |
| 4602 | add_phi_arg (new_phi, arg, new_exit_e, |
| 4603 | gimple_phi_arg_location_from_edge (phi: orig_phi, e)); |
| 4604 | adjust_phi_and_debug_stmts (update_phi: orig_phi, e, PHI_RESULT (new_phi)); |
| 4605 | } |
| 4606 | } |
| 4607 | |
| 4608 | update_ssa (TODO_update_ssa_no_phi); |
| 4609 | } |
| 4610 | |
| 4611 | /* Split the cost model check off to a separate BB. Costing assumes |
| 4612 | this is the only thing we perform when we enter the scalar loop |
| 4613 | from a failed cost decision. */ |
| 4614 | if (cost_name && TREE_CODE (cost_name) == SSA_NAME) |
| 4615 | { |
| 4616 | gimple *def = SSA_NAME_DEF_STMT (cost_name); |
| 4617 | gcc_assert (gimple_bb (def) == condition_bb); |
| 4618 | /* All uses of the cost check are 'true' after the check we |
| 4619 | are going to insert. */ |
| 4620 | replace_uses_by (cost_name, boolean_true_node); |
| 4621 | /* And we're going to build the new single use of it. */ |
| 4622 | gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node, |
| 4623 | NULL_TREE, NULL_TREE); |
| 4624 | edge e = split_block (gimple_bb (g: def), def); |
| 4625 | gimple_stmt_iterator gsi = gsi_for_stmt (def); |
| 4626 | gsi_insert_after (&gsi, cond, GSI_NEW_STMT); |
| 4627 | edge true_e, false_e; |
| 4628 | extract_true_false_edges_from_block (e->dest, &true_e, &false_e); |
| 4629 | e->flags &= ~EDGE_FALLTHRU; |
| 4630 | e->flags |= EDGE_TRUE_VALUE; |
| 4631 | edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE); |
| 4632 | e->probability = prob2; |
| 4633 | e2->probability = prob2.invert (); |
| 4634 | e->dest->count = e->count (); |
| 4635 | set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src); |
| 4636 | auto_vec<basic_block, 3> adj; |
| 4637 | for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest); |
| 4638 | son; |
| 4639 | son = next_dom_son (CDI_DOMINATORS, son)) |
| 4640 | if (EDGE_COUNT (son->preds) > 1) |
| 4641 | adj.safe_push (obj: son); |
| 4642 | for (auto son : adj) |
| 4643 | set_immediate_dominator (CDI_DOMINATORS, son, e->src); |
| 4644 | //debug_bb (condition_bb); |
| 4645 | //debug_bb (e->src); |
| 4646 | } |
| 4647 | |
| 4648 | if (version_niter) |
| 4649 | { |
| 4650 | /* The versioned loop could be infinite, we need to clear existing |
| 4651 | niter information which is copied from the original loop. */ |
| 4652 | gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE)); |
| 4653 | vect_free_loop_info_assumptions (nloop); |
| 4654 | } |
| 4655 | |
| 4656 | if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION |
| 4657 | && dump_enabled_p ()) |
| 4658 | { |
| 4659 | if (version_alias) |
| 4660 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING, |
| 4661 | vect_location, |
| 4662 | "loop versioned for vectorization because of " |
| 4663 | "possible aliasing\n" ); |
| 4664 | if (version_align) |
| 4665 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING, |
| 4666 | vect_location, |
| 4667 | "loop versioned for vectorization to enhance " |
| 4668 | "alignment\n" ); |
| 4669 | |
| 4670 | } |
| 4671 | |
| 4672 | return nloop; |
| 4673 | } |
| 4674 | |