1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonISelLowering.h"
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/MemoryLocation.h"
15#include "llvm/CodeGen/MachineBasicBlock.h"
16#include "llvm/CodeGen/MachineFunction.h"
17#include "llvm/CodeGen/MachineInstr.h"
18#include "llvm/CodeGen/MachineOperand.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetInstrInfo.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
22#include "llvm/Support/CommandLine.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
31 cl::Hidden, cl::init(Val: 16),
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(args: 5, args: 15, args: 10);
45 case MVT::f32:
46 return std::make_tuple(args: 8, args: 127, args: 23);
47 case MVT::f64:
48 return std::make_tuple(args: 11, args: 1023, args: 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(VT: MVT::v64i8, RC: &Hexagon::HvxVRRegClass);
59 addRegisterClass(VT: MVT::v32i16, RC: &Hexagon::HvxVRRegClass);
60 addRegisterClass(VT: MVT::v16i32, RC: &Hexagon::HvxVRRegClass);
61 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxWRRegClass);
62 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxWRRegClass);
63 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(VT: MVT::v16i1, RC: &Hexagon::HvxQRRegClass);
73 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
74 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxVRRegClass);
77 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxVRRegClass);
78 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxVRRegClass);
79 addRegisterClass(VT: MVT::v256i8, RC: &Hexagon::HvxWRRegClass);
80 addRegisterClass(VT: MVT::v128i16, RC: &Hexagon::HvxWRRegClass);
81 addRegisterClass(VT: MVT::v64i32, RC: &Hexagon::HvxWRRegClass);
82 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
83 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
84 addRegisterClass(VT: MVT::v128i1, RC: &Hexagon::HvxQRRegClass);
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(VT: MVT::v32f32, RC: &Hexagon::HvxVRRegClass);
87 addRegisterClass(VT: MVT::v64f16, RC: &Hexagon::HvxVRRegClass);
88 addRegisterClass(VT: MVT::v64f32, RC: &Hexagon::HvxWRRegClass);
89 addRegisterClass(VT: MVT::v128f16, RC: &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
103 setOperationAction(Op: Opc, VT: FromTy, Action: Promote);
104 AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy);
105 };
106
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
110 setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom);
111 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
112 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
113 setOperationAction(Op: ISD::BITCAST, VT: MVT::v16i1, Action: Custom);
114 setOperationAction(Op: ISD::BITCAST, VT: MVT::v128i1, Action: Custom);
115 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
116 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal);
117 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal);
118 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
119
120 if (Subtarget.useHVX128BOps())
121 setOperationAction(Op: ISD::BITCAST, VT: MVT::v32i1, Action: Custom);
122 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
123 Subtarget.useHVXFloatingPoint()) {
124
125 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
126 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
127
128 for (MVT T : FloatV) {
129 setOperationAction(Op: ISD::FADD, VT: T, Action: Legal);
130 setOperationAction(Op: ISD::FSUB, VT: T, Action: Legal);
131 setOperationAction(Op: ISD::FMUL, VT: T, Action: Legal);
132 setOperationAction(Op: ISD::FMINIMUMNUM, VT: T, Action: Legal);
133 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: T, Action: Legal);
134
135 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
136 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
137
138 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
139 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
140
141 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
142 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
143 // Custom-lower BUILD_VECTOR. The standard (target-independent)
144 // handling of it would convert it to a load, which is not always
145 // the optimal choice.
146 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
147 }
148
149
150 // BUILD_VECTOR with f16 operands cannot be promoted without
151 // promoting the result, so lower the node to vsplat or constant pool
152 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
153 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::f16, Action: Custom);
154 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::f16, Action: Custom);
155
156 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
157 // generated.
158 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
159 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
160 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
161 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
162
163 for (MVT P : FloatW) {
164 setOperationAction(Op: ISD::LOAD, VT: P, Action: Custom);
165 setOperationAction(Op: ISD::STORE, VT: P, Action: Custom);
166 setOperationAction(Op: ISD::FADD, VT: P, Action: Custom);
167 setOperationAction(Op: ISD::FSUB, VT: P, Action: Custom);
168 setOperationAction(Op: ISD::FMUL, VT: P, Action: Custom);
169 setOperationAction(Op: ISD::FMINIMUMNUM, VT: P, Action: Custom);
170 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: P, Action: Custom);
171 setOperationAction(Op: ISD::SETCC, VT: P, Action: Custom);
172 setOperationAction(Op: ISD::VSELECT, VT: P, Action: Custom);
173
174 // Custom-lower BUILD_VECTOR. The standard (target-independent)
175 // handling of it would convert it to a load, which is not always
176 // the optimal choice.
177 setOperationAction(Op: ISD::BUILD_VECTOR, VT: P, Action: Custom);
178 // Make concat-vectors custom to handle concats of more than 2 vectors.
179 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: P, Action: Custom);
180
181 setOperationAction(Op: ISD::MLOAD, VT: P, Action: Custom);
182 setOperationAction(Op: ISD::MSTORE, VT: P, Action: Custom);
183 }
184
185 if (Subtarget.useHVXQFloatOps()) {
186 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Custom);
187 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
188 } else if (Subtarget.useHVXIEEEFPOps()) {
189 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Legal);
190 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
191 }
192 }
193
194 for (MVT T : LegalV) {
195 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
196 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
197
198 setOperationAction(Op: ISD::ABS, VT: T, Action: Legal);
199 setOperationAction(Op: ISD::AND, VT: T, Action: Legal);
200 setOperationAction(Op: ISD::OR, VT: T, Action: Legal);
201 setOperationAction(Op: ISD::XOR, VT: T, Action: Legal);
202 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
203 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
204 setOperationAction(Op: ISD::MUL, VT: T, Action: Legal);
205 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Legal);
206 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Legal);
207 setOperationAction(Op: ISD::SELECT, VT: T, Action: Legal);
208 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
209 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
210 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
211 if (T != ByteV) {
212 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
213 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
214 setOperationAction(Op: ISD::BSWAP, VT: T, Action: Legal);
215 }
216
217 setOperationAction(Op: ISD::SMIN, VT: T, Action: Legal);
218 setOperationAction(Op: ISD::SMAX, VT: T, Action: Legal);
219 if (T.getScalarType() != MVT::i32) {
220 setOperationAction(Op: ISD::UMIN, VT: T, Action: Legal);
221 setOperationAction(Op: ISD::UMAX, VT: T, Action: Legal);
222 }
223
224 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
225 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
226 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
227 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
228 if (T.getScalarType() != MVT::i32) {
229 setOperationAction(Op: ISD::MULHS, VT: T, Action: Legal);
230 setOperationAction(Op: ISD::MULHU, VT: T, Action: Legal);
231 }
232
233 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
234 // Make concat-vectors custom to handle concats of more than 2 vectors.
235 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
236 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
237 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: T, Action: Custom);
238 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
239 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: T, Action: Custom);
240 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
241 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
242 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
243 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
244 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
245 if (T != ByteV) {
246 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
247 // HVX only has shifts of words and halfwords.
248 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
249 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
250 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
251
252 // Promote all shuffles to operate on vectors of bytes.
253 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
254 }
255
256 if (Subtarget.useHVXFloatingPoint()) {
257 // Same action for both QFloat and IEEE.
258 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
259 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
260 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
261 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
262 }
263
264 setCondCodeAction(CCs: ISD::SETNE, VT: T, Action: Expand);
265 setCondCodeAction(CCs: ISD::SETLE, VT: T, Action: Expand);
266 setCondCodeAction(CCs: ISD::SETGE, VT: T, Action: Expand);
267 setCondCodeAction(CCs: ISD::SETLT, VT: T, Action: Expand);
268 setCondCodeAction(CCs: ISD::SETULE, VT: T, Action: Expand);
269 setCondCodeAction(CCs: ISD::SETUGE, VT: T, Action: Expand);
270 setCondCodeAction(CCs: ISD::SETULT, VT: T, Action: Expand);
271 }
272
273 for (MVT T : LegalW) {
274 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
275 // independent) handling of it would convert it to a load, which is
276 // not always the optimal choice.
277 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
278 // Make concat-vectors custom to handle concats of more than 2 vectors.
279 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
280
281 // Custom-lower these operations for pairs. Expand them into a concat
282 // of the corresponding operations on individual vectors.
283 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
284 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
285 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
286 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Custom);
287 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
288 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
289 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
290 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Custom);
291
292 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
293 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
294 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
295 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
296 setOperationAction(Op: ISD::ABS, VT: T, Action: Custom);
297 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Custom);
298 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
299 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Custom);
300
301 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
302 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
303 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
304 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
305 setOperationAction(Op: ISD::MUL, VT: T, Action: Custom);
306 setOperationAction(Op: ISD::MULHS, VT: T, Action: Custom);
307 setOperationAction(Op: ISD::MULHU, VT: T, Action: Custom);
308 setOperationAction(Op: ISD::AND, VT: T, Action: Custom);
309 setOperationAction(Op: ISD::OR, VT: T, Action: Custom);
310 setOperationAction(Op: ISD::XOR, VT: T, Action: Custom);
311 setOperationAction(Op: ISD::SETCC, VT: T, Action: Custom);
312 setOperationAction(Op: ISD::VSELECT, VT: T, Action: Custom);
313 if (T != ByteW) {
314 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
315 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
316 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
317
318 // Promote all shuffles to operate on vectors of bytes.
319 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
320 }
321 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
322 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
323
324 setOperationAction(Op: ISD::SMIN, VT: T, Action: Custom);
325 setOperationAction(Op: ISD::SMAX, VT: T, Action: Custom);
326 if (T.getScalarType() != MVT::i32) {
327 setOperationAction(Op: ISD::UMIN, VT: T, Action: Custom);
328 setOperationAction(Op: ISD::UMAX, VT: T, Action: Custom);
329 }
330
331 if (Subtarget.useHVXFloatingPoint()) {
332 // Same action for both QFloat and IEEE.
333 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
334 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
335 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
336 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
337 }
338 }
339
340 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
341 setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI
342 setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI
343 setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom);
344 setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom);
345
346 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v64f16, Action: Expand);
347 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v64f16, Action: Expand);
348 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v64f16, Action: Expand);
349 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v64f16, Action: Expand);
350 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v64f16, Action: Expand);
351 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v64f16, Action: Expand);
352 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v64f16, Action: Expand);
353 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v64f16, Action: Expand);
354 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v64f16, Action: Expand);
355 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v64f16, Action: Expand);
356 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v64f16, Action: Expand);
357 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v64f16, Action: Expand);
358 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v64f16, Action: Expand);
359 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v64f16, Action: Expand);
360
361 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v32f32, Action: Expand);
362 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v32f32, Action: Expand);
363 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v32f32, Action: Expand);
364 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v32f32, Action: Expand);
365 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v32f32, Action: Expand);
366 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v32f32, Action: Expand);
367 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v32f32, Action: Expand);
368 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v32f32, Action: Expand);
369 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v32f32, Action: Expand);
370 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v32f32, Action: Expand);
371 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v32f32, Action: Expand);
372 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v32f32, Action: Expand);
373 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v32f32, Action: Expand);
374 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v32f32, Action: Expand);
375
376 // Boolean vectors.
377
378 for (MVT T : LegalW) {
379 // Boolean types for vector pairs will overlap with the boolean
380 // types for single vectors, e.g.
381 // v64i8 -> v64i1 (single)
382 // v64i16 -> v64i1 (pair)
383 // Set these actions first, and allow the single actions to overwrite
384 // any duplicates.
385 MVT BoolW = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
386 setOperationAction(Op: ISD::SETCC, VT: BoolW, Action: Custom);
387 setOperationAction(Op: ISD::AND, VT: BoolW, Action: Custom);
388 setOperationAction(Op: ISD::OR, VT: BoolW, Action: Custom);
389 setOperationAction(Op: ISD::XOR, VT: BoolW, Action: Custom);
390 // Masked load/store takes a mask that may need splitting.
391 setOperationAction(Op: ISD::MLOAD, VT: BoolW, Action: Custom);
392 setOperationAction(Op: ISD::MSTORE, VT: BoolW, Action: Custom);
393 }
394
395 for (MVT T : LegalV) {
396 MVT BoolV = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
397 setOperationAction(Op: ISD::BUILD_VECTOR, VT: BoolV, Action: Custom);
398 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: BoolV, Action: Custom);
399 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: BoolV, Action: Custom);
400 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: BoolV, Action: Custom);
401 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: BoolV, Action: Custom);
402 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: BoolV, Action: Custom);
403 setOperationAction(Op: ISD::SELECT, VT: BoolV, Action: Custom);
404 setOperationAction(Op: ISD::AND, VT: BoolV, Action: Legal);
405 setOperationAction(Op: ISD::OR, VT: BoolV, Action: Legal);
406 setOperationAction(Op: ISD::XOR, VT: BoolV, Action: Legal);
407 }
408
409 if (Use64b) {
410 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
411 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
412 } else {
413 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
414 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
415 }
416
417 // Handle store widening for short vectors.
418 unsigned HwLen = Subtarget.getVectorLength();
419 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
420 if (ElemTy == MVT::i1)
421 continue;
422 int ElemWidth = ElemTy.getFixedSizeInBits();
423 int MaxElems = (8*HwLen) / ElemWidth;
424 for (int N = 2; N < MaxElems; N *= 2) {
425 MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N);
426 auto Action = getPreferredVectorAction(VT: VecTy);
427 if (Action == TargetLoweringBase::TypeWidenVector) {
428 setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom);
429 setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom);
430 setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom);
431 setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom);
432 setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom);
433 setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom);
434 setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom);
435 if (Subtarget.useHVXFloatingPoint()) {
436 setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom);
437 setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom);
438 setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom);
439 setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom);
440 }
441
442 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: N);
443 if (!isTypeLegal(VT: BoolTy))
444 setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom);
445 }
446 }
447 }
448
449 setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
450}
451
452unsigned
453HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
454 MVT ElemTy = VecTy.getVectorElementType();
455 unsigned VecLen = VecTy.getVectorNumElements();
456 unsigned HwLen = Subtarget.getVectorLength();
457
458 // Split vectors of i1 that exceed byte vector length.
459 if (ElemTy == MVT::i1 && VecLen > HwLen)
460 return TargetLoweringBase::TypeSplitVector;
461
462 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
463 // For shorter vectors of i1, widen them if any of the corresponding
464 // vectors of integers needs to be widened.
465 if (ElemTy == MVT::i1) {
466 for (MVT T : Tys) {
467 assert(T != MVT::i1);
468 auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen));
469 if (A != ~0u)
470 return A;
471 }
472 return ~0u;
473 }
474
475 // If the size of VecTy is at least half of the vector length,
476 // widen the vector. Note: the threshold was not selected in
477 // any scientific way.
478 if (llvm::is_contained(Range&: Tys, Element: ElemTy)) {
479 unsigned VecWidth = VecTy.getSizeInBits();
480 unsigned HwWidth = 8*HwLen;
481 if (VecWidth > 2*HwWidth)
482 return TargetLoweringBase::TypeSplitVector;
483
484 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
485 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
486 return TargetLoweringBase::TypeWidenVector;
487 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
488 return TargetLoweringBase::TypeWidenVector;
489 }
490
491 // Defer to default.
492 return ~0u;
493}
494
495unsigned
496HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
497 unsigned Opc = Op.getOpcode();
498 switch (Opc) {
499 case HexagonISD::SMUL_LOHI:
500 case HexagonISD::UMUL_LOHI:
501 case HexagonISD::USMUL_LOHI:
502 return TargetLoweringBase::Custom;
503 }
504 return TargetLoweringBase::Legal;
505}
506
507SDValue
508HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
509 const SDLoc &dl, SelectionDAG &DAG) const {
510 SmallVector<SDValue,4> IntOps;
511 IntOps.push_back(Elt: DAG.getConstant(Val: IntId, DL: dl, VT: MVT::i32));
512 append_range(C&: IntOps, R&: Ops);
513 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps);
514}
515
516MVT
517HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
518 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
519
520 MVT ElemTy = Tys.first.getVectorElementType();
521 return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() +
522 Tys.second.getVectorNumElements());
523}
524
525HexagonTargetLowering::TypePair
526HexagonTargetLowering::typeSplit(MVT VecTy) const {
527 assert(VecTy.isVector());
528 unsigned NumElem = VecTy.getVectorNumElements();
529 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
530 MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/2);
531 return { HalfTy, HalfTy };
532}
533
534MVT
535HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
536 MVT ElemTy = VecTy.getVectorElementType();
537 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor);
538 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
539}
540
541MVT
542HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
543 MVT ElemTy = VecTy.getVectorElementType();
544 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor);
545 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
546}
547
548SDValue
549HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
550 SelectionDAG &DAG) const {
551 if (ty(Op: Vec).getVectorElementType() == ElemTy)
552 return Vec;
553 MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy);
554 return DAG.getBitcast(VT: CastTy, V: Vec);
555}
556
557SDValue
558HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
559 SelectionDAG &DAG) const {
560 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)),
561 N1: Ops.first, N2: Ops.second);
562}
563
564HexagonTargetLowering::VectorPair
565HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
566 SelectionDAG &DAG) const {
567 TypePair Tys = typeSplit(VecTy: ty(Op: Vec));
568 if (Vec.getOpcode() == HexagonISD::QCAT)
569 return VectorPair(Vec.getOperand(i: 0), Vec.getOperand(i: 1));
570 return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second);
571}
572
573bool
574HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
575 return Subtarget.isHVXVectorType(VecTy: Ty) &&
576 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
577}
578
579bool
580HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
581 return Subtarget.isHVXVectorType(VecTy: Ty) &&
582 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
583}
584
585bool
586HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
587 return Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true) &&
588 Ty.getVectorElementType() == MVT::i1;
589}
590
591bool HexagonTargetLowering::allowsHvxMemoryAccess(
592 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
593 // Bool vectors are excluded by default, but make it explicit to
594 // emphasize that bool vectors cannot be loaded or stored.
595 // Also, disallow double vector stores (to prevent unnecessary
596 // store widening in DAG combiner).
597 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
598 return false;
599 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
600 return false;
601 if (Fast)
602 *Fast = 1;
603 return true;
604}
605
606bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
607 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
608 if (!Subtarget.isHVXVectorType(VecTy))
609 return false;
610 // XXX Should this be false? vmemu are a bit slower than vmem.
611 if (Fast)
612 *Fast = 1;
613 return true;
614}
615
616void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
617 MachineInstr &MI, SDNode *Node) const {
618 unsigned Opc = MI.getOpcode();
619 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
620 MachineBasicBlock &MB = *MI.getParent();
621 MachineFunction &MF = *MB.getParent();
622 MachineRegisterInfo &MRI = MF.getRegInfo();
623 DebugLoc DL = MI.getDebugLoc();
624 auto At = MI.getIterator();
625
626 switch (Opc) {
627 case Hexagon::PS_vsplatib:
628 if (Subtarget.useHVXV62Ops()) {
629 // SplatV = A2_tfrsi #imm
630 // OutV = V6_lvsplatb SplatV
631 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
632 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
633 .add(MO: MI.getOperand(i: 1));
634 Register OutV = MI.getOperand(i: 0).getReg();
635 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
636 .addReg(RegNo: SplatV);
637 } else {
638 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
639 // OutV = V6_lvsplatw SplatV
640 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
641 const MachineOperand &InpOp = MI.getOperand(i: 1);
642 assert(InpOp.isImm());
643 uint32_t V = InpOp.getImm() & 0xFF;
644 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
645 .addImm(Val: V << 24 | V << 16 | V << 8 | V);
646 Register OutV = MI.getOperand(i: 0).getReg();
647 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
648 }
649 MB.erase(I: At);
650 break;
651 case Hexagon::PS_vsplatrb:
652 if (Subtarget.useHVXV62Ops()) {
653 // OutV = V6_lvsplatb Inp
654 Register OutV = MI.getOperand(i: 0).getReg();
655 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
656 .add(MO: MI.getOperand(i: 1));
657 } else {
658 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
659 const MachineOperand &InpOp = MI.getOperand(i: 1);
660 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::S2_vsplatrb), DestReg: SplatV)
661 .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg());
662 Register OutV = MI.getOperand(i: 0).getReg();
663 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV)
664 .addReg(RegNo: SplatV);
665 }
666 MB.erase(I: At);
667 break;
668 case Hexagon::PS_vsplatih:
669 if (Subtarget.useHVXV62Ops()) {
670 // SplatV = A2_tfrsi #imm
671 // OutV = V6_lvsplath SplatV
672 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
673 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
674 .add(MO: MI.getOperand(i: 1));
675 Register OutV = MI.getOperand(i: 0).getReg();
676 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
677 .addReg(RegNo: SplatV);
678 } else {
679 // SplatV = A2_tfrsi #imm:#imm
680 // OutV = V6_lvsplatw SplatV
681 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
682 const MachineOperand &InpOp = MI.getOperand(i: 1);
683 assert(InpOp.isImm());
684 uint32_t V = InpOp.getImm() & 0xFFFF;
685 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
686 .addImm(Val: V << 16 | V);
687 Register OutV = MI.getOperand(i: 0).getReg();
688 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
689 }
690 MB.erase(I: At);
691 break;
692 case Hexagon::PS_vsplatrh:
693 if (Subtarget.useHVXV62Ops()) {
694 // OutV = V6_lvsplath Inp
695 Register OutV = MI.getOperand(i: 0).getReg();
696 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
697 .add(MO: MI.getOperand(i: 1));
698 } else {
699 // SplatV = A2_combine_ll Inp, Inp
700 // OutV = V6_lvsplatw SplatV
701 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
702 const MachineOperand &InpOp = MI.getOperand(i: 1);
703 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_combine_ll), DestReg: SplatV)
704 .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg())
705 .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg());
706 Register OutV = MI.getOperand(i: 0).getReg();
707 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
708 }
709 MB.erase(I: At);
710 break;
711 case Hexagon::PS_vsplatiw:
712 case Hexagon::PS_vsplatrw:
713 if (Opc == Hexagon::PS_vsplatiw) {
714 // SplatV = A2_tfrsi #imm
715 Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
716 BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
717 .add(MO: MI.getOperand(i: 1));
718 MI.getOperand(i: 1).ChangeToRegister(Reg: SplatV, isDef: false);
719 }
720 // OutV = V6_lvsplatw SplatV/Inp
721 MI.setDesc(TII.get(Opcode: Hexagon::V6_lvsplatw));
722 break;
723 }
724}
725
726SDValue
727HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
728 SelectionDAG &DAG) const {
729 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
730 ElemIdx = DAG.getBitcast(VT: MVT::i32, V: ElemIdx);
731
732 unsigned ElemWidth = ElemTy.getSizeInBits();
733 if (ElemWidth == 8)
734 return ElemIdx;
735
736 unsigned L = Log2_32(Value: ElemWidth/8);
737 const SDLoc &dl(ElemIdx);
738 return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32,
739 Ops: {ElemIdx, DAG.getConstant(Val: L, DL: dl, VT: MVT::i32)});
740}
741
742SDValue
743HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
744 SelectionDAG &DAG) const {
745 unsigned ElemWidth = ElemTy.getSizeInBits();
746 assert(ElemWidth >= 8 && ElemWidth <= 32);
747 if (ElemWidth == 32)
748 return Idx;
749
750 if (ty(Op: Idx) != MVT::i32)
751 Idx = DAG.getBitcast(VT: MVT::i32, V: Idx);
752 const SDLoc &dl(Idx);
753 SDValue Mask = DAG.getConstant(Val: 32/ElemWidth - 1, DL: dl, VT: MVT::i32);
754 SDValue SubIdx = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, Ops: {Idx, Mask});
755 return SubIdx;
756}
757
758SDValue
759HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
760 SDValue Op1, ArrayRef<int> Mask,
761 SelectionDAG &DAG) const {
762 MVT OpTy = ty(Op: Op0);
763 assert(OpTy == ty(Op1));
764
765 MVT ElemTy = OpTy.getVectorElementType();
766 if (ElemTy == MVT::i8)
767 return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask);
768 assert(ElemTy.getSizeInBits() >= 8);
769
770 MVT ResTy = tyVector(Ty: OpTy, ElemTy: MVT::i8);
771 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
772
773 SmallVector<int,128> ByteMask;
774 for (int M : Mask) {
775 if (M < 0) {
776 for (unsigned I = 0; I != ElemSize; ++I)
777 ByteMask.push_back(Elt: -1);
778 } else {
779 int NewM = M*ElemSize;
780 for (unsigned I = 0; I != ElemSize; ++I)
781 ByteMask.push_back(Elt: NewM+I);
782 }
783 }
784 assert(ResTy.getVectorNumElements() == ByteMask.size());
785 return DAG.getVectorShuffle(VT: ResTy, dl, N1: opCastElem(Vec: Op0, ElemTy: MVT::i8, DAG),
786 N2: opCastElem(Vec: Op1, ElemTy: MVT::i8, DAG), Mask: ByteMask);
787}
788
SDValue
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  // Build an HVX vector register of type VecTy from the per-element scalar
  // operands in Values. Strategies, tried in order: splat, constant-pool
  // load, shuffle of an existing vector, and finally word-by-word insertion
  // via VROR + VINSERTW0.
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);
  SmallVector<SDValue,32> Words;

  // Vector insertions operate on 32-bit words, so first pack sub-word
  // elements (i8/i16, and f16 when HVX FP is available) into i32 words.
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG);
      Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V: W));
    }
  } else {
    // 32-bit elements: each value is already exactly one word.
    for (SDValue V : Values)
      Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V));
  }
  // Returns true if all non-undef values are identical (or everything is
  // undef), reporting the common value in SplatV.
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(Op: SplatV))
    return DAG.getUNDEF(VT: VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    // An all-zero vector has a cheaper dedicated form.
    if (isNullConstant(V: SplatV))
      return getZero(dl, Ty: VecTy, DAG);
    // Splat the common word and reinterpret as the requested type.
    MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
    SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV);
    return DAG.getBitcast(VT: VecTy, V: S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    // All elements are constants (but not all equal): materialize the
    // vector with a constant-pool load.
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(V: Tmp);
    Align Alignment(HwLen);
    SDValue CP =
        LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: VecTy, Align: Alignment), DAG);
    return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
                       PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(Op: V)) {
        SrcIdx.push_back(Elt: -1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(i: 0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      // The extract index must be a compile-time constant.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(Elt: I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(Op: ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(Elt: M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(Idx: I))
          Mask.push_back(Elt: I);
      }

      SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec,
                                       N2: DAG.getUNDEF(VT: ExtTy), Mask);
      // If the source was a pair, the result lives in the low half.
      return ExtLen == VecLen ? S : LoHalf(V: S, DAG);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    // Count occurrences of Words[i] from position i onward.
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, Ty: VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill with a splat of the most common word; positions holding that
    // word then need no individual insertion below.
    SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words[n]);
    HalfV = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: VecTy,
                        Ops: {HalfV, SplatV, DAG.getConstant(Val: HwLen/2, DL: dl, VT: MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Val: Rn, DL: dl, VT: MVT::i32);
      HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
      N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
                      Ops: {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
      HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});
      M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
                      Ops: {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Val: Rn+HwLen/2, DL: dl, VT: MVT::i32);
  Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
  HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
  HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});

  // Combine the two halves with a bitwise OR on the i32 view of the vector.
  SDValue T0 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV0);
  SDValue T1 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV1);

  SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1});

  SDValue OutV =
      DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV);
  return OutV;
}
1001
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  // Create a byte-vector representation of the predicate PredV in which
  // each original i1 element occupies BitBytes consecutive bytes at the
  // front of the result. If ZeroFill is set, the trailing bytes are zeroed;
  // otherwise their content is unspecified.
  MVT PredTy = ty(Op: PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);

  if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Scatter: source byte i lands in block (i % Scale) at offset (i / Scale),
    // so the first block ends up holding every Scale-th byte.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
    SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                         Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
    SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q);
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes per predicate element in the initial 64-bit (P2D) expansion.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Two word lists used as ping-pong buffers while doubling Bytes below.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  // Expand the scalar predicate to a 64-bit value, then split into words.
  SDValue W0 = isUndef(Op: PredV)
                  ? DAG.getUNDEF(VT: MVT::i64)
                  : DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: PredV);
  Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG));
  Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG));

  // Repeatedly double the number of bytes per element until it matches the
  // requested BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      // Elements are still sub-word: widen each word via expandPredicate.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(Vec32: W, dl, DAG);
        Words[IdxW].push_back(Elt: HiHalf(V: T, DAG));
        Words[IdxW].push_back(Elt: LoHalf(V: T, DAG));
      }
    } else {
      // Elements already fill a word: duplicate each word instead.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(Elt: W);
        Words[IdxW].push_back(Elt: W);
      }
    }
    Bytes *= 2;
  }

  assert(Bytes == BitBytes);

  // Insert the words into the vector front-to-back using ror + insert-w0.
  SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy);
  SDValue S4 = DAG.getConstant(Val: HwLen-4, DL: dl, VT: MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4);
    Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W);
  }

  return Vec;
}
1086
1087SDValue
1088HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1089 const SDLoc &dl, MVT VecTy,
1090 SelectionDAG &DAG) const {
1091 // Construct a vector V of bytes, such that a comparison V >u 0 would
1092 // produce the required vector predicate.
1093 unsigned VecLen = Values.size();
1094 unsigned HwLen = Subtarget.getVectorLength();
1095 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1096 SmallVector<SDValue,128> Bytes;
1097 bool AllT = true, AllF = true;
1098
1099 auto IsTrue = [] (SDValue V) {
1100 if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
1101 return !N->isZero();
1102 return false;
1103 };
1104 auto IsFalse = [] (SDValue V) {
1105 if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
1106 return N->isZero();
1107 return false;
1108 };
1109
1110 if (VecLen <= HwLen) {
1111 // In the hardware, each bit of a vector predicate corresponds to a byte
1112 // of a vector register. Calculate how many bytes does a bit of VecTy
1113 // correspond to.
1114 assert(HwLen % VecLen == 0);
1115 unsigned BitBytes = HwLen / VecLen;
1116 for (SDValue V : Values) {
1117 AllT &= IsTrue(V);
1118 AllF &= IsFalse(V);
1119
1120 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(Op: V, DL: dl, VT: MVT::i8)
1121 : DAG.getUNDEF(VT: MVT::i8);
1122 for (unsigned B = 0; B != BitBytes; ++B)
1123 Bytes.push_back(Elt: Ext);
1124 }
1125 } else {
1126 // There are as many i1 values, as there are bits in a vector register.
1127 // Divide the values into groups of 8 and check that each group consists
1128 // of the same value (ignoring undefs).
1129 for (unsigned I = 0; I != VecLen; I += 8) {
1130 unsigned B = 0;
1131 // Find the first non-undef value in this group.
1132 for (; B != 8; ++B) {
1133 if (!Values[I+B].isUndef())
1134 break;
1135 }
1136 SDValue F = Values[I+B];
1137 AllT &= IsTrue(F);
1138 AllF &= IsFalse(F);
1139
1140 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(Op: F, DL: dl, VT: MVT::i8)
1141 : DAG.getUNDEF(VT: MVT::i8);
1142 Bytes.push_back(Elt: Ext);
1143 // Verify that the rest of values in the group are the same as the
1144 // first.
1145 for (; B != 8; ++B)
1146 assert(Values[I+B].isUndef() || Values[I+B] == F);
1147 }
1148 }
1149
1150 if (AllT)
1151 return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy);
1152 if (AllF)
1153 return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy);
1154
1155 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1156 SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG);
1157 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
1158}
1159
1160SDValue
1161HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1162 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1163 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1164
1165 unsigned ElemWidth = ElemTy.getSizeInBits();
1166 assert(ElemWidth >= 8 && ElemWidth <= 32);
1167 (void)ElemWidth;
1168
1169 SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
1170 SDValue ExWord = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
1171 Ops: {VecV, ByteIdx});
1172 if (ElemTy == MVT::i32)
1173 return ExWord;
1174
1175 // Have an extracted word, need to extract the smaller element out of it.
1176 // 1. Extract the bits of (the original) IdxV that correspond to the index
1177 // of the desired element in the 32-bit word.
1178 SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
1179 // 2. Extract the element from the word.
1180 SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord);
1181 return extractVector(VecV: ExVec, IdxV: SubIdx, dl, ValTy: ElemTy, ResTy: MVT::i32, DAG);
1182}
1183
1184SDValue
1185HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1186 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1187 // Implement other return types if necessary.
1188 assert(ResTy == MVT::i1);
1189
1190 unsigned HwLen = Subtarget.getVectorLength();
1191 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1192 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1193
1194 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1195 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1196 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1197
1198 SDValue ExtB = extractHvxElementReg(VecV: ByteVec, IdxV, dl, ResTy: MVT::i32, DAG);
1199 SDValue Zero = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32);
1200 return getInstr(MachineOpc: Hexagon::C2_cmpgtui, dl, Ty: MVT::i1, Ops: {ExtB, Zero}, DAG);
1201}
1202
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the scalar ValV at position IdxV of HVX vector VecV, returning
  // the updated vector. Sub-word elements go through a read-modify-write
  // of their containing 32-bit word.
  MVT ElemTy = ty(Op: VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert a 32-bit word at a (byte) position: rotate the word to lane 0,
  // insert via VINSERTW0, then rotate back by HwLen - position.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(Op: VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Align the byte index down to a word boundary (clear the low 2 bits).
    SDValue MaskV =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                    Ops: {ByteIdxV, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)});
    SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV});
    SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV});
    SDValue SubV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                               Ops: {DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
  // Word-sized elements can be inserted directly.
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
                                Ops: {ByteIdx, DAG.getConstant(Val: 2, DL: dl, VT: MVT::i32)});
  SDValue Ext = extractHvxElementReg(VecV: opCastElem(Vec: VecV, ElemTy: MVT::i32, DAG), IdxV: WordIdx,
                                     dl, ResTy: MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy);
  SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext),
                             ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1248
1249SDValue
1250HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1251 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1252 unsigned HwLen = Subtarget.getVectorLength();
1253 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1254 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1255
1256 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1257 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1258 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1259 ValV = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i32, Operand: ValV);
1260
1261 SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG);
1262 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV);
1263}
1264
1265SDValue
1266HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1267 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1268 MVT VecTy = ty(Op: VecV);
1269 unsigned HwLen = Subtarget.getVectorLength();
1270 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1271 MVT ElemTy = VecTy.getVectorElementType();
1272 unsigned ElemWidth = ElemTy.getSizeInBits();
1273
1274 // If the source vector is a vector pair, get the single vector containing
1275 // the subvector of interest. The subvector will never overlap two single
1276 // vectors.
1277 if (isHvxPairTy(Ty: VecTy)) {
1278 unsigned SubIdx = Hexagon::vsub_lo;
1279 if (Idx * ElemWidth >= 8 * HwLen) {
1280 SubIdx = Hexagon::vsub_hi;
1281 Idx -= VecTy.getVectorNumElements() / 2;
1282 }
1283
1284 VecTy = typeSplit(VecTy).first;
1285 VecV = DAG.getTargetExtractSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV);
1286 if (VecTy == ResTy)
1287 return VecV;
1288 }
1289
1290 // The only meaningful subvectors of a single HVX vector are those that
1291 // fit in a scalar register.
1292 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1293
1294 MVT WordTy = tyVector(Ty: VecTy, ElemTy: MVT::i32);
1295 SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV);
1296 unsigned WordIdx = (Idx*ElemWidth) / 32;
1297
1298 SDValue W0Idx = DAG.getConstant(Val: WordIdx, DL: dl, VT: MVT::i32);
1299 SDValue W0 = extractHvxElementReg(VecV: WordVec, IdxV: W0Idx, dl, ResTy: MVT::i32, DAG);
1300 if (ResTy.getSizeInBits() == 32)
1301 return DAG.getBitcast(VT: ResTy, V: W0);
1302
1303 SDValue W1Idx = DAG.getConstant(Val: WordIdx+1, DL: dl, VT: MVT::i32);
1304 SDValue W1 = extractHvxElementReg(VecV: WordVec, IdxV: W1Idx, dl, ResTy: MVT::i32, DAG);
1305 SDValue WW = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::i64, DAG);
1306 return DAG.getBitcast(VT: ResTy, V: WW);
1307}
1308
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract a subvector of a vector predicate. The result is either another
  // (shorter) vector predicate, or a scalar predicate (v2i1/v4i1/v8i1).
  MVT VecTy = ty(Op: VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes in ByteVec per single i1 element of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(VT: ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      // j only counts repetitions: each byte index is pushed Rep times.
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Elt: i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    // Each of the ResLen group-representative bytes is repeated Rep times
    // (j is only a repeat counter).
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Elt: Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, Ty: MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, Ops: {ShuffV, Zero});
  SDValue W1 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
                           Ops: {ShuffV, DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)});
  SDValue Vec64 = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::v8i8, DAG);
  return getInstr(MachineOpc: Hexagon::A4_vcmpbgtui, dl, Ty: ResTy,
                  Ops: {Vec64, DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)}, DAG);
}
1372
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the subvector SubV into VecV at element index IdxV. VecV may be
  // a single HVX vector or a vector pair; SubV is either a single vector
  // (pair case only) or a 32/64-bit-sized subvector.
  MVT VecTy = ty(Op: VecV);
  MVT SubTy = ty(Op: SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(Ty: VecTy);
  MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(V: VecV, DAG);
    V1 = HiHalf(V: VecV, DAG);

    // PickHi selects the high half when the index lies beyond the first
    // single vector.
    // NOTE(review): SETUGT makes Idx == HalfV (exactly the start of the
    // high half) pick the low vector -- verify whether SETUGE was intended
    // for the non-constant-index paths below.
    SDValue HalfV = DAG.getConstant(Val: SingleTy.getVectorNumElements(),
                                    DL: dl, VT: MVT::i32);
    PickHi = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: IdxV, RHS: HalfV, Cond: ISD::SETUGT);
    if (isHvxSingleTy(Ty: SubTy)) {
      // Inserting a whole single vector into a pair: this is a subregister
      // update for a constant index.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(Val: IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1});
      SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV});
      return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: IdxV, N2: HalfV);
    IdxV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: PickHi, N2: S, N3: IdxV);
    SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to lane 0.
    IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
                       N2: DAG.getConstant(Val: ElemWidth/8, DL: dl, VT: MVT::i32));
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    // 32-bit subvector: a single word insert at lane 0.
    SDValue V = DAG.getBitcast(VT: MVT::i32, V: SubV);
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V);
  } else {
    // 64-bit subvector: insert the low word, rotate by 4 bytes, insert the
    // high word.
    SDValue V = DAG.getBitcast(VT: MVT::i64, V: SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0);
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV,
                          N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  // NOTE(review): RolBase is always HwLen or HwLen-4, so "RolBase != 4" is
  // always true and the rotate-back is always emitted (correct, since a
  // rotate by HwLen is an identity, but the skip never fires) -- confirm
  // whether "RolBase != HwLen" was the intended condition.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                               N1: DAG.getConstant(Val: RolBase, DL: dl, VT: MVT::i32), N2: IdxV);
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV);
  }

  if (IsPair) {
    // Recombine the updated single vector with the untouched half.
    SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1});
    SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV});
    return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
  }
  return SingleV;
}
1459
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the predicate subvector SubV into the predicate vector VecV at
  // (element) index IdxV. The insertion is performed on the byte-vector
  // image of the predicates: Q2V, rotate, vmux under a prefix mask,
  // rotate back, V2Q.
  MVT VecTy = ty(Op: VecV);
  MVT SubTy = ty(Op: SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many subvectors (of SubTy) make up the whole predicate.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // BitBytes: number of vector-register bytes corresponding to one
  // predicate bit.
  unsigned BitBytes = HwLen / VecLen;
  // BlockLen: size in bytes of the byte-vector image of SubV.
  unsigned BlockLen = HwLen / Scale;

  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
  // ByteSub holds the byte image of SubV in the low BlockLen bytes.
  SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte position 0. (Skipped for a
    // constant zero index.)
    ByteIdx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
                          N2: DAG.getConstant(Val: BitBytes, DL: dl, VT: MVT::i32));
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  // Q is true for the first BlockLen byte lanes (vsetq semantics).
  SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                       Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
  ByteVec = getInstr(MachineOpc: Hexagon::V6_vmux, dl, Ty: ByteTy, Ops: {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32);
    SDValue ByteXdi = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: HwLenV, N2: ByteIdx);
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi);
  }
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
}
1505
1506SDValue
1507HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1508 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1509 // Sign- and any-extending of a vector predicate to a vector register is
1510 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1511 // a vector of 1s (where the 1s are of type matching the vector type).
1512 assert(Subtarget.isHVXVectorType(ResTy));
1513 if (!ZeroExt)
1514 return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV);
1515
1516 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1517 SDValue True = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1518 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
1519 SDValue False = getZero(dl, Ty: ResTy, DAG);
1520 return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False);
1521}
1522
SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT PredTy = ty(Op: VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  // VecTy has PredLen elements, each wide enough to cover the bytes that
  // correspond to one predicate bit (so a select on VecQ is well-formed).
  MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8*HwLen/PredLen), NumElements: PredLen);

  Type *Int8Ty = Type::getInt8Ty(C&: *DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: 1ull << j));
  }
  Constant *CV = ConstantVector::get(V: Tmp);
  // The pattern table is materialized through the constant pool and loaded
  // as a full vector (aligned to the vector length).
  Align Alignment(HwLen);
  SDValue CP =
      LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: ByteTy, Align: Alignment), DAG);
  SDValue Bytes =
      DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
                  PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes),
      RHS: getZero(dl, Ty: VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(VT: MVT::v4i8, DL: dl, Op: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
  SDValue Vrmpy = getInstr(MachineOpc: Hexagon::V6_vrmpyub, dl, Ty: ByteTy, Ops: {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(MachineOpc: Hexagon::V6_valignbi, dl, Ty: ByteTy,
      Ops: {Vrmpy, Vrmpy, DAG.getTargetConstant(Val: 4, DL: dl, VT: MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
  SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back(Elt: (8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask);
  return DAG.getBitcast(VT: ResTy, V: Collect);
}
1579
1580SDValue
1581HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1582 const SDLoc &dl, SelectionDAG &DAG) const {
1583 // Take a vector and resize the element type to match the given type.
1584 MVT InpTy = ty(Op: VecV);
1585 if (InpTy == ResTy)
1586 return VecV;
1587
1588 unsigned InpWidth = InpTy.getSizeInBits();
1589 unsigned ResWidth = ResTy.getSizeInBits();
1590
1591 if (InpTy.isFloatingPoint()) {
1592 return InpWidth < ResWidth
1593 ? DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: ResTy, Operand: VecV)
1594 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: ResTy, N1: VecV,
1595 N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
1596 }
1597
1598 assert(InpTy.isInteger());
1599
1600 if (InpWidth < ResWidth) {
1601 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1602 return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV);
1603 } else {
1604 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1605 return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy));
1606 }
1607}
1608
1609SDValue
1610HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1611 SelectionDAG &DAG) const {
1612 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1613
1614 const SDLoc &dl(Vec);
1615 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1616 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubTy,
1617 Ops: {Vec, DAG.getConstant(Val: ElemIdx, DL: dl, VT: MVT::i32)});
1618}
1619
1620SDValue
1621HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1622 const {
1623 const SDLoc &dl(Op);
1624 MVT VecTy = ty(Op);
1625
1626 unsigned Size = Op.getNumOperands();
1627 SmallVector<SDValue,128> Ops;
1628 for (unsigned i = 0; i != Size; ++i)
1629 Ops.push_back(Elt: Op.getOperand(i));
1630
1631 if (VecTy.getVectorElementType() == MVT::i1)
1632 return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG);
1633
1634 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1635 // not a legal type, just bitcast the node to use i16
1636 // types and bitcast the result back to f16
1637 if (VecTy.getVectorElementType() == MVT::f16) {
1638 SmallVector<SDValue,64> NewOps;
1639 for (unsigned i = 0; i != Size; i++)
1640 NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Ops[i]));
1641
1642 SDValue T0 = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl,
1643 VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), Ops: NewOps);
1644 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0);
1645 }
1646
1647 // First, split the BUILD_VECTOR for vector pairs. We could generate
1648 // some pairs directly (via splat), but splats should be generated
1649 // by the combiner prior to getting here.
1650 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1651 ArrayRef<SDValue> A(Ops);
1652 MVT SingleTy = typeSplit(VecTy).first;
1653 SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1654 SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1655 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1);
1656 }
1657
1658 return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG);
1659}
1660
1661SDValue
1662HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1663 const {
1664 const SDLoc &dl(Op);
1665 MVT VecTy = ty(Op);
1666 MVT ArgTy = ty(Op: Op.getOperand(i: 0));
1667
1668 if (ArgTy == MVT::f16) {
1669 MVT SplatTy = MVT::getVectorVT(VT: MVT::i16, NumElements: VecTy.getVectorNumElements());
1670 SDValue ToInt16 = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: 0));
1671 SDValue ToInt32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: ToInt16);
1672 SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32);
1673 return DAG.getBitcast(VT: VecTy, V: Splat);
1674 }
1675
1676 return SDValue();
1677}
1678
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(Op: V, Args&: Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (SDValue &V : Elems) {
      MVT Ty = ty(Op: V);
      if (!isTypeLegal(VT: Ty)) {
        MVT NTy = typeLegalize(Ty, DAG);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Re-extract in the legal type and sign-extend in-register so
          // the value is a proper sext of the original element.
          V = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy,
                          N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy,
                                        N1: V.getOperand(i: 0), N2: V.getOperand(i: 1)),
                          N2: DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
          case ISD::Constant:
            V = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy);
            break;
          case ISD::UNDEF:
            V = DAG.getUNDEF(VT: NTy);
            break;
          case ISD::TRUNCATE:
            // Drop the truncate; the wider source already has the value.
            V = V.getOperand(i: 0);
            break;
          default:
            llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems);
  }

  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(i: 0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) {
    if (NumOp == 2)
      return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: 1));

    // More than 2 operands: concatenate each half recursively, then QCAT
    // the two halves.
    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue, 4> SV(U);
    ArrayRef<SDValue> Ops(SV);

    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_front(N: NumOp/2));
    SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_back(N: NumOp/2));
    return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Convert each operand into its byte-vector prefix image.
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Op.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG);
    Prefixes.push_back(Elt: P);
  }

  // Accumulate the prefixes from last to first: rotate the result to make
  // room for each input's bytes, then OR the prefix in. The final result
  // is converted back to a predicate.
  unsigned InpLen = ty(Op: Op.getOperand(i: 0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue S = DAG.getConstant(Val: HwLen - InpLen*BitBytes, DL: dl, VT: MVT::i32);
  SDValue Res = getZero(dl, Ty: ByteTy, DAG);
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S);
    Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes[e-i-1]);
  }
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res);
}
1774
1775SDValue
1776HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1777 const {
1778 // Change the type of the extracted element to i32.
1779 SDValue VecV = Op.getOperand(i: 0);
1780 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1781 const SDLoc &dl(Op);
1782 SDValue IdxV = Op.getOperand(i: 1);
1783 if (ElemTy == MVT::i1)
1784 return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1785
1786 return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1787}
1788
1789SDValue
1790HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1791 const {
1792 const SDLoc &dl(Op);
1793 MVT VecTy = ty(Op);
1794 SDValue VecV = Op.getOperand(i: 0);
1795 SDValue ValV = Op.getOperand(i: 1);
1796 SDValue IdxV = Op.getOperand(i: 2);
1797 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1798 if (ElemTy == MVT::i1)
1799 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1800
1801 if (ElemTy == MVT::f16) {
1802 SDValue T0 = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl,
1803 VT: tyVector(Ty: VecTy, ElemTy: MVT::i16),
1804 N1: DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), V: VecV),
1805 N2: DAG.getBitcast(VT: MVT::i16, V: ValV), N3: IdxV);
1806 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0);
1807 }
1808
1809 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1810}
1811
1812SDValue
1813HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1814 const {
1815 SDValue SrcV = Op.getOperand(i: 0);
1816 MVT SrcTy = ty(Op: SrcV);
1817 MVT DstTy = ty(Op);
1818 SDValue IdxV = Op.getOperand(i: 1);
1819 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1820 assert(Idx % DstTy.getVectorNumElements() == 0);
1821 (void)Idx;
1822 const SDLoc &dl(Op);
1823
1824 MVT ElemTy = SrcTy.getVectorElementType();
1825 if (ElemTy == MVT::i1)
1826 return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1827
1828 return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1829}
1830
1831SDValue
1832HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1833 const {
1834 // Idx does not need to be a constant.
1835 SDValue VecV = Op.getOperand(i: 0);
1836 SDValue ValV = Op.getOperand(i: 1);
1837 SDValue IdxV = Op.getOperand(i: 2);
1838
1839 const SDLoc &dl(Op);
1840 MVT VecTy = ty(Op: VecV);
1841 MVT ElemTy = VecTy.getVectorElementType();
1842 if (ElemTy == MVT::i1)
1843 return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG);
1844
1845 return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG);
1846}
1847
1848SDValue
1849HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1850 // Lower any-extends of boolean vectors to sign-extends, since they
1851 // translate directly to Q2V. Zero-extending could also be done equally
1852 // fast, but Q2V is used/recognized in more places.
1853 // For all other vectors, use zero-extend.
1854 MVT ResTy = ty(Op);
1855 SDValue InpV = Op.getOperand(i: 0);
1856 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1857 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1858 return LowerHvxSignExt(Op, DAG);
1859 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Op), VT: ResTy, Operand: InpV);
1860}
1861
1862SDValue
1863HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1864 MVT ResTy = ty(Op);
1865 SDValue InpV = Op.getOperand(i: 0);
1866 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1867 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1868 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: false, DAG);
1869 return Op;
1870}
1871
1872SDValue
1873HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1874 MVT ResTy = ty(Op);
1875 SDValue InpV = Op.getOperand(i: 0);
1876 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1877 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1878 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: true, DAG);
1879 return Op;
1880}
1881
1882SDValue
1883HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1884 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1885 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1886 const SDLoc &dl(Op);
1887 MVT ResTy = ty(Op);
1888 SDValue InpV = Op.getOperand(i: 0);
1889 assert(ResTy == ty(InpV));
1890
1891 // Calculate the vectors of 1 and bitwidth(x).
1892 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1893 unsigned ElemWidth = ElemTy.getSizeInBits();
1894
1895 SDValue Vec1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1896 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
1897 SDValue VecW = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1898 Operand: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32));
1899 SDValue VecN1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1900 Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
1901
1902 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1903 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1904 // it separately in custom combine or selection).
1905 SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy,
1906 Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}),
1907 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})});
1908 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy,
1909 Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)});
1910}
1911
1912SDValue
1913HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1914 const SDLoc &dl(Op);
1915 MVT ResTy = ty(Op);
1916 assert(ResTy.getVectorElementType() == MVT::i32);
1917
1918 SDValue Vs = Op.getOperand(i: 0);
1919 SDValue Vt = Op.getOperand(i: 1);
1920
1921 SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy);
1922 unsigned Opc = Op.getOpcode();
1923
1924 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1925 if (Opc == ISD::MULHU)
1926 return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
1927 if (Opc == ISD::MULHS)
1928 return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
1929
1930#ifndef NDEBUG
1931 Op.dump(&DAG);
1932#endif
1933 llvm_unreachable("Unexpected mulh operation");
1934}
1935
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  // Lower the multi-valued {S,U,US}MUL_LOHI nodes, taking advantage of
  // unused results where possible.
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(i: 0);
  SDValue Vv = Op.getOperand(i: 1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(R: 1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal));
    SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv});
    return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
  }

  // Signedness of each operand: USMUL_LOHI is unsigned * signed.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(R: 0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal));
      return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
    }
  }

  // General pre-v62 expansion.
  return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
}
1971
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  // Custom-lower bitcasts between HVX predicate types and scalar integers
  // (in both directions); all other bitcasts are left as-is.
  SDValue Val = Op.getOperand(i: 0);
  MVT ResTy = ty(Op);
  MVT ValTy = ty(Op: Val);
  const SDLoc &dl(Op);

  // Case 1: vector predicate -> scalar integer.
  if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
    // Compress the predicate bits into the low bytes of a vector register.
    SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      // Up to 32 bits: a single extracted word (truncated if narrower).
      SDValue W0 = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
                                       dl, ResTy: MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl, ResTy: MVT::i32, DAG);
      Words.push_back(Elt: W);
    }
    // Combine adjacent words into 64-bit values.
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = getCombine(Hi: Words[i+1], Lo: Words[i], dl, ResTy: MVT::i64, DAG);
      Combines.push_back(Elt: C);
    }

    if (BitWidth == 64)
      return Combines[0];

    return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines);
  }

  // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
  // Splat the input into a 32-element i32 vector, then AND each element
  // with a unique bitmask to isolate individual bits.
  if (ResTy == MVT::v32i1 &&
      (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
      Subtarget.useHVX128BOps()) {
    SDValue Val32 = Val;
    if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
      Val32 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Val);

    MVT VecTy = MVT::getVectorVT(VT: MVT::i32, NumElements: 32);
    SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Val32);
    // Mask[i] isolates bit i of the splatted value.
    SmallVector<SDValue, 32> Mask;
    for (unsigned i = 0; i < 32; ++i)
      Mask.push_back(Elt: DAG.getConstant(Val: 1ull << i, DL: dl, VT: MVT::i32));

    SDValue MaskVec = DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Mask);
    SDValue Anded = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: VecTy, N1: Splat, N2: MaskVec);
    // V2Q turns the nonzero lanes into true predicate bits.
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: Anded);
  }

  if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) {
    // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
    unsigned BitWidth = ValTy.getSizeInBits();
    unsigned HwLen = Subtarget.getVectorLength();
    assert(BitWidth == HwLen);

    MVT ValAsVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: BitWidth / 8);
    SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val);
    // Splat each byte of Val 8 times.
    // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
    // where b0, b1,..., b15 are least to most significant bytes of I.
    SmallVector<SDValue, 128> Bytes;
    // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
    // These are bytes with the LSB rotated left with respect to their index.
    SmallVector<SDValue, 128> Tmp;
    for (unsigned I = 0; I != HwLen / 8; ++I) {
      SDValue Idx = DAG.getConstant(Val: I, DL: dl, VT: MVT::i32);
      SDValue Byte =
          DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i8, N1: ValAsVec, N2: Idx);
      for (unsigned J = 0; J != 8; ++J) {
        Bytes.push_back(Elt: Byte);
        Tmp.push_back(Elt: DAG.getConstant(Val: 1ull << J, DL: dl, VT: MVT::i8));
      }
    }

    MVT ConstantVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
    SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp);
    SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG);

    // Each Byte in the I2V will be set iff corresponding bit is set in Val.
    I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec});
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V);
  }

  return Op;
}
2072
2073SDValue
2074HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2075 // Sign- and zero-extends are legal.
2076 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2077 return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc(Op), VT: ty(Op),
2078 Operand: Op.getOperand(i: 0));
2079}
2080
2081SDValue
2082HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2083 MVT ResTy = ty(Op);
2084 if (ResTy.getVectorElementType() != MVT::i1)
2085 return Op;
2086
2087 const SDLoc &dl(Op);
2088 unsigned HwLen = Subtarget.getVectorLength();
2089 unsigned VecLen = ResTy.getVectorNumElements();
2090 assert(HwLen % VecLen == 0);
2091 unsigned ElemSize = HwLen / VecLen;
2092
2093 MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * 8), NumElements: VecLen);
2094 SDValue S =
2095 DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: 0),
2096 N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 1)),
2097 N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 2)));
2098 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S);
2099}
2100
2101SDValue
2102HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2103 if (SDValue S = getVectorShiftByInt(Op, DAG))
2104 return S;
2105 return Op;
2106}
2107
SDValue
HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
                                           SelectionDAG &DAG) const {
  // Lower FSHL/FSHR: either expand into regular shifts (when the shift
  // amount is a splatted scalar and shifts are profitable), or into the
  // masked funnel-shift nodes MFSHL/MFSHR.
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FSHL || Opc == ISD::FSHR);

  // Make sure the shift amount is within the range of the bitwidth
  // of the element type.
  SDValue A = Op.getOperand(i: 0);
  SDValue B = Op.getOperand(i: 1);
  SDValue S = Op.getOperand(i: 2);

  MVT InpTy = ty(Op: A);
  MVT ElemTy = InpTy.getVectorElementType();

  const SDLoc &dl(Op);
  unsigned ElemWidth = ElemTy.getSizeInBits();
  bool IsLeft = Opc == ISD::FSHL;

  // The expansion into regular shifts produces worse code for i8 and for
  // right shift of i32 on v65+.
  bool UseShifts = ElemTy != MVT::i8;
  if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
    UseShifts = false;

  if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) {
    // If this is a funnel shift by a scalar, lower it into regular shifts.
    SDValue Mask = DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: MVT::i32);
    // ModS = S mod ElemWidth, NegS = ElemWidth - ModS.
    SDValue ModS =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getZExtOrTrunc(Op: SplatV, DL: dl, VT: MVT::i32), Mask});
    SDValue NegS =
        DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32), ModS});
    SDValue IsZero =
        DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: ModS, RHS: getZero(dl, Ty: MVT::i32, DAG), Cond: ISD::SETEQ);
    // FSHL A, B => A <<  | B >>n
    // FSHR A, B => A <<n | B >>
    SDValue Part1 =
        DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS});
    SDValue Part2 =
        DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS});
    SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2});
    // If the shift amount was 0, pick A or B, depending on the direction.
    // The opposite shift will also be by 0, so the "Or" will be incorrect.
    return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or});
  }

  // Non-splat (or unprofitable) case: mask the per-element shift amounts
  // and emit the masked funnel-shift node.
  SDValue Mask = DAG.getSplatBuildVector(
      VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: ElemTy));

  unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
  return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op),
                     Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})});
}
2163
2164SDValue
2165HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2166 const SDLoc &dl(Op);
2167 unsigned IntNo = Op.getConstantOperandVal(i: 0);
2168 SmallVector<SDValue> Ops(Op->ops());
2169
2170 auto Swap = [&](SDValue P) {
2171 return DAG.getMergeValues(Ops: {P.getValue(R: 1), P.getValue(R: 0)}, dl);
2172 };
2173
2174 switch (IntNo) {
2175 case Intrinsic::hexagon_V6_pred_typecast:
2176 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2177 MVT ResTy = ty(Op), InpTy = ty(Op: Ops[1]);
2178 if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) {
2179 if (ResTy == InpTy)
2180 return Ops[1];
2181 return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops[1]);
2182 }
2183 break;
2184 }
2185 case Intrinsic::hexagon_V6_vmpyss_parts:
2186 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2187 return Swap(DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2188 Ops: {Ops[1], Ops[2]}));
2189 case Intrinsic::hexagon_V6_vmpyuu_parts:
2190 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2191 return Swap(DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2192 Ops: {Ops[1], Ops[2]}));
2193 case Intrinsic::hexagon_V6_vmpyus_parts:
2194 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2195 return Swap(DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2196 Ops: {Ops[1], Ops[2]}));
2197 }
2198 } // switch
2199
2200 return Op;
2201}
2202
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower ISD::MLOAD / ISD::MSTORE for HVX.
  // - MLOAD becomes a plain full-vector load, merged with the pass-through
  //   value via VSELECT when a pass-through is present.
  // - MSTORE becomes the predicated store V6_vS32b_qpred_ai; since HVX only
  //   has aligned masked stores, the unaligned case is emitted as two
  //   predicated stores with suitably realigned mask and value.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  // Memory operand covering one full HVX vector at the base address.
  auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: 0, Size: HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru();
    if (isUndef(Op: Thru))
      return Load;
    // Keep loaded lanes where the mask is set, pass-through lanes elsewhere.
    SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru);
    // Return {value, chain}.
    return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: 1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(Val: 0, DL: dl, VT: ty(Op: Base));

  if (MaskN->getAlign().value() % HwLen == 0) {
    // Aligned case: a single predicated store suffices.
    SDValue Store = getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
                             Ops: {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp});
    return Store;
  }

  // Unaligned case.
  // Realign V with respect to address A, producing the two vectors whose
  // lanes the two predicated stores below will cover.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, Ty: ty(Op: V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {V, Z, A}, DAG);
    SDValue HiV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {Z, V, A}, DAG);
    return std::make_pair(x&: LoV, y&: HiV);
  };

  // The mask is realigned in its byte-vector form (Q2V), then converted
  // back to a predicate (V2Q) for each half.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first),
                      DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  // Two predicated stores: one at offset 0, one a full vector further.
  SDValue Offset1 = DAG.getTargetConstant(Val: HwLen, DL: dl, VT: MVT::i32);
  SDValue StoreLo =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp});
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp});
  // Join the chains of the two stores.
  return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: {StoreLo, StoreHi});
}
2272
2273SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2274 SelectionDAG &DAG) const {
2275 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2276 // is legal (done via a pattern).
2277 assert(Subtarget.useHVXQFloatOps());
2278
2279 assert(Op->getOpcode() == ISD::FP_EXTEND);
2280
2281 MVT VecTy = ty(Op);
2282 MVT ArgTy = ty(Op: Op.getOperand(i: 0));
2283 const SDLoc &dl(Op);
2284 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2285
2286 SDValue F16Vec = Op.getOperand(i: 0);
2287
2288 APFloat FloatVal = APFloat(1.0f);
2289 bool Ignored;
2290 FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored);
2291 SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy);
2292 SDValue VmpyVec =
2293 getInstr(MachineOpc: Hexagon::V6_vmpy_qf32_hf, dl, Ty: VecTy, Ops: {F16Vec, Fp16Ones}, DAG);
2294
2295 MVT HalfTy = typeSplit(VecTy).first;
2296 VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG);
2297 SDValue LoVec =
2298 getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.first}, DAG);
2299 SDValue HiVec =
2300 getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.second}, DAG);
2301
2302 SDValue ShuffVec =
2303 getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
2304 Ops: {HiVec, LoVec, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}, DAG);
2305
2306 return ShuffVec;
2307}
2308
2309SDValue
2310HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2311 // Catch invalid conversion ops (just in case).
2312 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2313 Op.getOpcode() == ISD::FP_TO_UINT);
2314
2315 MVT ResTy = ty(Op);
2316 MVT FpTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2317 MVT IntTy = ResTy.getVectorElementType();
2318
2319 if (Subtarget.useHVXIEEEFPOps()) {
2320 // There are only conversions from f16.
2321 if (FpTy == MVT::f16) {
2322 // Other int types aren't legal in HVX, so we shouldn't see them here.
2323 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2324 // Conversions to i8 and i16 are legal.
2325 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2326 return Op;
2327 }
2328 }
2329
2330 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2331 return EqualizeFpIntConversion(Op, DAG);
2332
2333 return ExpandHvxFpToInt(Op, DAG);
2334}
2335
2336SDValue
2337HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2338 // Catch invalid conversion ops (just in case).
2339 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2340 Op.getOpcode() == ISD::UINT_TO_FP);
2341
2342 MVT ResTy = ty(Op);
2343 MVT IntTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2344 MVT FpTy = ResTy.getVectorElementType();
2345
2346 if (Subtarget.useHVXIEEEFPOps()) {
2347 // There are only conversions to f16.
2348 if (FpTy == MVT::f16) {
2349 // Other int types aren't legal in HVX, so we shouldn't see them here.
2350 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2351 // i8, i16 -> f16 is legal.
2352 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2353 return Op;
2354 }
2355 }
2356
2357 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2358 return EqualizeFpIntConversion(Op, DAG);
2359
2360 return ExpandHvxIntToFp(Op, DAG);
2361}
2362
2363HexagonTargetLowering::TypePair
2364HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2365 // Compare the widths of elements of the two types, and extend the narrower
2366 // type to match the with of the wider type. For vector types, apply this
2367 // to the element type.
2368 assert(Ty0.isVector() == Ty1.isVector());
2369
2370 MVT ElemTy0 = Ty0.getScalarType();
2371 MVT ElemTy1 = Ty1.getScalarType();
2372
2373 unsigned Width0 = ElemTy0.getSizeInBits();
2374 unsigned Width1 = ElemTy1.getSizeInBits();
2375 unsigned MaxWidth = std::max(a: Width0, b: Width1);
2376
2377 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2378 if (ScalarTy.isInteger())
2379 return MVT::getIntegerVT(BitWidth: Width);
2380 assert(ScalarTy.isFloatingPoint());
2381 return MVT::getFloatingPointVT(BitWidth: Width);
2382 };
2383
2384 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2385 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2386
2387 if (!Ty0.isVector()) {
2388 // Both types are scalars.
2389 return {WideETy0, WideETy1};
2390 }
2391
2392 // Vector types.
2393 unsigned NumElem = Ty0.getVectorNumElements();
2394 assert(NumElem == Ty1.getVectorNumElements());
2395
2396 return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem),
2397 MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)};
2398}
2399
2400HexagonTargetLowering::TypePair
2401HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2402 // Compare the numbers of elements of two vector types, and widen the
2403 // narrower one to match the number of elements in the wider one.
2404 assert(Ty0.isVector() && Ty1.isVector());
2405
2406 unsigned Len0 = Ty0.getVectorNumElements();
2407 unsigned Len1 = Ty1.getVectorNumElements();
2408 if (Len0 == Len1)
2409 return {Ty0, Ty1};
2410
2411 unsigned MaxLen = std::max(a: Len0, b: Len1);
2412 return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen),
2413 MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)};
2414}
2415
2416MVT
2417HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2418 EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty);
2419 assert(LegalTy.isSimple());
2420 return LegalTy.getSimpleVT();
2421}
2422
2423MVT
2424HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2425 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2426 assert(Ty.getSizeInBits() <= HwWidth);
2427 if (Ty.getSizeInBits() == HwWidth)
2428 return Ty;
2429
2430 MVT ElemTy = Ty.getScalarType();
2431 return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits());
2432}
2433
2434HexagonTargetLowering::VectorPair
2435HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2436 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2437 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2438 // whether an overflow has occurred.
2439 MVT ResTy = ty(Op: A);
2440 assert(ResTy == ty(B));
2441 MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: ResTy.getVectorNumElements());
2442
2443 if (!Signed) {
2444 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2445 // save any instructions.
2446 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
2447 SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT);
2448 return {Add, Ovf};
2449 }
2450
2451 // Signed overflow has happened, if:
2452 // (A, B have the same sign) and (A+B has a different sign from either)
2453 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2454 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
2455 SDValue NotA =
2456 DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getAllOnesConstant(DL: dl, VT: ResTy)});
2457 SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B});
2458 SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B});
2459 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1});
2460 SDValue MSB =
2461 DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT);
2462 return {Add, MSB};
2463}
2464
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {result, overflow}, where "overflow" is the predicate produced
  // by the bias addition below (see emitHvxAddWithOverflow).

  const SDLoc &dl(Val);
  MVT ValTy = ty(Op: Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);  // add "half - 1"
  // bool ovf = (inp > tmp0);                 // the bias addition wrapped
  // uint rup = inp & (1 << Amt);             // LSB of the final quotient;
  //                                          // breaks exact ties to even
  //
  // uint tmp1 = inp >> (Amt-1);     // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);    // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth);
  MVT IntTy = tyVector(Ty: ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: IntTy.getVectorNumElements());
  // Signedness only affects which right shift is used.
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  // Reinterpret the input as same-width integer lanes.
  SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val);
  SDValue LowBits = DAG.getConstant(Val: (1ull << (Amt - 1)) - 1, DL: dl, VT: IntTy);

  // Rup = zext((inp & (1 << Amt)) != 0), i.e. the tie-breaking bit as 0/1.
  SDValue AmtP1 = DAG.getConstant(Val: 1ull << Amt, DL: dl, VT: IntTy);
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1});
  SDValue Zero = getZero(dl, Ty: IntTy, DAG);
  SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy);
  // Tmp0 = inp + LowBits; Ovf marks lanes where this addition overflowed.
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG);

  // Shift in two steps (Amt-1, then 1) so the tie-breaking increment can
  // be inserted between them.
  SDValue AmtM1 = DAG.getConstant(Val: Amt - 1, DL: dl, VT: IntTy);
  SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1);
  SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1);
  SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup);

  // Select tmp2>>1 when the low Amt-1 bits were non-zero (tmp1 != tmp2),
  // otherwise (tmp3 = tmp2 + rup) >> 1.
  SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntTy);
  SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One});
  SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2512
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
      SelectionDAG &DAG) const {
  // Return the high 32 bits of the signed product of the v*i32 vectors A
  // and B (i.e. mulhs), built from halfword multiplies (V60 code path).
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  // mulhs(A,B) =
  // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //                             + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //                Hi(B) Lo(B)
  //                Hi(A) Lo(A)
  //               --------------
  //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //         Hi(B)*Lo(A)         | + dropping the low 16 bits
  //         Hi(A)*Lo(B)         | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh, dl, Ty: VecTy, Ops: {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyhus, dl, Ty: PairTy, Ops: {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(V: P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vaddhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), LoHalf(V: P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves (same as T1, but for B).
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(MachineOpc: Hexagon::V6_vmpyhv, dl, Ty: PairTy, Ops: {T1, T4}, DAG);
  SDValue T5 = LoHalf(V: P3, DAG);
  // Add the Hi(A)*Hi(B) term to the shifted partial sums:
  SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5});
  return T6;
}
2569
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
      bool SignedB, const SDLoc &dl,
      SelectionDAG &DAG) const {
  // Return {Lo, Hi} of the full 64-bit product of the v*i32 vectors A and
  // B with the given signednesses, built from unsigned halfword multiplies
  // with sign corrections applied at the end (V60 code path).
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, B}, DAG);

  // Swap low/high halves in B (vdelta with a 0x02020202 control splat).
  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_lvsplatw, dl, Ty: VecTy,
                        Ops: {DAG.getConstant(Val: 0x02020202, DL: dl, VT: MVT::i32)}, DAG);
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vdelta, dl, Ty: VecTy, Ops: {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy,
                        Ops: {HiHalf(V: P1, DAG), LoHalf(V: P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(MachineOpc: Hexagon::V6_vlsrw, dl, Ty: VecTy, Ops: {LoHalf(V: P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(MachineOpc: Hexagon::V6_vaslw_acc, dl, Ty: VecTy,
                Ops: {LoHalf(V: P0, DAG), LoHalf(V: P2, DAG), S16}, DAG);
  Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: subtract (B if A<0) + (A if B<0) from Hi.
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero});
    SDValue X1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, X0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubw, dl, Ty: VecTy, Ops: {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubwq, dl, Ty: VecTy, Ops: {Q1, Hi, A}, DAG);
  } else {
    // Fully unsigned: no correction needed.
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
2645
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
      SDValue B, bool SignedB,
      const SDLoc &dl,
      SelectionDAG &DAG) const {
  // Return {Lo, Hi} of the full 64-bit product of the v*i32 vectors A and
  // B with the given signednesses (V62 code path): compute the signed
  // product directly, then correct Hi for any unsigned operands.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh_64, dl, Ty: PairTy, Ops: {A, B}, DAG);
  SDValue P1 =
      getInstr(MachineOpc: Hexagon::V6_vmpyowh_64_acc, dl, Ty: PairTy, Ops: {P0, A, B}, DAG);
  SDValue Lo = LoHalf(V: P1, DAG);
  SDValue Hi = HiHalf(V: P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vandvqv, dl, Ty: VecTy, Ops: {Q0, B}, DAG);
    SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, T0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddw, dl, Ty: VecTy, Ops: {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q0, Hi, B}, DAG);
  }

  // Signed*signed needs no correction; Lo is the same in all cases.
  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
2699
2700SDValue
2701HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2702 const {
2703 // Rewrite conversion between integer and floating-point in such a way that
2704 // the integer type is extended/narrowed to match the bitwidth of the
2705 // floating-point type, combined with additional integer-integer extensions
2706 // or narrowings to match the original input/result types.
2707 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2708 //
2709 // The input/result types are not required to be legal, but if they are
2710 // legal, this function should not introduce illegal types.
2711
2712 unsigned Opc = Op.getOpcode();
2713 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2714 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2715
2716 SDValue Inp = Op.getOperand(i: 0);
2717 MVT InpTy = ty(Op: Inp);
2718 MVT ResTy = ty(Op);
2719
2720 if (InpTy == ResTy)
2721 return Op;
2722
2723 const SDLoc &dl(Op);
2724 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2725
2726 auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy);
2727 SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG);
2728 SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp);
2729 SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG);
2730 return Res;
2731}
2732
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  // Expand FP_TO_SINT/FP_TO_UINT by decoding the IEEE representation with
  // integer arithmetic. Requires the fp and int elements to have the same
  // width (asserted below).
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // Reference algorithm (shown for the f32 -> i32 case):
  //
  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: ResTy.getVectorElementCount());

  // Equivalent HVX instruction sequence:
  //
  // Zero = V6_vd0();
  // Neg = V6_vgtw(Zero, Inp);
  // One = V6_lvsplatw(1);
  // M80 = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int = V6_vmux(Neg, Zero, Frc23)

  // IEEE layout of the input's element type: 1 sign bit, ExpWidth exponent
  // bits (bias ExpBias), FracWidth fraction bits.
  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  // Work on the raw bit pattern as same-width integer lanes.
  SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0);
  SDValue Zero = getZero(dl, Ty: ResTy, DAG);
  SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT);
  // M80 = sign-bit mask; M7F = maximum positive value.
  SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: ResTy);
  SDValue M7F = DAG.getConstant(Val: (1ull << (ElemWidth - 1)) - 1, DL: dl, VT: ResTy);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResTy);
  // ExpM1 = exponent minus 1 ("expm1" in the reference code above).
  SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One});
  SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80});
  SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy);
  SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE});

  // Frc01 = fraction with the implicit leading 1 inserted.
  SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy);
  SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80});

  // Rsh01 = right-shift amount, clamped to ElemWidth; Frc02 = shifted
  // fraction.
  SDValue MN2 = DAG.getConstant(Val: ElemWidth - 2, DL: dl, VT: ResTy);
  SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1});
  SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy);
  SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    // Negate for negative inputs, and saturate to INT_MIN/INT_MAX when the
    // (clamped) shift amount is not positive.
    SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT);
    SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02});
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd});
  } else {
    // Unsigned: negative inputs become 0; overflow saturates upwards.
    assert(Opc == ISD::FP_TO_UINT);
    SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT);
    SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02);
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23);
  }

  return Int;
}
2861
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  // Expand a vector [SU]INT_TO_FP, whose integer input type has the same
  // total width as the FP result type, into integer operations that build
  // the IEEE encoding (sign | biased exponent | rounded fraction) directly.
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // Reference scalar implementation (i32 -> f32 case):
  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  SDValue Zero = getZero(dl, Ty: InpTy, DAG);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: InpTy);
  SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ);
  // Work on the magnitude; for the signed conversion the sign bit is
  // reattached after rounding.
  SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0;
  // Normalize: shifting left by CTLZ+1 drops the leading 1 (implicit in the
  // IEEE encoding) and puts the fraction in the top bits.
  SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs);
  SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One});
  SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft});

  // Rounding shift of the fraction into its final position. Ovf reports the
  // carry-out of the rounding, which requires bumping the exponent.
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + 1, Signed: false, DAG);
  if (Signed) {
    // OR the sign bit into the (otherwise clear) top bit.
    SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT);
    SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: InpTy);
    SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero});
    Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac});
  }

  // Biased exponent: ElemWidth - NLeft + renorm + ExpBias, shifted into the
  // exponent field (left by FracWidth).
  SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy);
  SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy);
  SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy,
                     Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)});
  SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3});
  // Zero has no leading 1, so it cannot be normalized; select the all-zero
  // (+0.0) encoding for it explicitly.
  SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1);

  return Flt;
}
2932
2933SDValue
2934HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2935 unsigned Opc = Op.getOpcode();
2936 unsigned TLOpc;
2937 switch (Opc) {
2938 case ISD::ANY_EXTEND:
2939 case ISD::SIGN_EXTEND:
2940 case ISD::ZERO_EXTEND:
2941 TLOpc = HexagonISD::TL_EXTEND;
2942 break;
2943 case ISD::TRUNCATE:
2944 TLOpc = HexagonISD::TL_TRUNCATE;
2945 break;
2946#ifndef NDEBUG
2947 Op.dump(&DAG);
2948#endif
2949 llvm_unreachable("Unexpected operator");
2950 }
2951
2952 const SDLoc &dl(Op);
2953 return DAG.getNode(Opcode: TLOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0),
2954 N2: DAG.getUNDEF(VT: MVT::i128), // illegal type
2955 N3: DAG.getConstant(Val: Opc, DL: dl, VT: MVT::i32));
2956}
2957
2958SDValue
2959HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2960 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
2961 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
2962 unsigned Opc = Op.getConstantOperandVal(i: 2);
2963 return DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: ty(Op), Operand: Op.getOperand(i: 0));
2964}
2965
2966HexagonTargetLowering::VectorPair
2967HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
2968 assert(!Op.isMachineOpcode());
2969 SmallVector<SDValue, 2> OpsL, OpsH;
2970 const SDLoc &dl(Op);
2971
2972 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
2973 MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first;
2974 SDValue TV = DAG.getValueType(Ty);
2975 return std::make_pair(x&: TV, y&: TV);
2976 };
2977
2978 for (SDValue A : Op.getNode()->ops()) {
2979 auto [Lo, Hi] =
2980 ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A);
2981 // Special case for type operand.
2982 switch (Op.getOpcode()) {
2983 case ISD::SIGN_EXTEND_INREG:
2984 case HexagonISD::SSAT:
2985 case HexagonISD::USAT:
2986 if (const auto *N = dyn_cast<const VTSDNode>(Val: A.getNode()))
2987 std::tie(args&: Lo, args&: Hi) = SplitVTNode(N);
2988 break;
2989 }
2990 OpsL.push_back(Elt: Lo);
2991 OpsH.push_back(Elt: Hi);
2992 }
2993
2994 MVT ResTy = ty(Op);
2995 MVT HalfTy = typeSplit(VecTy: ResTy).first;
2996 SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL);
2997 SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH);
2998 return {L, H};
2999}
3000
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a memory operation (LOAD/STORE/MLOAD/MSTORE) on an HVX vector pair
  // into two single-vector operations at Base and Base+HwLen, then stitch the
  // results (and chains) back together.
  auto *MemN = cast<MemSDNode>(Val: Op.getNode());

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(Ty: MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(VecTy: MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // The high half lives one HW vector (HwLen bytes) past the base.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: HwLen), DL: dl);
  unsigned MemOpc = MemN->getOpcode();

  // Derive per-half memory operands from the original MMO. For the masked
  // ops the accessed size is unknown (depends on the mask).
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, Offset: 0, Size: MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, Offset: HwLen, Size: MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base0, MMO: MOp0);
    SDValue Load1 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base1, MMO: MOp1);
    // Result 0: concatenated value; result 1: combined chain.
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: Load0, N2: Load1),
          DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                      N1: Load0.getValue(R: 1), N2: Load1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store0, N2: Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked load/store: the mask (and pass-through/stored value) must be
  // split along with the address.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base0, Offset, Mask: Masks.first,
                          Src0: Thru.first, MemVT: SingleTy, MMO: MOp0, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base1, Offset, Mask: Masks.second,
                          Src0: Thru.second, MemVT: SingleTy, MMO: MOp1, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: MLoad0, N2: MLoad1),
          DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                      N1: MLoad0.getValue(R: 1), N2: MLoad1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset,
                                         Mask: Masks.first, MemVT: SingleTy, MMO: MOp0,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset,
                                         Mask: Masks.second, MemVT: SingleTy, MMO: MOp1,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MStore0, N2: MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(G: &DAG);
  llvm_unreachable(Name.c_str());
}
3082
3083SDValue
3084HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3085 const SDLoc &dl(Op);
3086 auto *LoadN = cast<LoadSDNode>(Val: Op.getNode());
3087 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3088 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3089 "Not widening loads of i1 yet");
3090
3091 SDValue Chain = LoadN->getChain();
3092 SDValue Base = LoadN->getBasePtr();
3093 SDValue Offset = DAG.getUNDEF(VT: MVT::i32);
3094
3095 MVT ResTy = ty(Op);
3096 unsigned HwLen = Subtarget.getVectorLength();
3097 unsigned ResLen = ResTy.getStoreSize();
3098 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3099
3100 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
3101 SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
3102 Ops: {DAG.getConstant(Val: ResLen, DL: dl, VT: MVT::i32)}, DAG);
3103
3104 MVT LoadTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
3105 MachineFunction &MF = DAG.getMachineFunction();
3106 auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: 0, Size: HwLen);
3107
3108 SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask,
3109 Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp,
3110 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
3111 SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG);
3112 return DAG.getMergeValues(Ops: {Value, Load.getValue(R: 1)}, dl);
3113}
3114
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  // Widen a short vector store to a full HVX vector: pad the value with
  // undef up to HwLen bytes, then emit a masked store that writes only the
  // first ValueLen byte lanes.
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Val: Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  // Work on the value as bytes; the store is byte-masked.
  SDValue Value = opCastElem(Vec: StoreN->getValue(), ElemTy: MVT::i8, DAG);
  MVT ValueTy = ty(Op: Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  // Repeatedly double the vector with undef in the upper half until it
  // reaches the full HW vector length.
  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG);
    Len = ty(Op: Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia

  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  // Predicate register enabling the first ValueLen byte lanes.
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                          Ops: {DAG.getConstant(Val: ValueLen, DL: dl, VT: MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: 0, Size: HwLen);
  return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value),
                            MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
}
3148
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  // Widen a SETCC on a short vector type: pad both operands with undef to a
  // full HVX type, compare, and extract the original-length prefix of the
  // result. Returns SDValue() if the widened operand type is not HVX.
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
  MVT ElemTy = ty(Op: Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  // Number of elements of this width that fill one HW vector (HwLen bytes).
  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen);
  if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy);
  SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy,
                              Ops: {WideOp0, WideOp1, Op.getOperand(i: 2)});

  // Take the prefix of the wide result that the type legalizer expects.
  EVT RetTy = typeLegalize(Ty: ty(Op), DAG);
  return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy,
                     Ops: {SetCC, getZero(dl, Ty: MVT::i32, DAG)});
}
3173
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  // Main dispatch for lowering HVX operations. Operations on vector pairs
  // that can be split are split first; everything else is routed to the
  // dedicated LowerHvx* routine for its opcode.
  unsigned Opc = Op.getOpcode();
  bool IsPairOp = isHvxPairTy(Ty: ty(Op)) ||
                  llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) {
                    return isHvxPairTy(Ty: ty(Op: V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        // FP<->int conversions can only be split when input and result are
        // the same total width (otherwise the halves change type width).
        if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: 0)).getSizeInBits())
          return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
        break;
      case ISD::ABS:
      case ISD::CTPOP:
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
      case ISD::FADD:
      case ISD::FSUB:
      case ISD::FMUL:
      case ISD::FMINIMUMNUM:
      case ISD::FMAXIMUMNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::SRA:
      case ISD::SHL:
      case ISD::SRL:
      case ISD::FSHL:
      case ISD::FSHR:
      case ISD::SMIN:
      case ISD::SMAX:
      case ISD::UMIN:
      case ISD::UMAX:
      case ISD::SETCC:
      case ISD::VSELECT:
      case ISD::SIGN_EXTEND_INREG:
      case ISD::SPLAT_VECTOR:
        // Element-wise operations: always safe to split and rejoin.
        return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
        // In general, sign- and zero-extends can't be split and still
        // be legal. The only exception is extending bool vectors.
        if (ty(Op: Op.getOperand(i: 0)).getVectorElementType() == MVT::i1)
          return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
        break;
    }
  }

  switch (Opc) {
    default:
      break;
    case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
    case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
    case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
    case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
    case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
    case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
    case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
    case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
    case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                     return LowerHvxShift(Op, DAG);
    case ISD::FSHL:
    case ISD::FSHR:                    return LowerHvxFunnelShift(Op, DAG);
    case ISD::MULHS:
    case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:               return LowerHvxMulLoHi(Op, DAG);
    case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
    case ISD::SETCC:
    case ISD::INTRINSIC_VOID:          return Op;
    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
    case ISD::MLOAD:
    case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
    // Unaligned loads will be handled by the default lowering.
    case ISD::LOAD:                    return SDValue();
    case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:              return LowerHvxFpToInt(Op, DAG);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:              return LowerHvxIntToFp(Op, DAG);

    // Special nodes:
    case HexagonISD::SMUL_LOHI:
    case HexagonISD::UMUL_LOHI:
    case HexagonISD::USMUL_LOHI:       return LowerHvxMulLoHi(Op, DAG);
  }
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}
3286
3287SDValue
3288HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3289 const {
3290 // Rewrite the extension/truncation/saturation op into steps where each
3291 // step changes the type widths by a factor of 2.
3292 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3293 //
3294 // Some of the vector types in Op may not be legal.
3295
3296 unsigned Opc = Op.getOpcode();
3297 switch (Opc) {
3298 case HexagonISD::SSAT:
3299 case HexagonISD::USAT:
3300 case HexagonISD::TL_EXTEND:
3301 case HexagonISD::TL_TRUNCATE:
3302 break;
3303 case ISD::ANY_EXTEND:
3304 case ISD::ZERO_EXTEND:
3305 case ISD::SIGN_EXTEND:
3306 case ISD::TRUNCATE:
3307 llvm_unreachable("ISD:: ops will be auto-folded");
3308 break;
3309#ifndef NDEBUG
3310 Op.dump(&DAG);
3311#endif
3312 llvm_unreachable("Unexpected operation");
3313 }
3314
3315 SDValue Inp = Op.getOperand(i: 0);
3316 MVT InpTy = ty(Op: Inp);
3317 MVT ResTy = ty(Op);
3318
3319 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3320 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3321 assert(InpWidth != ResWidth);
3322
3323 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3324 return Op;
3325
3326 const SDLoc &dl(Op);
3327 unsigned NumElems = InpTy.getVectorNumElements();
3328 assert(NumElems == ResTy.getVectorNumElements());
3329
3330 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3331 MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems);
3332 switch (Opc) {
3333 case HexagonISD::SSAT:
3334 case HexagonISD::USAT:
3335 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)});
3336 case HexagonISD::TL_EXTEND:
3337 case HexagonISD::TL_TRUNCATE:
3338 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: 1), Op.getOperand(i: 2)});
3339 default:
3340 llvm_unreachable("Unexpected opcode");
3341 }
3342 };
3343
3344 SDValue S = Inp;
3345 if (InpWidth < ResWidth) {
3346 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3347 while (InpWidth * 2 <= ResWidth)
3348 S = repeatOp(InpWidth *= 2, S);
3349 } else {
3350 // InpWidth > ResWidth
3351 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3352 while (InpWidth / 2 >= ResWidth)
3353 S = repeatOp(InpWidth /= 2, S);
3354 }
3355 return S;
3356}
3357
SDValue
HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
  // Legalize a resize-like node (SSAT/USAT/TL_EXTEND/TL_TRUNCATE) whose
  // input or result type is not legal, by widening to HVX, splitting to HVX,
  // or (when both types are already legal) unwrapping the TL_* node.
  SDValue Inp0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Inp0);
  MVT ResTy = ty(Op);
  unsigned InpWidth = InpTy.getSizeInBits();
  unsigned ResWidth = ResTy.getSizeInBits();
  unsigned Opc = Op.getOpcode();

  if (shouldWidenToHvx(Ty: InpTy, DAG) || shouldWidenToHvx(Ty: ResTy, DAG)) {
    // First, make sure that the narrower type is widened to HVX.
    // This may cause the result to be wider than what the legalizer
    // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
    // desired type.
    auto [WInpTy, WResTy] =
        InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy)
                            : typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy));
    SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG);
    SDValue S;
    if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
      // TL_* nodes carry two extra operands (payload + original opcode).
      S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: Op.getOperand(i: 1),
                      N3: Op.getOperand(i: 2));
    } else {
      // SSAT/USAT carry the target VT as a VT operand.
      S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy));
    }
    // Break multi-step width changes into factor-of-2 steps.
    SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG);
    return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: 0, DAG);
  } else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
    return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
  } else {
    assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
    return RemoveTLWrapper(Op, DAG);
  }
  llvm_unreachable("Unexpected situation");
}
3393
void
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  // Custom operand-legalization hook for HVX: pushes replacement values
  // into Results, or leaves Results empty to fall back to the default
  // legalization for this node.
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0;   // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(i: 0);

  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::TRUNCATE:
      // Defer ext/trunc between HVX element types via a TL_* wrapper so the
      // type legalizer does not fold them prematurely.
      if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
          Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
        Results.push_back(Elt: CreateTLWrapper(Op, DAG));
      }
      break;
    case ISD::SETCC:
      if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) {
        if (SDValue T = WidenHvxSetCC(Op, DAG))
          Results.push_back(Elt: T);
      }
      break;
    case ISD::STORE: {
      if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) {
        SDValue Store = WidenHvxStore(Op, DAG);
        Results.push_back(Elt: Store);
      }
      break;
    }
    case ISD::MLOAD:
      if (isHvxPairTy(Ty: ty(Op))) {
        SDValue S = SplitHvxMemOp(Op, DAG);
        assert(S->getOpcode() == ISD::MERGE_VALUES);
        // Masked load produces two results: value and chain.
        Results.push_back(Elt: S.getOperand(i: 0));
        Results.push_back(Elt: S.getOperand(i: 1));
      }
      break;
    case ISD::MSTORE:
      if (isHvxPairTy(Ty: ty(Op: Op->getOperand(Num: 1)))) {   // Stored value
        SDValue S = SplitHvxMemOp(Op, DAG);
        Results.push_back(Elt: S);
      }
      break;
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      // Equalize input and result widths first; the equal-width conversion
      // is then legalized separately.
      if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
        SDValue T = EqualizeFpIntConversion(Op, DAG);
        Results.push_back(Elt: T);
      }
      break;
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
    case HexagonISD::TL_EXTEND:
    case HexagonISD::TL_TRUNCATE:
      Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
      break;
    default:
      break;
  }
}
3459
void
HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  // Custom result-legalization hook for HVX: pushes replacement values for
  // nodes whose result type is illegal, or leaves Results empty to use the
  // default legalization.
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0;   // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(i: 0);

  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::TRUNCATE:
      // Defer ext/trunc between HVX element types via a TL_* wrapper so the
      // type legalizer does not fold them prematurely.
      if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
          Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
        Results.push_back(Elt: CreateTLWrapper(Op, DAG));
      }
      break;
    case ISD::SETCC:
      if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
        if (SDValue T = WidenHvxSetCC(Op, DAG))
          Results.push_back(Elt: T);
      }
      break;
    case ISD::LOAD: {
      if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
        SDValue Load = WidenHvxLoad(Op, DAG);
        assert(Load->getOpcode() == ISD::MERGE_VALUES);
        // The widened load produces two results: value and chain.
        Results.push_back(Elt: Load.getOperand(i: 0));
        Results.push_back(Elt: Load.getOperand(i: 1));
      }
      break;
    }
    case ISD::BITCAST:
      if (isHvxBoolTy(Ty: ty(Op: Inp0))) {
        SDValue C = LowerHvxBitcast(Op, DAG);
        Results.push_back(Elt: C);
      }
      break;
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      // Equalize input and result widths first; the equal-width conversion
      // is then legalized separately.
      if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
        SDValue T = EqualizeFpIntConversion(Op, DAG);
        Results.push_back(Elt: T);
      }
      break;
    case HexagonISD::SSAT:
    case HexagonISD::USAT:
    case HexagonISD::TL_EXTEND:
    case HexagonISD::TL_TRUNCATE:
      Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
      break;
    default:
      break;
  }
}
3517
3518SDValue
3519HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3520 DAGCombinerInfo &DCI) const {
3521 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3522 // to extract-subvector (shuffle V, pick even, pick odd)
3523
3524 assert(Op.getOpcode() == ISD::TRUNCATE);
3525 SelectionDAG &DAG = DCI.DAG;
3526 const SDLoc &dl(Op);
3527
3528 if (Op.getOperand(i: 0).getOpcode() == ISD::BITCAST)
3529 return SDValue();
3530 SDValue Cast = Op.getOperand(i: 0);
3531 SDValue Src = Cast.getOperand(i: 0);
3532
3533 EVT TruncTy = Op.getValueType();
3534 EVT CastTy = Cast.getValueType();
3535 EVT SrcTy = Src.getValueType();
3536 if (SrcTy.isSimple())
3537 return SDValue();
3538 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3539 return SDValue();
3540 unsigned SrcLen = SrcTy.getVectorNumElements();
3541 unsigned CastLen = CastTy.getVectorNumElements();
3542 if (2 * CastLen != SrcLen)
3543 return SDValue();
3544
3545 SmallVector<int, 128> Mask(SrcLen);
3546 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3547 Mask[i] = 2 * i;
3548 Mask[i + CastLen] = 2 * i + 1;
3549 }
3550 SDValue Deal =
3551 DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask);
3552 return opSplit(Vec: Deal, dl, DAG).first;
3553}
3554
SDValue
HexagonTargetLowering::combineConcatVectorsBeforeLegal(
    SDValue Op, DAGCombinerInfo &DCI) const {
  // Fold
  //   concat (shuffle x, y, m1), (shuffle x, y, m2)
  // into
  //   shuffle (concat x, y), undef, m3
  if (Op.getNumOperands() != 2)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  const SDLoc &dl(Op);
  SDValue V0 = Op.getOperand(i: 0);
  SDValue V1 = Op.getOperand(i: 1);

  if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();
  if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();

  // Collect the distinct shuffle inputs; the fold needs at most two.
  SetVector<SDValue> Order;
  Order.insert(X: V0.getOperand(i: 0));
  Order.insert(X: V0.getOperand(i: 1));
  Order.insert(X: V1.getOperand(i: 0));
  Order.insert(X: V1.getOperand(i: 1));

  if (Order.size() > 2)
    return SDValue();

  // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
  // result must be the same.
  EVT InpTy = V0.getValueType();
  assert(InpTy.isVector());
  unsigned InpLen = InpTy.getVectorNumElements();

  // Translate each shuffle's mask into indices of concat(C0, C1): index M
  // of input Src becomes M (Src is Order[0]) or M + InpLen (Src is Order[1]).
  SmallVector<int, 128> LongMask;
  auto AppendToMask = [&](SDValue Shuffle) {
    auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode());
    ArrayRef<int> Mask = SV->getMask();
    SDValue X = Shuffle.getOperand(i: 0);
    SDValue Y = Shuffle.getOperand(i: 1);
    for (int M : Mask) {
      if (M == -1) {
        // Undef lane: carried through unchanged.
        LongMask.push_back(Elt: M);
        continue;
      }
      SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
      if (static_cast<unsigned>(M) >= InpLen)
        M -= InpLen;

      int OutOffset = Order[0] == Src ? 0 : InpLen;
      LongMask.push_back(Elt: M + OutOffset);
    }
  };

  AppendToMask(V0);
  AppendToMask(V1);

  SDValue C0 = Order.front();
  SDValue C1 = Order.back();   // Can be same as front
  EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext());

  SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1});
  return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask);
}
3620
SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
      const {
  // Target DAG combines for HVX nodes. Returns a replacement value, or
  // SDValue() when no combine applies.
  const SDLoc &dl(N);
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op(N, 0);
  unsigned Opc = Op.getOpcode();

  SmallVector<SDValue, 4> Ops(N->ops());

  // These two run even before operation legalization.
  if (Opc == ISD::TRUNCATE)
    return combineTruncateBeforeLegal(Op, DCI);
  if (Opc == ISD::CONCAT_VECTORS)
    return combineConcatVectorsBeforeLegal(Op, DCI);

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  switch (Opc) {
    case ISD::VSELECT: {
      // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
      SDValue Cond = Ops[0];
      if (Cond->getOpcode() == ISD::XOR) {
        SDValue C0 = Cond.getOperand(i: 0), C1 = Cond.getOperand(i: 1);
        if (C1->getOpcode() == HexagonISD::QTRUE)
          return DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0, N2: Ops[2], N3: Ops[1]);
      }
      break;
    }
    case HexagonISD::V2Q:
      // V2Q of a constant splat folds to a constant predicate.
      if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
        if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops[0].getOperand(i: 0)))
          return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op))
                             : DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op));
      }
      break;
    case HexagonISD::Q2V:
      // Q2V of a constant predicate folds to an all-ones/all-zeros vector.
      if (Ops[0].getOpcode() == HexagonISD::QTRUE)
        return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ty(Op),
                           Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
      if (Ops[0].getOpcode() == HexagonISD::QFALSE)
        return getZero(dl, Ty: ty(Op), DAG);
      break;
    case HexagonISD::VINSERTW0:
      // Inserting an undef word changes nothing.
      if (isUndef(Op: Ops[1]))
        return Ops[0];
      break;
    case HexagonISD::VROR: {
      // (vror (vror v, a), b) -> (vror v, a+b)
      if (Ops[0].getOpcode() == HexagonISD::VROR) {
        SDValue Vec = Ops[0].getOperand(i: 0);
        SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(i: 1);
        SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1});
        return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot});
      }
      break;
    }
  }

  return SDValue();
}
3681
3682bool
3683HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3684 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
3685 return false;
3686 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
3687 if (Action == TargetLoweringBase::TypeSplitVector)
3688 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
3689 return false;
3690}
3691
3692bool
3693HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3694 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
3695 return false;
3696 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
3697 if (Action == TargetLoweringBase::TypeWidenVector)
3698 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
3699 return false;
3700}
3701
3702bool
3703HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3704 if (!Subtarget.useHVXOps())
3705 return false;
3706 // If the type of any result, or any operand type are HVX vector types,
3707 // this is an HVX operation.
3708 auto IsHvxTy = [this](EVT Ty) {
3709 return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true);
3710 };
3711 auto IsHvxOp = [this](SDValue Op) {
3712 return Op.getValueType().isSimple() &&
3713 Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true);
3714 };
3715 if (llvm::any_of(Range: N->values(), P: IsHvxTy) || llvm::any_of(Range: N->ops(), P: IsHvxOp))
3716 return true;
3717
3718 // Check if this could be an HVX operation after type widening.
3719 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3720 if (!Op.getValueType().isSimple())
3721 return false;
3722 MVT ValTy = ty(Op);
3723 return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG);
3724 };
3725
3726 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3727 if (IsWidenedToHvx(SDValue(N, i)))
3728 return true;
3729 }
3730 return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx);
3731}
3732

// source code of llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp