1//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// This file contains a TargetTransformInfoImplBase conforming object specific to the
10/// NVPTX target machine. It uses the target's detailed information to
11/// provide more precise answers to certain TTI queries, while letting the
12/// target independent and default TTI implementations handle the rest.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
18
19#include "MCTargetDesc/NVPTXBaseInfo.h"
20#include "NVPTXTargetMachine.h"
21#include "NVPTXUtilities.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/CodeGen/BasicTTIImpl.h"
24#include "llvm/CodeGen/TargetLowering.h"
25#include <optional>
26
27namespace llvm {
28
29class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
30 typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
31 typedef TargetTransformInfo TTI;
32 friend BaseT;
33
34 const NVPTXSubtarget *ST;
35 const NVPTXTargetLowering *TLI;
36
37 const NVPTXSubtarget *getST() const { return ST; };
38 const NVPTXTargetLowering *getTLI() const { return TLI; };
39
40public:
41 explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
42 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
43 TLI(ST->getTargetLowering()) {}
44
45 bool hasBranchDivergence(const Function *F = nullptr) const override {
46 return true;
47 }
48
49 bool isSourceOfDivergence(const Value *V) const override;
50
51 unsigned getFlatAddressSpace() const override {
52 return AddressSpace::ADDRESS_SPACE_GENERIC;
53 }
54
55 bool
56 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
57 return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
58 AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
59 }
60
61 std::optional<Instruction *>
62 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
63
64 // Loads and stores can be vectorized if the alignment is at least as big as
65 // the load/store we want to vectorize.
66 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
67 unsigned AddrSpace) const override {
68 return Alignment >= ChainSizeInBytes;
69 }
70 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
71 unsigned AddrSpace) const override {
72 return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
73 }
74
75 // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
76 // We conservatively return 1 here which is just enough to enable the
77 // vectorizers but disables heuristics based on the number of registers.
78 // FIXME: Return a more reasonable number, while keeping an eye on
79 // LoopVectorizer's unrolling heuristics.
80 unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }
81
82 // Only <2 x half> should be vectorized, so always return 32 for the vector
83 // register size.
84 TypeSize
85 getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
86 return TypeSize::getFixed(ExactSize: 32);
87 }
88 unsigned getMinVectorRegisterBitWidth() const override { return 32; }
89
90 // We don't want to prevent inlining because of target-cpu and -features
91 // attributes that were added to newer versions of LLVM/Clang: There are
92 // no incompatible functions in PTX, ptxas will throw errors in such cases.
93 bool areInlineCompatible(const Function *Caller,
94 const Function *Callee) const override {
95 return true;
96 }
97
98 // Increase the inlining cost threshold by a factor of 11, reflecting that
99 // calls are particularly expensive in NVPTX.
100 unsigned getInliningThresholdMultiplier() const override { return 11; }
101
102 InstructionCost
103 getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
104 TTI::TargetCostKind CostKind) const override;
105
106 InstructionCost getArithmeticInstrCost(
107 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
108 TTI::OperandValueInfo Op1Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None},
109 TTI::OperandValueInfo Op2Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None},
110 ArrayRef<const Value *> Args = {},
111 const Instruction *CxtI = nullptr) const override;
112
113 InstructionCost getScalarizationOverhead(
114 VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
115 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
116 ArrayRef<Value *> VL = {}) const override {
117 if (!InTy->getElementCount().isFixed())
118 return InstructionCost::getInvalid();
119
120 auto VT = getTLI()->getValueType(DL, Ty: InTy);
121 auto NumElements = InTy->getElementCount().getFixedValue();
122 InstructionCost Cost = 0;
123 if (Insert && !VL.empty()) {
124 bool AllConstant = all_of(Range: seq(Size: NumElements), P: [&](int Idx) {
125 return !DemandedElts[Idx] || isa<Constant>(Val: VL[Idx]);
126 });
127 if (AllConstant) {
128 Cost += TTI::TCC_Free;
129 Insert = false;
130 }
131 }
132 if (Insert && NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) {
133 // Can be built in a single 32-bit mov (64-bit regs are emulated in SASS
134 // with 2x 32-bit regs)
135 Cost += 1;
136 Insert = false;
137 }
138 if (Insert && VT == MVT::v4i8) {
139 InstructionCost Cost = 3; // 3 x PRMT
140 for (auto Idx : seq(Size: NumElements))
141 if (DemandedElts[Idx])
142 Cost += 1; // zext operand to i32
143 Insert = false;
144 }
145 return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
146 Extract, CostKind,
147 ForPoisonSrc, VL);
148 }
149
150 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
151 TTI::UnrollingPreferences &UP,
152 OptimizationRemarkEmitter *ORE) const override;
153
154 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
155 TTI::PeelingPreferences &PP) const override;
156
157 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
158 // Volatile loads/stores are only supported for shared and global address
159 // spaces, or for generic AS that maps to them.
160 if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
161 AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
162 AddrSpace == llvm::ADDRESS_SPACE_SHARED))
163 return false;
164
165 switch(I->getOpcode()){
166 default:
167 return false;
168 case Instruction::Load:
169 case Instruction::Store:
170 return true;
171 }
172 }
173
174 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
175 Intrinsic::ID IID) const override;
176
177 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;
178
179 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
180 Value *NewV) const override;
181 unsigned getAssumedAddrSpace(const Value *V) const override;
182
183 void collectKernelLaunchBounds(
184 const Function &F,
185 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
186
187 bool shouldBuildRelLookupTables() const override {
188 // Self-referential globals are not supported.
189 return false;
190 }
191};
192
193} // end namespace llvm
194
195#endif
196

source code of llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h