1//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// This file contains a TargetTransformInfoImplBase conforming object specific to the
10/// NVPTX target machine. It uses the target's detailed information to
11/// provide more precise answers to certain TTI queries, while letting the
12/// target independent and default TTI implementations handle the rest.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
17#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
18
19#include "MCTargetDesc/NVPTXBaseInfo.h"
20#include "NVPTXTargetMachine.h"
21#include "NVPTXUtilities.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/CodeGen/BasicTTIImpl.h"
24#include "llvm/CodeGen/TargetLowering.h"
25#include <optional>
26
27namespace llvm {
28
29class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
30 typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
31 typedef TargetTransformInfo TTI;
32 friend BaseT;
33
34 const NVPTXSubtarget *ST;
35 const NVPTXTargetLowering *TLI;
36
37 const NVPTXSubtarget *getST() const { return ST; };
38 const NVPTXTargetLowering *getTLI() const { return TLI; };
39
40public:
41 explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
42 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
43 TLI(ST->getTargetLowering()) {}
44
45 bool hasBranchDivergence(const Function *F = nullptr) const override {
46 return true;
47 }
48
49 bool isSourceOfDivergence(const Value *V) const override;
50
51 unsigned getFlatAddressSpace() const override {
52 return AddressSpace::ADDRESS_SPACE_GENERIC;
53 }
54
55 bool
56 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
57 return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
58 AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
59 }
60
61 std::optional<Instruction *>
62 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
63
64 // Loads and stores can be vectorized if the alignment is at least as big as
65 // the load/store we want to vectorize.
66 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
67 unsigned AddrSpace) const override {
68 return Alignment >= ChainSizeInBytes;
69 }
70 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
71 unsigned AddrSpace) const override {
72 return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
73 }
74
75 // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
76 // We conservatively return 1 here which is just enough to enable the
77 // vectorizers but disables heuristics based on the number of registers.
78 // FIXME: Return a more reasonable number, while keeping an eye on
79 // LoopVectorizer's unrolling heuristics.
80 unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }
81
82 // Only <2 x half> should be vectorized, so always return 32 for the vector
83 // register size.
84 TypeSize
85 getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
86 return TypeSize::getFixed(ExactSize: 32);
87 }
88 unsigned getMinVectorRegisterBitWidth() const override { return 32; }
89
90 // We don't want to prevent inlining because of target-cpu and -features
91 // attributes that were added to newer versions of LLVM/Clang: There are
92 // no incompatible functions in PTX, ptxas will throw errors in such cases.
93 bool areInlineCompatible(const Function *Caller,
94 const Function *Callee) const override {
95 return true;
96 }
97
98 // Increase the inlining cost threshold by a factor of 11, reflecting that
99 // calls are particularly expensive in NVPTX.
100 unsigned getInliningThresholdMultiplier() const override { return 11; }
101
102 InstructionCost
103 getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
104 TTI::TargetCostKind CostKind) const override;
105
106 InstructionCost getArithmeticInstrCost(
107 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
108 TTI::OperandValueInfo Op1Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None},
109 TTI::OperandValueInfo Op2Info = {.Kind: .Kind: TTI::OK_AnyValue, .Properties: .Properties: TTI::OP_None},
110 ArrayRef<const Value *> Args = {},
111 const Instruction *CxtI = nullptr) const override;
112
113 InstructionCost getScalarizationOverhead(
114 VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
115 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
116 ArrayRef<Value *> VL = {}) const override {
117 if (!InTy->getElementCount().isFixed())
118 return InstructionCost::getInvalid();
119
120 auto VT = getTLI()->getValueType(DL, Ty: InTy);
121 auto NumElements = InTy->getElementCount().getFixedValue();
122 InstructionCost Cost = 0;
123 if (Insert && !VL.empty()) {
124 bool AllConstant = all_of(Range: seq(Size: NumElements), P: [&](int Idx) {
125 return !DemandedElts[Idx] || isa<Constant>(Val: VL[Idx]);
126 });
127 if (AllConstant) {
128 Cost += TTI::TCC_Free;
129 Insert = false;
130 }
131 }
132 if (Insert && NVPTX::isPackedVectorTy(VT) && VT.is32BitVector()) {
133 // Can be built in a single 32-bit mov (64-bit regs are emulated in SASS
134 // with 2x 32-bit regs)
135 Cost += 1;
136 Insert = false;
137 }
138 if (Insert && VT == MVT::v4i8) {
139 InstructionCost Cost = 3; // 3 x PRMT
140 for (auto Idx : seq(Size: NumElements))
141 if (DemandedElts[Idx])
142 Cost += 1; // zext operand to i32
143 Insert = false;
144 }
145 return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
146 Extract, CostKind,
147 ForPoisonSrc, VL);
148 }
149
150 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
151 TTI::UnrollingPreferences &UP,
152 OptimizationRemarkEmitter *ORE) const override;
153
154 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
155 TTI::PeelingPreferences &PP) const override;
156
157 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
158 // Volatile loads/stores are only supported for shared and global address
159 // spaces, or for generic AS that maps to them.
160 if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
161 AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
162 AddrSpace == llvm::ADDRESS_SPACE_SHARED))
163 return false;
164
165 switch(I->getOpcode()){
166 default:
167 return false;
168 case Instruction::Load:
169 case Instruction::Store:
170 return true;
171 }
172 }
173
174 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
175 Intrinsic::ID IID) const override;
176
177 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;
178
179 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
180 Value *NewV) const override;
181 unsigned getAssumedAddrSpace(const Value *V) const override;
182
183 void collectKernelLaunchBounds(
184 const Function &F,
185 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
186
187 bool shouldBuildRelLookupTables() const override {
188 // Self-referential globals are not supported.
189 return false;
190 }
191};
192
193} // end namespace llvm
194
195#endif
196

source code of llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h