1//===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
/// \file
/// Combine pairs of VALU instructions into single VOPD instructions.
/// This only works in wave32 mode.
/// VOPD has register requirements; a pair is not combined into a VOPD if
/// those requirements are not met.
/// The shouldCombineVOPD mutator in the post-RA machine scheduler places
/// candidate instructions for VOPD back-to-back.
///
17//
18//===----------------------------------------------------------------------===//
19
20#include "AMDGPU.h"
21#include "GCNSubtarget.h"
22#include "GCNVOPDUtils.h"
23#include "SIInstrInfo.h"
24#include "Utils/AMDGPUBaseInfo.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/MachineBasicBlock.h"
28#include "llvm/CodeGen/MachineInstr.h"
29#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/CodeGen/MachinePassManager.h"
31#include "llvm/Support/Debug.h"
32
33#define DEBUG_TYPE "gcn-create-vopd"
34STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created.");
35
36using namespace llvm;
37
38namespace {
39
40class GCNCreateVOPD {
41private:
42 class VOPDCombineInfo {
43 public:
44 VOPDCombineInfo() = default;
45 VOPDCombineInfo(MachineInstr *First, MachineInstr *Second,
46 bool VOPD3 = false)
47 : FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {}
48
49 MachineInstr *FirstMI;
50 MachineInstr *SecondMI;
51 bool IsVOPD3;
52 };
53
54public:
55 const GCNSubtarget *ST = nullptr;
56
57 bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
58 auto *FirstMI = CI.FirstMI;
59 auto *SecondMI = CI.SecondMI;
60 unsigned Opc1 = FirstMI->getOpcode();
61 unsigned Opc2 = SecondMI->getOpcode();
62 unsigned EncodingFamily =
63 AMDGPU::getVOPDEncodingFamily(ST: SII->getSubtarget());
64 int NewOpcode = AMDGPU::getVOPDFull(OpX: AMDGPU::getVOPDOpcode(Opc: Opc1, VOPD3: CI.IsVOPD3),
65 OpY: AMDGPU::getVOPDOpcode(Opc: Opc2, VOPD3: CI.IsVOPD3),
66 EncodingFamily, VOPD3: CI.IsVOPD3);
67 assert(NewOpcode != -1 &&
68 "Should have previously determined this as a possible VOPD\n");
69
70 auto VOPDInst = BuildMI(BB&: *FirstMI->getParent(), I: FirstMI,
71 MIMD: FirstMI->getDebugLoc(), MCID: SII->get(Opcode: NewOpcode))
72 .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
73
74 namespace VOPD = AMDGPU::VOPD;
75 MachineInstr *MI[] = {FirstMI, SecondMI};
76 auto InstInfo =
77 AMDGPU::getVOPDInstInfo(OpX: FirstMI->getDesc(), OpY: SecondMI->getDesc());
78
79 for (auto CompIdx : VOPD::COMPONENTS) {
80 auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
81 VOPDInst.add(MO: MI[CompIdx]->getOperand(i: MCOprIdx));
82 }
83
84 const AMDGPU::OpName Mods[2][3] = {
85 {AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers,
86 AMDGPU::OpName::vsrc2X_modifiers},
87 {AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers,
88 AMDGPU::OpName::vsrc2Y_modifiers}};
89 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
90 AMDGPU::OpName::src1_modifiers,
91 AMDGPU::OpName::src2_modifiers};
92 const unsigned VOPDOpc = VOPDInst->getOpcode();
93
94 for (auto CompIdx : VOPD::COMPONENTS) {
95 auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
96 bool IsVOP3 = SII->isVOP3(MI: *MI[CompIdx]);
97 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
98 if (AMDGPU::hasNamedOperand(Opcode: VOPDOpc, NamedIdx: Mods[CompIdx][CompSrcIdx])) {
99 const MachineOperand *Mod =
100 SII->getNamedOperand(MI&: *MI[CompIdx], OperandName: SrcMods[CompSrcIdx]);
101 VOPDInst.addImm(Val: Mod ? Mod->getImm() : 0);
102 }
103 auto MCOprIdx =
104 InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3: IsVOP3);
105 VOPDInst.add(MO: MI[CompIdx]->getOperand(i: MCOprIdx));
106 }
107 if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3)
108 VOPDInst.addReg(RegNo: AMDGPU::VCC_LO);
109 }
110
111 if (CI.IsVOPD3) {
112 if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc: Opc2))
113 VOPDInst.addImm(Val: BitOp2);
114 }
115
116 SII->fixImplicitOperands(MI&: *VOPDInst);
117 for (auto CompIdx : VOPD::COMPONENTS)
118 VOPDInst.copyImplicitOps(OtherMI: *MI[CompIdx]);
119
120 LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
121 << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
122
123 for (auto CompIdx : VOPD::COMPONENTS)
124 MI[CompIdx]->eraseFromParent();
125
126 ++NumVOPDCreated;
127 return true;
128 }
129
130 bool run(MachineFunction &MF) {
131 ST = &MF.getSubtarget<GCNSubtarget>();
132 if (!AMDGPU::hasVOPD(STI: *ST) || !ST->isWave32())
133 return false;
134 LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
135
136 const SIInstrInfo *SII = ST->getInstrInfo();
137 bool Changed = false;
138 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST: *ST);
139 bool HasVOPD3 = ST->hasVOPD3();
140
141 SmallVector<VOPDCombineInfo> ReplaceCandidates;
142
143 for (auto &MBB : MF) {
144 auto MII = MBB.begin(), E = MBB.end();
145 while (MII != E) {
146 auto *FirstMI = &*MII;
147 MII = next_nodbg(It: MII, End: MBB.end());
148 if (MII == MBB.end())
149 break;
150 if (FirstMI->isDebugInstr())
151 continue;
152 auto *SecondMI = &*MII;
153 unsigned Opc = FirstMI->getOpcode();
154 unsigned Opc2 = SecondMI->getOpcode();
155 VOPDCombineInfo CI;
156
157 const auto checkVOPD = [&](bool VOPD3) -> bool {
158 llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD =
159 AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
160 llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD =
161 AMDGPU::getCanBeVOPD(Opc: Opc2, EncodingFamily, VOPD3);
162
163 if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
164 CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3);
165 else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
166 CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3);
167 else
168 return false;
169 // checkVOPDRegConstraints cares about program order, but doReplace
170 // cares about X-Y order in the constituted VOPD
171 return llvm::checkVOPDRegConstraints(TII: *SII, FirstMI: *FirstMI, SecondMI: *SecondMI,
172 IsVOPD3: VOPD3);
173 };
174
175 if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) {
176 ReplaceCandidates.push_back(Elt: CI);
177 ++MII;
178 }
179 }
180 }
181 for (auto &CI : ReplaceCandidates) {
182 Changed |= doReplace(SII, CI);
183 }
184
185 return Changed;
186 }
187};
188
189class GCNCreateVOPDLegacy : public MachineFunctionPass {
190public:
191 static char ID;
192 GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {}
193
194 void getAnalysisUsage(AnalysisUsage &AU) const override {
195 AU.setPreservesCFG();
196 MachineFunctionPass::getAnalysisUsage(AU);
197 }
198
199 StringRef getPassName() const override {
200 return "GCN Create VOPD Instructions";
201 }
202 bool runOnMachineFunction(MachineFunction &MF) override {
203 if (skipFunction(F: MF.getFunction()))
204 return false;
205
206 return GCNCreateVOPD().run(MF);
207 }
208};
209
210} // namespace
211
212PreservedAnalyses
213llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
214 MachineFunctionAnalysisManager &AM) {
215 if (!GCNCreateVOPD().run(MF))
216 return PreservedAnalyses::all();
217 return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
218}
219
220char GCNCreateVOPDLegacy::ID = 0;
221
222char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID;
223
224INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions",
225 false, false)
226

// Source: llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp