Skip to content

Commit d398f3a

Browse files
author
Abhinav Garg
committed
Address review comments: Scalarize v2s16 for uniform operation
1 parent 2529613 commit d398f3a

File tree

8 files changed

+2672
-259
lines changed

8 files changed

+2672
-259
lines changed

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 37 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
9999
"Expected inlined-at fields to agree");
100100
auto MIB = buildInstrNoInsert(TargetOpcode::DBG_VALUE);
101101

102-
auto *NumericConstant = [&] () -> const Constant* {
102+
auto *NumericConstant = [&]() -> const Constant * {
103103
if (const auto *CE = dyn_cast<ConstantExpr>(&C))
104104
if (CE->getOpcode() == Instruction::IntToPtr)
105105
return CE->getOperand(0);
@@ -203,7 +203,8 @@ MachineIRBuilder::buildPtrAdd(const DstOp &Res, const SrcOp &Op0,
203203
const SrcOp &Op1, std::optional<unsigned> Flags) {
204204
assert(Res.getLLTTy(*getMRI()).isPointerOrPointerVector() &&
205205
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
206-
assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() && "invalid offset type");
206+
assert(Op1.getLLTTy(*getMRI()).getScalarType().isScalar() &&
207+
"invalid offset type");
207208

208209
return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}, Flags);
209210
}
@@ -314,8 +315,7 @@ MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) {
314315
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
315316
}
316317

317-
MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr,
318-
unsigned JTI,
318+
MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr, unsigned JTI,
319319
Register IndexReg) {
320320
assert(getMRI()->getType(TablePtr).isPointer() &&
321321
"Table reg must be a pointer");
@@ -343,8 +343,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
343343

344344
if (Ty.isFixedVector()) {
345345
auto Const = buildInstr(TargetOpcode::G_CONSTANT)
346-
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
347-
.addCImm(&Val);
346+
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
347+
.addCImm(&Val);
348348
return buildSplatBuildVector(Res, Const);
349349
}
350350

@@ -369,8 +369,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
369369
LLT Ty = Res.getLLTTy(*getMRI());
370370
LLT EltTy = Ty.getScalarType();
371371

372-
assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics())
373-
== EltTy.getSizeInBits() &&
372+
assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics()) ==
373+
EltTy.getSizeInBits() &&
374374
"creating fconstant with the wrong size");
375375

376376
assert(!Ty.isPointer() && "invalid operand type");
@@ -380,8 +380,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
380380

381381
if (Ty.isFixedVector()) {
382382
auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
383-
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
384-
.addFPImm(&Val);
383+
.addDef(getMRI()->createGenericVirtualRegister(EltTy))
384+
.addFPImm(&Val);
385385

386386
return buildSplatBuildVector(Res, Const);
387387
}
@@ -403,8 +403,8 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
403403
double Val) {
404404
LLT DstTy = Res.getLLTTy(*getMRI());
405405
auto &Ctx = getMF().getFunction().getContext();
406-
auto *CFP =
407-
ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
406+
auto *CFP = ConstantFP::get(
407+
Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
408408
return buildFConstant(Res, *CFP);
409409
}
410410

@@ -466,9 +466,10 @@ MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
466466
return MIB;
467467
}
468468

469-
MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset(
470-
const DstOp &Dst, const SrcOp &BasePtr,
471-
MachineMemOperand &BaseMMO, int64_t Offset) {
469+
MachineInstrBuilder
470+
MachineIRBuilder::buildLoadFromOffset(const DstOp &Dst, const SrcOp &BasePtr,
471+
MachineMemOperand &BaseMMO,
472+
int64_t Offset) {
472473
LLT LoadTy = Dst.getLLTTy(*getMRI());
473474
MachineMemOperand *OffsetMMO =
474475
getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy);
@@ -539,9 +540,9 @@ unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
539540
}
540541

541542
MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
542-
const SrcOp &Op,
543-
bool IsFP) {
544-
unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
543+
const SrcOp &Op, bool IsFP) {
544+
unsigned ExtOp =
545+
getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
545546
return buildInstr(ExtOp, Res, Op);
546547
}
547548

@@ -709,9 +710,9 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
709710
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
710711
}
711712

712-
MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
713-
const SrcOp &Op) {
714-
unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
713+
MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, const SrcOp &Op) {
714+
unsigned NumReg =
715+
Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
715716
SmallVector<DstOp, 8> TmpVec(NumReg, Res);
716717
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
717718
}
@@ -1053,10 +1054,11 @@ MachineIRBuilder::buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr,
10531054
return MIB;
10541055
}
10551056

1056-
MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
1057-
unsigned Opcode, const DstOp &OldValRes,
1058-
const SrcOp &Addr, const SrcOp &Val,
1059-
MachineMemOperand &MMO) {
1057+
MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
1058+
const DstOp &OldValRes,
1059+
const SrcOp &Addr,
1060+
const SrcOp &Val,
1061+
MachineMemOperand &MMO) {
10601062

10611063
#ifndef NDEBUG
10621064
LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
@@ -1145,16 +1147,15 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
11451147
}
11461148

11471149
MachineInstrBuilder
1148-
MachineIRBuilder::buildAtomicRMWFAdd(
1149-
const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
1150-
MachineMemOperand &MMO) {
1150+
MachineIRBuilder::buildAtomicRMWFAdd(const DstOp &OldValRes, const SrcOp &Addr,
1151+
const SrcOp &Val, MachineMemOperand &MMO) {
11511152
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val,
11521153
MMO);
11531154
}
11541155

11551156
MachineInstrBuilder
1156-
MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
1157-
MachineMemOperand &MMO) {
1157+
MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr,
1158+
const SrcOp &Val, MachineMemOperand &MMO) {
11581159
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val,
11591160
MMO);
11601161
}
@@ -1189,11 +1190,9 @@ MachineIRBuilder::buildAtomicRMWFMinimum(const DstOp &OldValRes,
11891190
Val, MMO);
11901191
}
11911192

1192-
MachineInstrBuilder
1193-
MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
1194-
return buildInstr(TargetOpcode::G_FENCE)
1195-
.addImm(Ordering)
1196-
.addImm(Scope);
1193+
MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering,
1194+
unsigned Scope) {
1195+
return buildInstr(TargetOpcode::G_FENCE).addImm(Ordering).addImm(Scope);
11971196
}
11981197

11991198
MachineInstrBuilder MachineIRBuilder::buildPrefetch(const SrcOp &Addr,
@@ -1276,6 +1275,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
12761275
SrcOps[0].getLLTTy(*getMRI()));
12771276
break;
12781277
case TargetOpcode::G_ADD:
1278+
case TargetOpcode::G_FADD:
12791279
case TargetOpcode::G_AND:
12801280
case TargetOpcode::G_MUL:
12811281
case TargetOpcode::G_OR:
@@ -1333,7 +1333,8 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
13331333
assert(DstOps.size() == 1 && "Invalid Dst");
13341334
assert(SrcOps.size() == 1 && "Invalid Srcs");
13351335
assert(DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
1336-
SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "invalid bitcast");
1336+
SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
1337+
"invalid bitcast");
13371338
break;
13381339
}
13391340
case TargetOpcode::COPY:

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -616,6 +616,24 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
616616
MI.eraseFromParent();
617617
}
618618

619+
void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
620+
Register Dst = MI.getOperand(0).getReg();
621+
LLT DstTy = MRI.getType(Dst);
622+
assert(DstTy == V2S16);
623+
auto [Val0_Lo_32, Val0_Hi_32] = unpackAExt(MI.getOperand(1).getReg());
624+
auto [Val1_Lo_32, Val1_Hi_32] = unpackAExt(MI.getOperand(2).getReg());
625+
unsigned Opc = MI.getOpcode();
626+
auto Flags = MI.getFlags();
627+
auto Val0_Lo = B.buildTrunc(SgprRB_S16, Val0_Lo_32);
628+
auto Val0_Hi = B.buildTrunc(SgprRB_S16, Val0_Hi_32);
629+
auto Val1_Lo = B.buildTrunc(SgprRB_S16, Val1_Lo_32);
630+
auto Val1_Hi = B.buildTrunc(SgprRB_S16, Val1_Hi_32);
631+
auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Val0_Lo, Val1_Lo}, Flags);
632+
auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Val0_Hi, Val1_Hi}, Flags);
633+
B.buildMergeLikeInstr(Dst, {Lo.getReg(0), Hi.getReg(0)});
634+
MI.eraseFromParent();
635+
}
636+
619637
void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
620638
Register Dst = MI.getOperand(0).getReg();
621639
LLT DstTy = MRI.getType(Dst);
@@ -688,6 +706,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
688706
return lowerUnpackBitShift(MI);
689707
case UnpackMinMax:
690708
return lowerUnpackMinMax(MI);
709+
case ScalarizeToS16:
710+
return lowerSplitTo16(MI);
691711
case Ext32To64: {
692712
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
693713
MachineInstrBuilder Hi;

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@ class RegBankLegalizeHelper {
7272
static constexpr LLT P6 = LLT::pointer(6, 32);
7373

7474
MachineRegisterInfo::VRegAttrs SgprRB_S32 = {SgprRB, S32};
75+
MachineRegisterInfo::VRegAttrs SgprRB_S16 = {SgprRB, S16};
7576
MachineRegisterInfo::VRegAttrs VgprRB_S32 = {VgprRB, S32};
7677
MachineRegisterInfo::VRegAttrs VccRB_S1 = {VccRB, S1};
7778

@@ -121,6 +122,7 @@ class RegBankLegalizeHelper {
121122
void lowerV_BFE(MachineInstr &MI);
122123
void lowerS_BFE(MachineInstr &MI);
123124
void lowerSplitTo32(MachineInstr &MI);
125+
void lowerSplitTo16(MachineInstr &MI);
124126
void lowerSplitTo32Select(MachineInstr &MI);
125127
void lowerSplitTo32SExtInReg(MachineInstr &MI);
126128
void lowerUnpackMinMax(MachineInstr &MI);

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -912,7 +912,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
912912
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
913913
.Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
914914
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
915-
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
915+
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
916+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
917+
hasSALUFloat)
916918
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
917919
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
918920
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,7 @@ enum LoweringMethodID {
221221
V_BFE,
222222
VgprToVccCopy,
223223
SplitTo32,
224+
ScalarizeToS16,
224225
SplitTo32Select,
225226
SplitTo32SExtInReg,
226227
Ext32To64,

0 commit comments

Comments
 (0)