Skip to content

Commit 56757e5

Browse files
committed
[X86] combineAndLoadToBZHI - don't return early if we fail to match a load
Just continue so we can test the commuted pattern as well.
1 parent 326a615 commit 56757e5

File tree

2 files changed

+17
-35
lines changed

2 files changed

+17
-35
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+11-21
Original file line numberDiff line numberDiff line change
@@ -50027,17 +50027,14 @@ static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) {
5002750027
return SDValue();
5002850028

5002950029
SDValue Base = Ld->getBasePtr();
50030-
5003150030
if (Base.getOpcode() != ISD::ADD)
5003250031
return SDValue();
5003350032

5003450033
SDValue ShiftedIndex = Base.getOperand(0);
50035-
5003650034
if (ShiftedIndex.getOpcode() != ISD::SHL)
5003750035
return SDValue();
5003850036

5003950037
return ShiftedIndex.getOperand(0);
50040-
5004150038
}
5004250039

5004350040
static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
@@ -50066,22 +50063,21 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
5006650063

5006750064
// Try matching the pattern for both operands.
5006850065
for (unsigned i = 0; i < 2; i++) {
50069-
SDValue N = Node->getOperand(i);
50070-
LoadSDNode *Ld = dyn_cast<LoadSDNode>(N.getNode());
50071-
50072-
// continue if the operand is not a load instruction
50066+
// continue if the operand is not a load instruction
50067+
auto *Ld = dyn_cast<LoadSDNode>(Node->getOperand(i));
5007350068
if (!Ld)
50074-
return SDValue();
50075-
50069+
continue;
5007650070
const Value *MemOp = Ld->getMemOperand()->getValue();
50077-
5007850071
if (!MemOp)
50079-
return SDValue();
50072+
continue;
50073+
// Get the Node which indexes into the array.
50074+
SDValue Index = getIndexFromUnindexedLoad(Ld);
50075+
if (!Index)
50076+
continue;
5008050077

50081-
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
50082-
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
50078+
if (auto *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
50079+
if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
5008350080
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
50084-
5008550081
Constant *Init = GV->getInitializer();
5008650082
Type *Ty = Init->getType();
5008750083
if (!isa<ConstantDataArray>(Init) ||
@@ -50109,21 +50105,15 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
5010950105
// -> (and (load arr[idx]), inp)
5011050106
// <- (and (srl 0xFFFFFFFF, (sub 32, idx)))
5011150107
// that will be replaced with one bzhi instruction.
50112-
SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);
50108+
SDValue Inp = Node->getOperand(i == 0 ? 1 : 0);
5011350109
SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32);
5011450110

50115-
// Get the Node which indexes into the array.
50116-
SDValue Index = getIndexFromUnindexedLoad(Ld);
50117-
if (!Index)
50118-
return SDValue();
5011950111
Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32);
50120-
5012150112
SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index);
5012250113
Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub);
5012350114

5012450115
SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
5012550116
SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);
50126-
5012750117
return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);
5012850118
}
5012950119
}

llvm/test/CodeGen/X86/replace-load-and-with-bzhi.ll

+6-14
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,13 @@ entry:
2929
define i32 @f32_bzhi_commute(i32 %x, i32 %y) local_unnamed_addr {
3030
; X64-LABEL: f32_bzhi_commute:
3131
; X64: # %bb.0: # %entry
32-
; X64-NEXT: movl %edi, %eax
33-
; X64-NEXT: movslq %esi, %rcx
34-
; X64-NEXT: andl fill_table32(,%rcx,4), %eax
32+
; X64-NEXT: bzhil %esi, %edi, %eax
3533
; X64-NEXT: retq
3634
;
3735
; X86-LABEL: f32_bzhi_commute:
3836
; X86: # %bb.0: # %entry
39-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
4037
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
41-
; X86-NEXT: andl fill_table32(,%ecx,4), %eax
38+
; X86-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
4239
; X86-NEXT: retl
4340
entry:
4441
%idxprom = sext i32 %y to i64
@@ -70,16 +67,13 @@ entry:
7067
define i32 @f32_bzhi_partial_commute(i32 %x, i32 %y) local_unnamed_addr {
7168
; X64-LABEL: f32_bzhi_partial_commute:
7269
; X64: # %bb.0: # %entry
73-
; X64-NEXT: movl %edi, %eax
74-
; X64-NEXT: movslq %esi, %rcx
75-
; X64-NEXT: andl fill_table32_partial(,%rcx,4), %eax
70+
; X64-NEXT: bzhil %esi, %edi, %eax
7671
; X64-NEXT: retq
7772
;
7873
; X86-LABEL: f32_bzhi_partial_commute:
7974
; X86: # %bb.0: # %entry
80-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
8175
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
82-
; X86-NEXT: andl fill_table32_partial(,%ecx,4), %eax
76+
; X86-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
8377
; X86-NEXT: retl
8478
entry:
8579
%idxprom = sext i32 %y to i64
@@ -113,8 +107,7 @@ entry:
113107
define i64 @f64_bzhi_commute(i64 %x, i64 %y) local_unnamed_addr {
114108
; X64-LABEL: f64_bzhi_commute:
115109
; X64: # %bb.0: # %entry
116-
; X64-NEXT: movq %rdi, %rax
117-
; X64-NEXT: andq fill_table64(,%rsi,8), %rax
110+
; X64-NEXT: bzhiq %rsi, %rdi, %rax
118111
; X64-NEXT: retq
119112
;
120113
; X86-LABEL: f64_bzhi_commute:
@@ -156,8 +149,7 @@ entry:
156149
define i64 @f64_bzhi_partial_commute(i64 %x, i64 %y) local_unnamed_addr {
157150
; X64-LABEL: f64_bzhi_partial_commute:
158151
; X64: # %bb.0: # %entry
159-
; X64-NEXT: movq %rdi, %rax
160-
; X64-NEXT: andq fill_table64_partial(,%rsi,8), %rax
152+
; X64-NEXT: bzhiq %rsi, %rdi, %rax
161153
; X64-NEXT: retq
162154
;
163155
; X86-LABEL: f64_bzhi_partial_commute:

0 commit comments

Comments
 (0)