Skip to content

Commit 1f930cf

Browse files
committed
[X86] Fold not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2) (REAPPLIED)
Reapply b9483d3 with a fix: the original patch contained a typo and was not ensuring that the icmp was a comparison against zero. Fixes #78888.
1 parent 9a8437f commit 1f930cf

File tree

2 files changed

+36
-26
lines changed

2 files changed

+36
-26
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49336,6 +49336,27 @@ static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1,
4933649336
}
4933749337
}
4933849338

49339+
// not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)
49340+
if (N->getOpcode() == ISD::XOR && N0.getOpcode() == X86ISD::PCMPEQ &&
49341+
N0.getOperand(0).getOpcode() == ISD::AND &&
49342+
ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) &&
49343+
ISD::isBuildVectorAllOnes(N1.getNode())) {
49344+
MVT VT = N->getSimpleValueType(0);
49345+
APInt UndefElts;
49346+
SmallVector<APInt> EltBits;
49347+
if (getTargetConstantBitsFromNode(N0.getOperand(0).getOperand(1),
49348+
VT.getScalarSizeInBits(), UndefElts,
49349+
EltBits)) {
49350+
bool IsPow2OrUndef = true;
49351+
for (unsigned I = 0, E = EltBits.size(); I != E; ++I)
49352+
IsPow2OrUndef &= UndefElts[I] || EltBits[I].isPowerOf2();
49353+
49354+
if (IsPow2OrUndef)
49355+
return DAG.getNode(X86ISD::PCMPEQ, SDLoc(N), VT, N0.getOperand(0),
49356+
N0.getOperand(0).getOperand(1));
49357+
}
49358+
}
49359+
4933949360
return SDValue();
4934049361
}
4934149362

llvm/test/CodeGen/X86/icmp-pow2-mask.ll

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,35 +11,29 @@ define <8 x i16> @pow2_mask_v16i8(i8 zeroext %0) {
1111
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1212
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1313
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
14-
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15-
; SSE2-NEXT: pxor %xmm1, %xmm1
16-
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
17-
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
18-
; SSE2-NEXT: pxor %xmm1, %xmm0
14+
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [128,128,64,64,32,32,16,16,8,8,4,4,2,2,1,1]
15+
; SSE2-NEXT: pand %xmm1, %xmm0
16+
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
1917
; SSE2-NEXT: retq
2018
;
2119
; SSE41-LABEL: pow2_mask_v16i8:
2220
; SSE41: # %bb.0:
2321
; SSE41-NEXT: movd %edi, %xmm0
2422
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2523
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
26-
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27-
; SSE41-NEXT: pxor %xmm1, %xmm1
28-
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
29-
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
30-
; SSE41-NEXT: pxor %xmm1, %xmm0
24+
; SSE41-NEXT: movq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0]
25+
; SSE41-NEXT: pand %xmm1, %xmm0
26+
; SSE41-NEXT: pcmpeqb %xmm1, %xmm0
3127
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
3228
; SSE41-NEXT: retq
3329
;
3430
; AVX2-LABEL: pow2_mask_v16i8:
3531
; AVX2: # %bb.0:
3632
; AVX2-NEXT: vmovd %edi, %xmm0
3733
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
38-
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
39-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34+
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
35+
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4036
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
41-
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
42-
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
4337
; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
4438
; AVX2-NEXT: retq
4539
;
@@ -103,33 +97,28 @@ define i64 @pow2_mask_v8i8(i8 zeroext %0) {
10397
; SSE-NEXT: movd %edi, %xmm0
10498
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
10599
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
106-
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
107-
; SSE-NEXT: pxor %xmm1, %xmm1
108-
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
109-
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
110-
; SSE-NEXT: pxor %xmm1, %xmm0
100+
; SSE-NEXT: movq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,0,0,0,0,0,0,0,0]
101+
; SSE-NEXT: pand %xmm1, %xmm0
102+
; SSE-NEXT: pcmpeqb %xmm1, %xmm0
111103
; SSE-NEXT: movq %xmm0, %rax
112104
; SSE-NEXT: retq
113105
;
114106
; AVX2-LABEL: pow2_mask_v8i8:
115107
; AVX2: # %bb.0:
116108
; AVX2-NEXT: vmovd %edi, %xmm0
117109
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
118-
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
119-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
110+
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
111+
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
120112
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
121-
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
122-
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
123113
; AVX2-NEXT: vmovq %xmm0, %rax
124114
; AVX2-NEXT: retq
125115
;
126116
; AVX512-LABEL: pow2_mask_v8i8:
127117
; AVX512: # %bb.0:
128118
; AVX512-NEXT: vpbroadcastb %edi, %xmm0
129-
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
130-
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
119+
; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1]
120+
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
131121
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
132-
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
133122
; AVX512-NEXT: vmovq %xmm0, %rax
134123
; AVX512-NEXT: retq
135124
%vec = insertelement <1 x i8> poison, i8 %0, i64 0

0 commit comments

Comments
 (0)