Skip to content

Commit 107aa6a

Browse files
authored
[X86] Combine bitcast(v1Ty insert_vector_elt(X, Y, 0)) to Y (llvm#130475)
In practice this only happens for v1i1, when we generate llvm.masked.load/store intrinsics for APX cload/cstore. See https://godbolt.org/z/vjsrofsqx
1 parent 2a3e782 commit 107aa6a

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -45486,6 +45486,11 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
4548645486
if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
4548745487
return V;
4548845488

45489+
// bitcast(v1Ty insert_vector_elt(X, Y, 0)) --> Y
45490+
if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT && SrcVT.getScalarType() == VT &&
45491+
SrcVT.getVectorNumElements() == 1)
45492+
return N0.getOperand(1);
45493+
4548945494
// Convert a bitcasted integer logic operation that has one bitcasted
4549045495
// floating-point operand into a floating-point logic operation. This may
4549145496
// create a load of a constant, but that is cheaper than materializing the

llvm/test/CodeGen/X86/apx/cf.ll

+15
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,18 @@ entry:
124124
call void @llvm.masked.store.v4i64.p0(<4 x i64> %0, ptr %p, i32 8, <4 x i1> %cond2)
125125
ret void
126126
}
127+
128+
define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) #2 {
129+
; CHECK-LABEL: single_cmp:
130+
; CHECK: # %bb.0: # %entry
131+
; CHECK-NEXT: cmpl %esi, %edi
132+
; CHECK-NEXT: cfcmovnew (%rdx), %ax
133+
; CHECK-NEXT: cfcmovnew %ax, (%rcx)
134+
; CHECK-NEXT: retq
135+
entry:
136+
%0 = icmp ne i32 %a, %b
137+
%1 = insertelement <1 x i1> poison, i1 %0, i64 0
138+
%2 = tail call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %c, i32 2, <1 x i1> %1, <1 x i16> poison)
139+
tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %2, ptr %d, i32 2, <1 x i1> %1)
140+
ret void
141+
}

0 commit comments

Comments
 (0)