diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1992ef67164d8..be45a678bbcfe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45486,6 +45486,11 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
     return V;
 
+  // bitcast(v1Ty insert_vector_elt(X, Y, 0)) --> Y
+  if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT && SrcVT.getScalarType() == VT &&
+      SrcVT.getVectorNumElements() == 1)
+    return N0.getOperand(1);
+
   // Convert a bitcasted integer logic operation that has one bitcasted
   // floating-point operand into a floating-point logic operation. This may
   // create a load of a constant, but that is cheaper than materializing the
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index c71d7768834f3..a64d7df11a4d0 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -124,3 +124,18 @@ entry:
   call void @llvm.masked.store.v4i64.p0(<4 x i64> %0, ptr %p, i32 8, <4 x i1> %cond2)
   ret void
 }
+
+define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) #2 {
+; CHECK-LABEL: single_cmp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    cfcmovnew (%rdx), %ax
+; CHECK-NEXT:    cfcmovnew %ax, (%rcx)
+; CHECK-NEXT:    retq
+entry:
+  %0 = icmp ne i32 %a, %b
+  %1 = insertelement <1 x i1> poison, i1 %0, i64 0
+  %2 = tail call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %c, i32 2, <1 x i1> %1, <1 x i16> poison)
+  tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %2, ptr %d, i32 2, <1 x i1> %1)
+  ret void
+}
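
Note (illustrative, not part of the patch): the new combine replaces a scalar bitcast of a one-element vector with the scalar value that was inserted into lane 0, so the bitcast node disappears entirely. In the test above, the <1 x i1> mask built from the icmp feeds the masked load/store, and exposing the raw compare result through this fold is presumably what lets the APX conditional-faulting cfcmovne forms be selected. A minimal IR-level sketch of the shape the DAG-level fold targets, with a hypothetical function name @fold_v1_bitcast:

; Sketch only, not from the patch: at the SelectionDAG level the combine rewrites
;   (iN (bitcast (v1iN (insert_vector_elt X, Y, 0)))) -> Y
define i16 @fold_v1_bitcast(i16 %y) {
  ; insert %y into lane 0 of a single-element vector ...
  %v = insertelement <1 x i16> poison, i16 %y, i64 0
  ; ... then bitcast the vector back to a scalar; after the new combine the
  ; corresponding DAG bitcast node is simply replaced by %y
  %s = bitcast <1 x i16> %v to i16
  ret i16 %s
}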