diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index da3c834417d6b..02b79c67af3ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16012,6 +16012,23 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
       return CombineLD;
 
+  // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
+  //   => int_vt (any_extend_or_trunc elt_vt:x)
+  //
+  // Only lane 0 of a SCALAR_TO_VECTOR is defined, so all result bits outside
+  // that lane are already undefined. Two restrictions keep the fold sound:
+  //  * Lane 0 must sit in the low bits of the integer, which only holds for
+  //    little-endian layouts.
+  //  * The scalar operand may be wider than the vector element, and hence
+  //    wider than VT, so use getAnyExtOrTrunc; a plain ANY_EXTEND node is
+  //    invalid when the destination is narrower than the source.
+  if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger() &&
+      DAG.getDataLayout().isLittleEndian()) {
+    SDValue SrcScalar = N0.getOperand(0);
+    if (SrcScalar.getValueType().isScalarInteger())
+      return DAG.getAnyExtOrTrunc(SrcScalar, SDLoc(N), VT);
+  }
+
   // Remove double bitcasts from shuffles - this is often a legacy of
   // XformToShuffleWithZero being used to combine bitmaskings (of
   // float vectors bitcast to integer vectors) into shuffles.
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
index 949e6f38e9b42..e14666cdac5c2 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@@ -332,3 +332,33 @@ define amdgpu_kernel void @scalar_to_vector_test6(ptr addrspace(1) %out, i8 zero
   store <2 x half> %bc, ptr addrspace(1) %out
   ret void
 }
+
+; bitcast (scalar_to_vector x) -> any_extend x
+define i64 @bitcast_combine_scalar_to_vector_v4i16(i16 %arg) {
+; SI-LABEL: bitcast_combine_scalar_to_vector_v4i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v0
+; SI-NEXT:    v_and_b32_e32 v2, 0xff00, v0
+; SI-NEXT:    v_bfe_u32 v0, v0, 8, 8
+; SI-NEXT:    v_or_b32_e32 v2, v0, v2
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
+; SI-NEXT:    v_or_b32_e32 v0, v1, v3
+; SI-NEXT:    v_or_b32_e32 v1, v2, v3
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX89-LABEL: bitcast_combine_scalar_to_vector_v4i16:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_and_b32_e32 v1, 0xffffff00, v0
+; GFX89-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX89-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX89-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+  %arg.cast = bitcast i16 %arg to <2 x i8>
+  %tmp1 = shufflevector <2 x i8> %arg.cast, <2 x i8> poison, <8 x i32>
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32>
+  %cast = bitcast <8 x i8> %tmp2 to i64
+  ret i64 %cast
+}