Skip to content

Commit 412d59f

Browse files
committed
[DAG] combineShiftToMULH - handle zext nneg as sext
Fixes poor codegen on AVX512 targets for a test case from #109790
1 parent bdd3559 commit 412d59f

File tree

2 files changed

+13
-35
lines changed

2 files changed

+13
-35
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+4 -2
Original file line number | Diff line number | Diff line change
@@ -10287,8 +10287,10 @@ static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
1028710287
SDValue LeftOp = ShiftOperand.getOperand(0);
1028810288
SDValue RightOp = ShiftOperand.getOperand(1);
1028910289

10290-
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10291-
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10290+
// Treat zext nneg as sext - we might need to support handling these as zext
10291+
// as well in the future, but for now just prefer sext.
10292+
bool IsSignExt = sd_match(LeftOp, m_SExtLike(m_Value()));
10293+
bool IsZeroExt = sd_match(LeftOp, m_ZExt(m_Value()));
1029210294

1029310295
if (!IsSignExt && !IsZeroExt)
1029410296
return SDValue();

llvm/test/CodeGen/X86/pmulh.ll

+9 -33
Original file line number | Diff line number | Diff line change
@@ -953,39 +953,15 @@ define void @PR109790(ptr sret([32 x i8]) %ret, ptr %a) {
953953
; SSE-NEXT: movdqa %xmm0, 16(%rdi)
954954
; SSE-NEXT: retq
955955
;
956-
; AVX2-LABEL: PR109790:
957-
; AVX2: # %bb.0:
958-
; AVX2-NEXT: movq %rdi, %rax
959-
; AVX2-NEXT: vmovdqa (%rsi), %ymm0
960-
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
961-
; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
962-
; AVX2-NEXT: vmovdqa %ymm0, (%rdi)
963-
; AVX2-NEXT: vzeroupper
964-
; AVX2-NEXT: retq
965-
;
966-
; AVX512F-LABEL: PR109790:
967-
; AVX512F: # %bb.0:
968-
; AVX512F-NEXT: movq %rdi, %rax
969-
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
970-
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
971-
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
972-
; AVX512F-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
973-
; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm0
974-
; AVX512F-NEXT: vpmovdw %zmm0, (%rdi)
975-
; AVX512F-NEXT: vzeroupper
976-
; AVX512F-NEXT: retq
977-
;
978-
; AVX512BW-LABEL: PR109790:
979-
; AVX512BW: # %bb.0:
980-
; AVX512BW-NEXT: movq %rdi, %rax
981-
; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0
982-
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
983-
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
984-
; AVX512BW-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0]
985-
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0
986-
; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi)
987-
; AVX512BW-NEXT: vzeroupper
988-
; AVX512BW-NEXT: retq
956+
; AVX-LABEL: PR109790:
957+
; AVX: # %bb.0:
958+
; AVX-NEXT: movq %rdi, %rax
959+
; AVX-NEXT: vmovdqa (%rsi), %ymm0
960+
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
961+
; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
962+
; AVX-NEXT: vmovdqa %ymm0, (%rdi)
963+
; AVX-NEXT: vzeroupper
964+
; AVX-NEXT: retq
989965
%load = load <16 x i16>, ptr %a, align 32
990966
%and = and <16 x i16> %load, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
991967
%ext = zext nneg <16 x i16> %and to <16 x i32>

0 commit comments

Comments
 (0)