diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2399936ffd827..becd03e619d32 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58010,6 +58010,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
     return V;
 
+  // Canonicalize hidden LEA pattern:
+  // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
+  // iff c < 4
+  if (VT == MVT::i32 || VT == MVT::i64) {
+    SDValue Y, Z, Shift;
+    APInt Amt;
+    if (sd_match(
+            N, m_Add(m_OneUse(m_Sub(m_AllOf(m_Value(Shift),
+                                            m_Shl(m_Value(), m_ConstInt(Amt))),
+                                    m_Value(Y))),
+                     m_Value(Z))) &&
+        Amt.ult(4) && !isa<ConstantSDNode>(Z)) {
+      return DAG.getNode(ISD::SUB, DL, VT,
+                         DAG.getNode(ISD::ADD, DL, VT, Shift, Z), Y);
+    }
+  }
+
   // add(psadbw(X,0),psadbw(Y,0)) -> psadbw(add(X,Y),0)
   // iff X and Y won't overflow.
   if (Op0.getOpcode() == X86ISD::PSADBW && Op1.getOpcode() == X86ISD::PSADBW &&
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
index 522b42e07c6e0..beea6d36fe874 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
@@ -104,16 +104,16 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: leal (,%ecx,8), %eax
-; X86-NEXT: subl %ecx, %eax
 ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: PR55714_i32:
 ; X64: # %bb.0:
 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: leal (,%rsi,8), %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rsi,8), %eax
 ; X64-NEXT: subl %esi, %eax
-; X64-NEXT: addl %edi, %eax
 ; X64-NEXT: retq
   %mul = mul i32 %q, 7
   %add = add i32 %mul, %n
@@ -123,21 +123,19 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
 define i64 @PR55714_i64(i64 %n, i64 %q) {
 ; X86-LABEL: PR55714_i64:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (,%eax,8), %ecx
-; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl $7, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal (%edx,%ecx,8), %edx
+; X86-NEXT: subl %ecx, %edx
 ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: PR55714_i64:
 ; X64: # %bb.0:
-; X64-NEXT: leaq (,%rsi,8), %rax
+; X64-NEXT: leaq (%rdi,%rsi,8), %rax
 ; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: addq %rdi, %rax
 ; X64-NEXT: retq
   %mul = mul i64 %q, 7
   %add = add i64 %mul, %n
diff --git a/llvm/test/CodeGen/X86/apx/reloc-opt.ll b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
index a5ab94b00d64b..ecc3d3297ceab 100644
--- a/llvm/test/CodeGen/X86/apx/reloc-opt.ll
+++ b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
@@ -13,11 +13,9 @@
 
 
 ; CHECK-LABEL: test_regclass_not_updated_by_regalloc_1
-; APXREL: movq (%rip), %r16
-; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL: movq (%rip), %rdi
-; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
+; CHECK-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
+; CHECK: movq (%rip), %rdi
+; CHECK-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
 
 @gvar = external global [20000 x i8]
 
diff --git a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
index 345014edd0e9d..5e94598565aa9 100644
--- a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
+++ b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
@@ -7,7 +7,6 @@
 define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-LABEL: dot_ext_v8i8_v8i32:
 ; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pushq %r14
 ; SSE2-NEXT: pushq %rbx
 ; SSE2-NEXT: movzbl (%rdi), %eax
 ; SSE2-NEXT: movzbl (%rdi,%rsi), %ecx
@@ -18,9 +17,9 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-NEXT: leaq (%rsi,%rsi,4), %rbx
 ; SSE2-NEXT: movzbl (%rdi,%rbx), %ebx
 ; SSE2-NEXT: movzbl (%rdi,%r9,2), %r9d
-; SSE2-NEXT: leaq (,%rsi,8), %r14
-; SSE2-NEXT: subq %rsi, %r14
-; SSE2-NEXT: movzbl (%rdi,%r14), %esi
+; SSE2-NEXT: leaq (%rdi,%rsi,8), %rdi
+; SSE2-NEXT: subq %rsi, %rdi
+; SSE2-NEXT: movzbl (%rdi), %esi
 ; SSE2-NEXT: shll $16, %ecx
 ; SSE2-NEXT: orl %eax, %ecx
 ; SSE2-NEXT: movd %ecx, %xmm0
@@ -38,7 +37,6 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-NEXT: paddd %xmm0, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: popq %r14
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: dot_ext_v8i8_v8i32:
@@ -46,7 +44,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE4-NEXT: movzbl (%rdi), %eax
 ; SSE4-NEXT: leaq (%rsi,%rsi,4), %rcx
 ; SSE4-NEXT: leaq (%rsi,%rsi,2), %r8
-; SSE4-NEXT: leaq (,%rsi,8), %r9
+; SSE4-NEXT: leaq (%rdi,%rsi,8), %r9
 ; SSE4-NEXT: subq %rsi, %r9
 ; SSE4-NEXT: movd %eax, %xmm0
 ; SSE4-NEXT: pinsrb $2, (%rdi,%rsi), %xmm0
@@ -55,7 +53,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE4-NEXT: pinsrb $8, (%rdi,%rsi,4), %xmm0
 ; SSE4-NEXT: pinsrb $10, (%rdi,%rcx), %xmm0
 ; SSE4-NEXT: pinsrb $12, (%rdi,%r8,2), %xmm0
-; SSE4-NEXT: pinsrb $14, (%rdi,%r9), %xmm0
+; SSE4-NEXT: pinsrb $14, (%r9), %xmm0
 ; SSE4-NEXT: movdqu (%rdx), %xmm1
 ; SSE4-NEXT: pmaddwd %xmm0, %xmm1
 ; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
@@ -70,7 +68,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; AVX-NEXT: movzbl (%rdi), %eax
 ; AVX-NEXT: leaq (%rsi,%rsi,2), %rcx
 ; AVX-NEXT: leaq (%rsi,%rsi,4), %r8
-; AVX-NEXT: leaq (,%rsi,8), %r9
+; AVX-NEXT: leaq (%rdi,%rsi,8), %r9
 ; AVX-NEXT: subq %rsi, %r9
 ; AVX-NEXT: vmovd %eax, %xmm0
 ; AVX-NEXT: vpinsrb $2, (%rdi,%rsi), %xmm0, %xmm0
@@ -79,7 +77,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; AVX-NEXT: vpinsrb $8, (%rdi,%rsi,4), %xmm0, %xmm0
 ; AVX-NEXT: vpinsrb $10, (%rdi,%r8), %xmm0, %xmm0
 ; AVX-NEXT: vpinsrb $12, (%rdi,%rcx,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrb $14, (%rdi,%r9), %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $14, (%r9), %xmm0, %xmm0
 ; AVX-NEXT: vpmaddwd (%rdx), %xmm0, %xmm0
 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 03dd5351c78ac..40d591f8d1be8 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -166,12 +166,11 @@ define i64 @test_mul_by_6(i64 %x) {
 define i64 @test_mul_by_7(i64 %x) {
 ; X86-LABEL: test_mul_by_7:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (,%eax,8), %ecx
-; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl $7, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal (%edx,%ecx,8), %edx
+; X86-NEXT: subl %ecx, %edx
 ; X86-NEXT: retl
 ;
 ; X86-NOOPT-LABEL: test_mul_by_7:
@@ -733,16 +732,17 @@ define i64 @test_mul_by_22(i64 %x) {
   ret i64 %mul
 }
 
-define i64 @test_mul_by_23(i64 %x) {
+define i64 @test_mul_by_23(i64 %x) nounwind {
 ; X86-LABEL: test_mul_by_23:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,2), %ecx
-; X86-NEXT: shll $3, %ecx
-; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %esi
 ; X86-NEXT: movl $23, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal (%edx,%esi,8), %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
 ; X86-NOOPT-LABEL: test_mul_by_23:
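
For context (not part of the patch): a minimal C++ sketch of the scalar source pattern the new combine rewrites, mirroring the PR55714_i64 test above. The function name mul7_add is hypothetical; the assembly in the comments is taken from the updated X64 check lines.

// Minimal sketch (hypothetical name). q * 7 is lowered to (q << 3) - q, so
// q * 7 + n reaches combineAdd as (add (sub (shl q, 3), q), n). Reassociating
// it to (sub (add (shl q, 3), n), q) lets the shl+add pair match a single
// base-plus-scaled-index LEA, as the PR55714_i64 checks show:
//   before: leaq (,%rsi,8), %rax / subq %rsi, %rax / addq %rdi, %rax
//   after:  leaq (%rdi,%rsi,8), %rax / subq %rsi, %rax
long mul7_add(long n, long q) { return q * 7 + n; }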