Ampere1 scheduling model: split the integer arithmetic/logical InstRW
entries by operand-form suffix, and add a machine-scheduler MIR test.

What the hunks below do:
  * Ampere1Model: CompleteModel changes from 1 to 0.
  * (ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r is narrowed to the r[sx]
    opcodes, which keep Ampere1Write_Arith; the r[ri] opcodes get a new
    entry using Ampere1Write_1cyc_1AB.
  * (ADD|AND|BIC|SUB)S(W|X)r is narrowed to the r[sx] opcodes, which keep
    Ampere1Write_ArithFlagsetting; the r[ri] opcodes get a new entry
    using Ampere1Write_1cyc_1A.
  * New test ampere1-sched-add.mir runs -run-pass=machine-scheduler over a
    function whose IR attributes set "target-cpu"="ampere1".

NOTE(review): the rs/rx suffixes presumably name the shifted-/extended-
register forms and rr/ri the plain register/immediate forms -- confirm
against the AArch64 instruction definitions.
NOTE(review): this patch was reconstructed from a whitespace-collapsed
copy. Column alignment inside lines could not be recovered, so applying
it may need `git apply --ignore-whitespace`. The `<Predicate>` type
parameter on the `list` context line was missing and has been restored.

diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
index de09177d1dc06..cf9f50c2784bb 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
@@ -22,7 +22,7 @@ def Ampere1Model : SchedMachineModel {
   let LoadLatency = 4; // Optimistic load latency
   let MispredictPenalty = 10; // Branch mispredict penalty
   let LoopMicroOpBufferSize = 32; // Instruction queue size
-  let CompleteModel = 1;
+  let CompleteModel = 0;
 
   list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                     SMEUnsupported.F,
@@ -936,9 +936,13 @@ def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;
 
 def : InstRW<[Ampere1Write_1cyc_1A], (instregex "ADC(W|X)r", "SBC(W|X)r")>;
 def : InstRW<[Ampere1Write_Arith],
-        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>;
+        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[sx]")>;
+def : InstRW<[Ampere1Write_1cyc_1AB],
+        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[ri]")>;
 def : InstRW<[Ampere1Write_ArithFlagsetting],
-        (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>;
+        (instregex "(ADD|AND|BIC|SUB)S(W|X)r[sx]")>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+        (instregex "(ADD|AND|BIC|SUB)S(W|X)r[ri]")>;
 def : InstRW<[Ampere1Write_1cyc_1A], (instregex "(ADC|SBC)S(W|X)r")>;
 def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
 
diff --git a/llvm/test/CodeGen/AArch64/ampere1-sched-add.mir b/llvm/test/CodeGen/AArch64/ampere1-sched-add.mir
new file mode 100644
index 0000000000000..e578b5d7f04f3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ampere1-sched-add.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -run-pass=machine-scheduler %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64"
+
+  define i32 @test_add(ptr %0) #0 {
+    %2 = ptrtoint ptr %0 to i64
+    %3 = and i64 %2, -64
+    %4 = inttoptr i64 %3 to ptr
+    %5 = load i32, ptr %4, align 64
+    %6 = getelementptr inbounds i32, ptr %4, i64 1
+    %7 = load i32, ptr %6, align 4
+    %8 = add nsw i32 %7, %5
+    ret i32 %8
+  }
+
+  attributes #0 = { "target-cpu"="ampere1" }
+
+...
+---
+name: test_add
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64, preferred-register: '' }
+  - { id: 1, class: gpr64sp, preferred-register: '' }
+  - { id: 2, class: gpr32, preferred-register: '' }
+  - { id: 3, class: gpr32, preferred-register: '' }
+  - { id: 4, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+body: |
+  bb.0 (%ir-block.1):
+    liveins: $x0
+
+    ; CHECK-LABEL: name: test_add
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[COPY]], 7865
+    ; CHECK-NEXT: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[ANDXri]], 0 :: (load (s32) from %ir.4, align 64)
+    ; CHECK-NEXT: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[ANDXri]], 1 :: (load (s32) from %ir.6)
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = nsw ADDWrr [[LDRWui1]], [[LDRWui]]
+    ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64sp = ANDXri %0, 7865
+    %2:gpr32 = LDRWui %1, 0 :: (load (s32) from %ir.4, align 64)
+    %3:gpr32 = LDRWui %1, 1 :: (load (s32) from %ir.6)
+    %4:gpr32 = nsw ADDWrr %3, %2
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+
+...