Skip to content

Commit 4624668

Browse files
author
Rin Dobrescu
authored
[AArch64] Update Neoverse V2 FSQRT execution units in schedule model. (#86803)
This patch updates the scheduling model for the SVE FSQRT instructions so that they can issue on either the V0 or the V2 pipeline (V2UnitV02) instead of V0 only.
1 parent 16da9d5 commit 4624668

File tree

2 files changed

+11
-13
lines changed

2 files changed

+11
-13
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,14 +1076,12 @@ def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let
10761076
def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
10771077
def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
10781078
def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
1079-
def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
10801079
def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1081-
def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ReleaseAtCycles = [12]; }
10821080
def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
10831081
def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
10841082
def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
1083+
def V2Write_16cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [14]; }
10851084
def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
1086-
def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ReleaseAtCycles = [14]; }
10871085

10881086
// Miscellaneous
10891087
// -----------------------------------------------------------------------------
@@ -2567,13 +2565,13 @@ def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
25672565
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
25682566

25692567
// Floating point square root, F16
2570-
def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
2568+
def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FSQRT_ZPmZ_H")>;
25712569

25722570
// Floating point square root, F32
2573-
def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
2571+
def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FSQRT_ZPmZ_S")>;
25742572

25752573
// Floating point square root, F64
2576-
def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
2574+
def : InstRW<[V2Write_16cyc_1V02_14rc], (instregex "^FSQRT_ZPmZ_D")>;
25772575

25782576
// Floating point trigonometric exponentiation
25792577
def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;

llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4278,9 +4278,9 @@ zip2 z31.s, z31.s, z31.s
42784278
# CHECK-NEXT: 1 3 0.25 fscale z0.d, p7/m, z0.d, z31.d
42794279
# CHECK-NEXT: 1 3 0.25 fscale z0.h, p7/m, z0.h, z31.h
42804280
# CHECK-NEXT: 1 3 0.25 fscale z0.s, p7/m, z0.s, z31.s
4281-
# CHECK-NEXT: 1 16 14.00 fsqrt z31.d, p7/m, z31.d
4282-
# CHECK-NEXT: 1 13 12.00 fsqrt z31.h, p7/m, z31.h
4283-
# CHECK-NEXT: 1 10 9.00 fsqrt z31.s, p7/m, z31.s
4281+
# CHECK-NEXT: 1 16 7.00 fsqrt z31.d, p7/m, z31.d
4282+
# CHECK-NEXT: 1 13 6.00 fsqrt z31.h, p7/m, z31.h
4283+
# CHECK-NEXT: 1 10 4.50 fsqrt z31.s, p7/m, z31.s
42844284
# CHECK-NEXT: 1 2 0.25 fsub z0.d, p0/m, z0.d, #0.5
42854285
# CHECK-NEXT: 1 2 0.25 fsub z0.d, p7/m, z0.d, z31.d
42864286
# CHECK-NEXT: 1 2 0.25 fsub z0.d, z1.d, z31.d
@@ -6861,7 +6861,7 @@ zip2 z31.s, z31.s, z31.s
68616861

68626862
# CHECK: Resource pressure per iteration:
68636863
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
6864-
# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 1554.25 1281.75 776.75 748.25
6864+
# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 1536.75 1281.75 794.25 748.25
68656865

68666866
# CHECK: Resource pressure by instruction:
68676867
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -7718,9 +7718,9 @@ zip2 z31.s, z31.s, z31.s
77187718
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.d, p7/m, z0.d, z31.d
77197719
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.h, p7/m, z0.h, z31.h
77207720
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.s, p7/m, z0.s, z31.s
7721-
# CHECK-NEXT: - - - - - - - - - - - - - 14.00 - - - fsqrt z31.d, p7/m, z31.d
7722-
# CHECK-NEXT: - - - - - - - - - - - - - 12.00 - - - fsqrt z31.h, p7/m, z31.h
7723-
# CHECK-NEXT: - - - - - - - - - - - - - 9.00 - - - fsqrt z31.s, p7/m, z31.s
7721+
# CHECK-NEXT: - - - - - - - - - - - - - 7.00 - 7.00 - fsqrt z31.d, p7/m, z31.d
7722+
# CHECK-NEXT: - - - - - - - - - - - - - 6.00 - 6.00 - fsqrt z31.h, p7/m, z31.h
7723+
# CHECK-NEXT: - - - - - - - - - - - - - 4.50 - 4.50 - fsqrt z31.s, p7/m, z31.s
77247724
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p0/m, z0.d, #0.5
77257725
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p7/m, z0.d, z31.d
77267726
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, z1.d, z31.d

0 commit comments

Comments
 (0)