Skip to content

Commit 3351e1b

Browse files
davemgreen authored and tstellar committed
[AArch64] Enable AvoidLDAPUR for cpu=generic between armv8.4 and armv9.3. (llvm#125261)
As added in llvm#124274, CPUs in this range can suffer from performance issues with ldapur. As the gain from ldar->ldapr is expected to be greater than the minor gain from ldapr->ldapur, this opts to avoid the instruction under the default -mcpu=generic when the -march is less than armv8.8 / armv9.3. I renamed AArch64Subtarget::Others to AArch64Subtarget::Generic to be clearer what it means. (cherry picked from commit 6424abc)
1 parent de5dcad commit 3351e1b

File tree

4 files changed

+14
-6
lines changed

4 files changed

+14
-6
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,12 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
125125
// this in the future so we can specify it together with the subtarget
126126
// features.
127127
switch (ARMProcFamily) {
128-
case Others:
128+
case Generic:
129+
// Using TuneCPU=generic we avoid ldapur instructions to line up with the
130+
// cpus that use the AvoidLDAPUR feature. We don't want this to be on
131+
// forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
132+
if (hasV8_4aOps() && !hasV8_8aOps())
133+
AvoidLDAPUR = true;
129134
break;
130135
case Carmel:
131136
CacheLineSize = 64;

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ class Triple;
3838
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
3939
public:
4040
enum ARMProcFamilyEnum : uint8_t {
41-
Others,
41+
Generic,
4242
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
4343
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
4444
#undef ARM_PROCESSOR_FAMILY
4545
};
4646

4747
protected:
4848
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
49-
ARMProcFamilyEnum ARMProcFamily = Others;
49+
ARMProcFamilyEnum ARMProcFamily = Generic;
5050

5151
// Enable 64-bit vectorization in SLP.
5252
unsigned MinVectorRegisterBitWidth = 64;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4272,7 +4272,7 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
42724272
// If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Generic, so by
42734273
// checking for that case, we can ensure that the default behaviour is
42744274
// unchanged
4275-
if (ST->getProcFamily() != AArch64Subtarget::Others &&
4275+
if (ST->getProcFamily() != AArch64Subtarget::Generic &&
42764276
!ST->getSchedModel().isOutOfOrder()) {
42774277
UP.Runtime = true;
42784278
UP.Partial = true;

llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "(?!^\s*lda.*\bsp\b)^\s*.*\bsp\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
2-
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
3-
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
2+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
3+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
44
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo,avoid-ldapur -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
55
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v2 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
66
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
77
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
88
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x4 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
99
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x925 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
10+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
11+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.8a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
12+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v9.3a -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
1013

1114
define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
1215
; CHECK-LABEL: load_atomic_i8_aligned_unordered:

0 commit comments

Comments
 (0)