Skip to content

[AArch64] Avoid generating LDAPUR on certain cores #124274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/lib/Target/AArch64/AArch64Features.td
Original file line number Diff line number Diff line change
Expand Up @@ -805,10 +805,14 @@ def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedO
def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
"true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;

def FeatureUseFixedOverScalableIfEqualCost: SubtargetFeature<"use-fixed-over-scalable-if-equal-cost",
def FeatureUseFixedOverScalableIfEqualCost : SubtargetFeature<"use-fixed-over-scalable-if-equal-cost",
"UseFixedOverScalableIfEqualCost", "true",
"Prefer fixed width loop vectorization over scalable if the cost-model assigns equal costs">;

// For performance reasons, on certain cores we prefer an add followed by
// ldapr over an ldapur with an immediate offset.
def FeatureAvoidLDAPUR : SubtargetFeature<"avoid-ldapur", "AvoidLDAPUR", "true",
"Prefer add+ldapr to offset ldapur">;

//===----------------------------------------------------------------------===//
// Architectures.
//
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrAtomics.td
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ let Predicates = [HasRCPC3, HasNEON] in {
}

// v8.4a FEAT_LRCPC2 patterns
let Predicates = [HasRCPC_IMMO] in {
let Predicates = [HasRCPC_IMMO, UseLDAPUR] in {
// Load-Acquire RCpc Register unscaled loads
def : Pat<(acquiring_load<atomic_load_az_8>
(am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
Expand All @@ -589,7 +589,9 @@ let Predicates = [HasRCPC_IMMO] in {
def : Pat<(acquiring_load<atomic_load_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(LDAPURXi GPR64sp:$Rn, simm9:$offset)>;
}

let Predicates = [HasRCPC_IMMO] in {
// Store-Release Register unscaled stores
def : Pat<(releasing_store<atomic_store_8>
(am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,8 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

// Holds when the subtarget does not request avoiding LDAPUR, i.e. when
// Subtarget->avoidLDAPUR() returns false.
def UseLDAPUR : Predicate<"!Subtarget->avoidLDAPUR()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Processors.td
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeatureUseFixedOverScalableIfEqualCost,
FeatureAvoidLDAPUR,
FeaturePredictableSelectIsExpensive]>;

def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
Expand All @@ -250,6 +251,7 @@ def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeatureUseFixedOverScalableIfEqualCost,
FeatureAvoidLDAPUR,
FeaturePredictableSelectIsExpensive]>;

def TuneX925 : SubtargetFeature<"cortex-x925", "ARMProcFamily",
Expand All @@ -260,6 +262,7 @@ def TuneX925 : SubtargetFeature<"cortex-x925", "ARMProcFamily",
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeatureUseFixedOverScalableIfEqualCost,
FeatureAvoidLDAPUR,
FeaturePredictableSelectIsExpensive]>;

def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
Expand Down Expand Up @@ -540,6 +543,7 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeatureUseFixedOverScalableIfEqualCost,
FeatureAvoidLDAPUR,
FeaturePredictableSelectIsExpensive]>;

def TuneNeoverseV3 : SubtargetFeature<"neoversev3", "ARMProcFamily", "NeoverseV3",
Expand All @@ -549,6 +553,7 @@ def TuneNeoverseV3 : SubtargetFeature<"neoversev3", "ARMProcFamily", "NeoverseV3
FeatureFuseAdrpAdd,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeatureAvoidLDAPUR,
FeaturePredictableSelectIsExpensive]>;

def TuneNeoverseV3AE : SubtargetFeature<"neoversev3AE", "ARMProcFamily", "NeoverseV3",
Expand All @@ -558,6 +563,7 @@ def TuneNeoverseV3AE : SubtargetFeature<"neoversev3AE", "ARMProcFamily", "Neover
FeatureFuseAdrpAdd,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeatureAvoidLDAPUR,
FeaturePredictableSelectIsExpensive]>;

def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
Expand Down
144 changes: 111 additions & 33 deletions llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "(?!^\s*lda.*\bsp\b)^\s*.*\bsp\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=true -global-isel-abort=2 -O0 | FileCheck %s --check-prefixes=CHECK,GISEL
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-NOAVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+v8.4a -mattr=+rcpc-immo,avoid-ldapur -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v2 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=neoverse-v3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x3 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x4 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mcpu=cortex-x925 -global-isel=false -O1 | FileCheck %s --check-prefixes=CHECK,SDAG,SDAG-AVOIDLDAPUR

define i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
; CHECK-LABEL: load_atomic_i8_aligned_unordered:
Expand Down Expand Up @@ -39,8 +45,12 @@ define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
; GISEL: add x8, x0, #4
; GISEL: ldaprb w0, [x8]
;
; SDAG-LABEL: load_atomic_i8_aligned_acquire:
; SDAG: ldapurb w0, [x0, #4]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire:
; SDAG-NOAVOIDLDAPUR: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
; SDAG-AVOIDLDAPUR: ldaprb w0, [x8]
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
%r = load atomic i8, ptr %gep acquire, align 1
ret i8 %r
Expand All @@ -51,8 +61,12 @@ define i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) {
; GISEL: add x8, x0, #4
; GISEL: ldaprb w0, [x8]
;
; SDAG-LABEL: load_atomic_i8_aligned_acquire_const:
; SDAG: ldapurb w0, [x0, #4]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire_const:
; SDAG-NOAVOIDLDAPUR: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
; SDAG-AVOIDLDAPUR: ldaprb w0, [x8]
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
%r = load atomic i8, ptr %gep acquire, align 1
ret i8 %r
Expand Down Expand Up @@ -113,8 +127,12 @@ define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
; GISEL: add x8, x0, #8
; GISEL: ldaprh w0, [x8]
;
; SDAG-LABEL: load_atomic_i16_aligned_acquire:
; SDAG: ldapurh w0, [x0, #8]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire:
; SDAG-NOAVOIDLDAPUR: ldapurh w0, [x0, #8]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #8
; SDAG-AVOIDLDAPUR: ldaprh w0, [x8]
%gep = getelementptr inbounds i16, ptr %ptr, i32 4
%r = load atomic i16, ptr %gep acquire, align 2
ret i16 %r
Expand All @@ -125,8 +143,12 @@ define i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) {
; GISEL: add x8, x0, #8
; GISEL: ldaprh w0, [x8]
;
; SDAG-LABEL: load_atomic_i16_aligned_acquire_const:
; SDAG: ldapurh w0, [x0, #8]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire_const:
; SDAG-NOAVOIDLDAPUR: ldapurh w0, [x0, #8]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #8
; SDAG-AVOIDLDAPUR: ldaprh w0, [x8]
%gep = getelementptr inbounds i16, ptr %ptr, i32 4
%r = load atomic i16, ptr %gep acquire, align 2
ret i16 %r
Expand Down Expand Up @@ -183,16 +205,30 @@ define i32 @load_atomic_i32_aligned_monotonic_const(ptr readonly %ptr) {
}

define i32 @load_atomic_i32_aligned_acquire(ptr %ptr) {
; CHECK-LABEL: load_atomic_i32_aligned_acquire:
; CHECK: ldapur w0, [x0, #16]
; GISEL-LABEL: load_atomic_i32_aligned_acquire:
; GISEL: ldapur w0, [x0, #16]
;
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire:
; SDAG-NOAVOIDLDAPUR: ldapur w0, [x0, #16]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #16
; SDAG-AVOIDLDAPUR: ldapr w0, [x8]
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
%r = load atomic i32, ptr %gep acquire, align 4
ret i32 %r
}

define i32 @load_atomic_i32_aligned_acquire_const(ptr readonly %ptr) {
; CHECK-LABEL: load_atomic_i32_aligned_acquire_const:
; CHECK: ldapur w0, [x0, #16]
; GISEL-LABEL: load_atomic_i32_aligned_acquire_const:
; GISEL: ldapur w0, [x0, #16]
;
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire_const:
; SDAG-NOAVOIDLDAPUR: ldapur w0, [x0, #16]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #16
; SDAG-AVOIDLDAPUR: ldapr w0, [x8]
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
%r = load atomic i32, ptr %gep acquire, align 4
ret i32 %r
Expand Down Expand Up @@ -249,16 +285,30 @@ define i64 @load_atomic_i64_aligned_monotonic_const(ptr readonly %ptr) {
}

define i64 @load_atomic_i64_aligned_acquire(ptr %ptr) {
; CHECK-LABEL: load_atomic_i64_aligned_acquire:
; CHECK: ldapur x0, [x0, #32]
; GISEL-LABEL: load_atomic_i64_aligned_acquire:
; GISEL: ldapur x0, [x0, #32]
;
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire:
; SDAG-NOAVOIDLDAPUR: ldapur x0, [x0, #32]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #32
; SDAG-AVOIDLDAPUR: ldapr x0, [x8]
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
%r = load atomic i64, ptr %gep acquire, align 8
ret i64 %r
}

define i64 @load_atomic_i64_aligned_acquire_const(ptr readonly %ptr) {
; CHECK-LABEL: load_atomic_i64_aligned_acquire_const:
; CHECK: ldapur x0, [x0, #32]
; GISEL-LABEL: load_atomic_i64_aligned_acquire_const:
; GISEL: ldapur x0, [x0, #32]
;
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire_const:
; SDAG-NOAVOIDLDAPUR: ldapur x0, [x0, #32]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #32
; SDAG-AVOIDLDAPUR: ldapr x0, [x8]
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
%r = load atomic i64, ptr %gep acquire, align 8
ret i64 %r
Expand Down Expand Up @@ -387,8 +437,12 @@ define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
; GISEL: add x8, x0, #4
; GISEL: ldaprb w0, [x8]
;
; SDAG-LABEL: load_atomic_i8_unaligned_acquire:
; SDAG: ldapurb w0, [x0, #4]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire:
; SDAG-NOAVOIDLDAPUR: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
; SDAG-AVOIDLDAPUR: ldaprb w0, [x8]
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
%r = load atomic i8, ptr %gep acquire, align 1
ret i8 %r
Expand All @@ -399,8 +453,12 @@ define i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) {
; GISEL: add x8, x0, #4
; GISEL: ldaprb w0, [x8]
;
; SDAG-LABEL: load_atomic_i8_unaligned_acquire_const:
; SDAG: ldapurb w0, [x0, #4]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire_const:
; SDAG-NOAVOIDLDAPUR: ldapurb w0, [x0, #4]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire_const:
; SDAG-AVOIDLDAPUR: add x8, x0, #4
; SDAG-AVOIDLDAPUR: ldaprb w0, [x8]
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
%r = load atomic i8, ptr %gep acquire, align 1
ret i8 %r
Expand Down Expand Up @@ -846,9 +904,14 @@ define i8 @load_atomic_i8_from_gep() {
; GISEL: add x8, x8, #1
; GISEL: ldaprb w0, [x8]
;
; SDAG-LABEL: load_atomic_i8_from_gep:
; SDAG: bl init
; SDAG: ldapurb w0, [sp, #13]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_from_gep:
; SDAG-NOAVOIDLDAPUR: bl init
; SDAG-NOAVOIDLDAPUR: ldapurb w0, [sp, #13]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_from_gep:
; SDAG-AVOIDLDAPUR: bl init
; SDAG-AVOIDLDAPUR: orr x8, x19, #0x1
; SDAG-AVOIDLDAPUR: ldaprb w0, [x8]
%a = alloca [3 x i8]
call void @init(ptr %a)
%arrayidx = getelementptr [3 x i8], ptr %a, i64 0, i64 1
Expand All @@ -862,9 +925,14 @@ define i16 @load_atomic_i16_from_gep() {
; GISEL: add x8, x8, #2
; GISEL: ldaprh w0, [x8]
;
; SDAG-LABEL: load_atomic_i16_from_gep:
; SDAG: bl init
; SDAG: ldapurh w0, [sp, #10]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_from_gep:
; SDAG-NOAVOIDLDAPUR: bl init
; SDAG-NOAVOIDLDAPUR: ldapurh w0, [sp, #10]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_from_gep:
; SDAG-AVOIDLDAPUR: bl init
; SDAG-AVOIDLDAPUR: orr x8, x19, #0x2
; SDAG-AVOIDLDAPUR: ldaprh w0, [x8]
%a = alloca [3 x i16]
call void @init(ptr %a)
%arrayidx = getelementptr [3 x i16], ptr %a, i64 0, i64 1
Expand All @@ -877,9 +945,14 @@ define i32 @load_atomic_i32_from_gep() {
; GISEL: bl init
; GISEL: ldapur w0, [x8, #4]
;
; SDAG-LABEL: load_atomic_i32_from_gep:
; SDAG: bl init
; SDAG: ldapur w0, [sp, #8]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_from_gep:
; SDAG-NOAVOIDLDAPUR: bl init
; SDAG-NOAVOIDLDAPUR: ldapur w0, [sp, #8]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i32_from_gep:
; SDAG-AVOIDLDAPUR: bl init
; SDAG-AVOIDLDAPUR: add x8, x19, #4
; SDAG-AVOIDLDAPUR: ldapr w0, [x8]
%a = alloca [3 x i32]
call void @init(ptr %a)
%arrayidx = getelementptr [3 x i32], ptr %a, i64 0, i64 1
Expand All @@ -892,9 +965,14 @@ define i64 @load_atomic_i64_from_gep() {
; GISEL: bl init
; GISEL: ldapur x0, [x8, #8]
;
; SDAG-LABEL: load_atomic_i64_from_gep:
; SDAG: bl init
; SDAG: ldapur x0, [sp, #16]
; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_from_gep:
; SDAG-NOAVOIDLDAPUR: bl init
; SDAG-NOAVOIDLDAPUR: ldapur x0, [sp, #16]
;
; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i64_from_gep:
; SDAG-AVOIDLDAPUR: bl init
; SDAG-AVOIDLDAPUR: add x8, x19, #8
; SDAG-AVOIDLDAPUR: ldapr x0, [x8]
%a = alloca [3 x i64]
call void @init(ptr %a)
%arrayidx = getelementptr [3 x i64], ptr %a, i64 0, i64 1
Expand Down
Loading