[AArch64] Enable FeatureFuseAdrpAdd for all Arm cpus

davemgreen · davemgreen · commit bebc96956b76 · 2022-09-26T09:55:10.000+01:00
D120104 enabled FeatureFuseAdrpAdd for -mcpu=generic, allowing the linker to relax adrp;add pairs where possible. D132075 extended that to neoverse-n1; this patch extends it to all other Cortex and Neoverse CPUs for the same reasons. Differential Revision: https://reviews.llvm.org/D134521
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
@@ -668,19 +668,22 @@ def TuneA35     : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
 def TuneA53     : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
                                    "Cortex-A53 ARM processors", [
                                    FeatureFuseAES,
+                                   FeatureFuseAdrpAdd,
                                    FeatureBalanceFPOps,
                                    FeatureCustomCheapAsMoveHandling,
                                    FeaturePostRAScheduler]>;
 
 def TuneA55     : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
                                    "Cortex-A55 ARM processors", [
                                    FeatureFuseAES,
+                                   FeatureFuseAdrpAdd,
                                    FeaturePostRAScheduler,
                                    FeatureFuseAddress]>;
 
 def TuneA510    : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
                                    "Cortex-A510 ARM processors", [
                                    FeatureFuseAES,
+                                   FeatureFuseAdrpAdd,
                                    FeaturePostRAScheduler
                                    ]>;
 
@@ -709,27 +712,32 @@ def TuneA72     : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
 
 def TuneA73     : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
                                    "Cortex-A73 ARM processors", [
-                                   FeatureFuseAES]>;
+                                   FeatureFuseAES,
+                                   FeatureFuseAdrpAdd]>;
 
 def TuneA75     : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
                                    "Cortex-A75 ARM processors", [
-                                   FeatureFuseAES]>;
+                                   FeatureFuseAES,
+                                   FeatureFuseAdrpAdd]>;
 
 def TuneA76     : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
                                    "Cortex-A76 ARM processors", [
                                    FeatureFuseAES,
+                                   FeatureFuseAdrpAdd,
                                    FeatureLSLFast]>;
 
 def TuneA77     : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
                                    "Cortex-A77 ARM processors", [
                                    FeatureCmpBccFusion,
                                    FeatureFuseAES,
+                                   FeatureFuseAdrpAdd,
                                    FeatureLSLFast]>;
 
 def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
                                "Cortex-A78 ARM processors", [
                                FeatureCmpBccFusion,
                                FeatureFuseAES,
+                               FeatureFuseAdrpAdd,
                                FeatureLSLFast,
                                FeaturePostRAScheduler]>;
 
@@ -738,13 +746,15 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
                                 "Cortex-A78C ARM processors", [
                                 FeatureCmpBccFusion,
                                 FeatureFuseAES,
+                                FeatureFuseAdrpAdd,
                                 FeatureLSLFast,
                                 FeaturePostRAScheduler]>;
 
 def TuneA710    : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
                                    "Cortex-A710 ARM processors", [
                                    FeatureCmpBccFusion,
                                    FeatureFuseAES,
+                                   FeatureFuseAdrpAdd,
                                    FeatureLSLFast,
                                    FeaturePostRAScheduler]>;
 
@@ -757,13 +767,15 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
                                   "Cortex-X1 ARM processors", [
                                   FeatureCmpBccFusion,
                                   FeatureFuseAES,
+                                  FeatureFuseAdrpAdd,
                                   FeatureLSLFast,
                                   FeaturePostRAScheduler]>;
 
 def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
                                   "Cortex-X2 ARM processors", [
                                   FeatureCmpBccFusion,
                                   FeatureFuseAES,
+                                  FeatureFuseAdrpAdd,
                                   FeatureLSLFast,
                                   FeaturePostRAScheduler]>;
 
@@ -941,6 +953,7 @@ def TuneFalkor  : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
 def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1",
                                       "Neoverse E1 ARM processors", [
                                       FeatureFuseAES,
+                                      FeatureFuseAdrpAdd,
                                       FeaturePostRAScheduler]>;
 
 def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1",
@@ -953,18 +966,21 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1
 def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2",
                                       "Neoverse N2 ARM processors", [
                                       FeatureFuseAES,
+                                      FeatureFuseAdrpAdd,
                                       FeatureLSLFast,
                                       FeaturePostRAScheduler]>;
 
 def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB",
                                       "Neoverse 512-TVB ARM processors", [
                                       FeatureFuseAES,
+                                      FeatureFuseAdrpAdd,
                                       FeatureLSLFast,
                                       FeaturePostRAScheduler]>;
 
 def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1",
                                       "Neoverse V1 ARM processors", [
                                       FeatureFuseAES,
+                                      FeatureFuseAdrpAdd,
                                       FeatureLSLFast,
                                       FeaturePostRAScheduler]>;
 
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addadrp.ll
@@ -0,0 +1,47 @@
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-adrp-add | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic         | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53      | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a55      | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a510     | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73      | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a75      | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a76      | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a77      | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78      | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78c     | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a710     | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x1       | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x2       | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-e1     | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n1     | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v1     | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n2     | FileCheck %s
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-512tvb | FileCheck %s
+
+; Check that every CPU tuned with FeatureFuseAdrpAdd emits each adrp
+; immediately followed by the instruction that applies the matching :lo12:
+; offset, so the pair stays adjacent and the linker can relax it.
+
+@g = common local_unnamed_addr global i8* null, align 8
+
+; adrp + add materialising the address of a function symbol.
+define dso_local i8* @addldr(i32 %a, i32 %b) {
+; CHECK-LABEL: addldr:
+; CHECK: adrp [[R:x[0-9]+]], addldr
+; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:addldr
+entry:
+  %add = add nsw i32 %b, %a
+  %idx.ext = sext i32 %add to i64
+  %add.ptr = getelementptr i8, i8* bitcast (i8* (i32, i32)* @addldr to i8*), i64 %idx.ext
+  store i8* %add.ptr, i8** @g, align 8
+  ret i8* %add.ptr
+}
+
+; adrp + ldr loading a floating-point literal from a constant-pool label.
+define double @litf() {
+; CHECK-LABEL: litf:
+; CHECK:      adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
+; CHECK-NEXT: ldr  {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
+entry:
+  ret double 0x400921FB54442D18
+}