Skip to content

Commit cdfd728

Browse files
committed
Rebase and address comments.
1 parent 1af8cfb commit cdfd728

File tree

3 files changed

+39
-31
lines changed

3 files changed

+39
-31
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1529,7 +1529,7 @@ static void markKernelArgsAsInreg(SetVector<Function *> &Functions,
15291529
for (auto *F : Functions) {
15301530
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
15311531
if (!ST.hasKernargPreload() ||
1532-
F->getCallingConv() != CallingConv::AMDGPU_KERNEL || F->arg_empty())
1532+
F->getCallingConv() != CallingConv::AMDGPU_KERNEL)
15331533
continue;
15341534

15351535
PreloadKernelArgInfo PreloadInfo(*F, ST);
@@ -1541,6 +1541,8 @@ static void markKernelArgsAsInreg(SetVector<Function *> &Functions,
15411541
for (Argument &Arg : F->args()) {
15421542
// Avoid incompatible attributes and guard against running this pass
15431543
// twice.
1544+
//
1545+
// TODO: Preload byref kernel arguments
15441546
if (Arg.hasByRefAttr() || Arg.hasNestAttr() ||
15451547
Arg.hasAttribute("amdgpu-hidden-argument"))
15461548
break;

llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-IR-lowering.ll

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,30 @@
33
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-kernarg-preload-count=100 -S < %s | FileCheck -check-prefix=PRELOAD %s
44

55

6-
define amdgpu_kernel void @incompatible_attribute_block_count_x(ptr addrspace(1) byref(i32) %out) {
6+
define amdgpu_kernel void @incompatible_attribute_block_count_x(ptr addrspace(1) %out, ptr addrspace(1) byref(i32) %arg) {
77
; NO-PRELOAD-LABEL: define {{[^@]+}}@incompatible_attribute_block_count_x
8-
; NO-PRELOAD-SAME: (ptr addrspace(1) byref(i32) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
8+
; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) byref(i32) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
99
; NO-PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
10-
; NO-PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
11-
; NO-PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT]], align 4
10+
; NO-PRELOAD-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
11+
; NO-PRELOAD-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[ARG]], align 4
12+
; NO-PRELOAD-NEXT: [[ADD:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
13+
; NO-PRELOAD-NEXT: store i32 [[ADD]], ptr addrspace(1) [[OUT]], align 4
1214
; NO-PRELOAD-NEXT: ret void
1315
;
1416
; PRELOAD-LABEL: define {{[^@]+}}@incompatible_attribute_block_count_x
15-
; PRELOAD-SAME: (ptr addrspace(1) byref(i32) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
17+
; PRELOAD-SAME: (ptr addrspace(1) inreg [[OUT:%.*]], ptr addrspace(1) byref(i32) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
1618
; PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
17-
; PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
18-
; PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT]], align 4
19+
; PRELOAD-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
20+
; PRELOAD-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[ARG]], align 4
21+
; PRELOAD-NEXT: [[ADD:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
22+
; PRELOAD-NEXT: store i32 [[ADD]], ptr addrspace(1) [[OUT]], align 4
1923
; PRELOAD-NEXT: ret void
2024
;
2125
%imp_arg_ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
22-
%load = load i32, ptr addrspace(4) %imp_arg_ptr
23-
store i32 %load, ptr addrspace(1) %out
26+
%load0 = load i32, ptr addrspace(4) %imp_arg_ptr
27+
%load1 = load i32, ptr addrspace(1) %arg
28+
%add = add i32 %load0, %load1
29+
store i32 %add, ptr addrspace(1) %out
2430
ret void
2531
}
2632

@@ -87,16 +93,16 @@ define amdgpu_kernel void @preload_unused_arg_block_count_x(ptr addrspace(1) %ou
8793
ret void
8894
}
8995

90-
define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) %out, i512 inreg) {
96+
define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) %out, <16 x i32> inreg) {
9197
; NO-PRELOAD-LABEL: define {{[^@]+}}@no_free_sgprs_block_count_x
92-
; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], i512 inreg [[TMP0:%.*]]) #[[ATTR0]] {
98+
; NO-PRELOAD-SAME: (ptr addrspace(1) [[OUT:%.*]], <16 x i32> inreg [[TMP0:%.*]]) #[[ATTR0]] {
9399
; NO-PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
94100
; NO-PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
95101
; NO-PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT]], align 4
96102
; NO-PRELOAD-NEXT: ret void
97103
;
98104
; PRELOAD-LABEL: define {{[^@]+}}@no_free_sgprs_block_count_x
99-
; PRELOAD-SAME: (ptr addrspace(1) inreg [[OUT:%.*]], i512 inreg [[TMP0:%.*]]) #[[ATTR0]] {
105+
; PRELOAD-SAME: (ptr addrspace(1) inreg [[OUT:%.*]], <16 x i32> inreg [[TMP0:%.*]]) #[[ATTR0]] {
100106
; PRELOAD-NEXT: [[IMP_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
101107
; PRELOAD-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[IMP_ARG_PTR]], align 4
102108
; PRELOAD-NEXT: store i32 [[LOAD]], ptr addrspace(1) [[OUT]], align 4

llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -469,17 +469,17 @@ define amdgpu_kernel void @ptr1_byref_i32_i32_staggered_kernel(ptr addrspace(1)
469469

470470
define amdgpu_kernel void @ptr1_v8i32_kernel(ptr addrspace(1) nocapture %out, <8 x i32> %in) {
471471
; NO-PRELOAD-LABEL: define {{[^@]+}}@ptr1_v8i32_kernel
472-
; NO-PRELOAD-SAME: (ptr addrspace(1) nocapture [[OUT:%.*]], <8 x i32> [[IN:%.*]]) #[[ATTR0]] {
472+
; NO-PRELOAD-SAME: (ptr addrspace(1) captures(none) [[OUT:%.*]], <8 x i32> [[IN:%.*]]) #[[ATTR0]] {
473473
; NO-PRELOAD-NEXT: store <8 x i32> [[IN]], ptr addrspace(1) [[OUT]], align 4
474474
; NO-PRELOAD-NEXT: ret void
475475
;
476476
; PRELOAD-2-LABEL: define {{[^@]+}}@ptr1_v8i32_kernel
477-
; PRELOAD-2-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <8 x i32> [[IN:%.*]]) #[[ATTR0]] {
477+
; PRELOAD-2-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <8 x i32> [[IN:%.*]]) #[[ATTR0]] {
478478
; PRELOAD-2-NEXT: store <8 x i32> [[IN]], ptr addrspace(1) [[OUT]], align 4
479479
; PRELOAD-2-NEXT: ret void
480480
;
481481
; PRELOAD-ALL-LABEL: define {{[^@]+}}@ptr1_v8i32_kernel
482-
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <8 x i32> [[IN:%.*]]) #[[ATTR0]] {
482+
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <8 x i32> [[IN:%.*]]) #[[ATTR0]] {
483483
; PRELOAD-ALL-NEXT: store <8 x i32> [[IN]], ptr addrspace(1) [[OUT]], align 4
484484
; PRELOAD-ALL-NEXT: ret void
485485
;
@@ -489,17 +489,17 @@ define amdgpu_kernel void @ptr1_v8i32_kernel(ptr addrspace(1) nocapture %out, <8
489489

490490
define amdgpu_kernel void @ptr1_v3i16_kernel(ptr addrspace(1) nocapture %out, <3 x i16> %in) {
491491
; NO-PRELOAD-LABEL: define {{[^@]+}}@ptr1_v3i16_kernel
492-
; NO-PRELOAD-SAME: (ptr addrspace(1) nocapture [[OUT:%.*]], <3 x i16> [[IN:%.*]]) #[[ATTR0]] {
492+
; NO-PRELOAD-SAME: (ptr addrspace(1) captures(none) [[OUT:%.*]], <3 x i16> [[IN:%.*]]) #[[ATTR0]] {
493493
; NO-PRELOAD-NEXT: store <3 x i16> [[IN]], ptr addrspace(1) [[OUT]], align 4
494494
; NO-PRELOAD-NEXT: ret void
495495
;
496496
; PRELOAD-2-LABEL: define {{[^@]+}}@ptr1_v3i16_kernel
497-
; PRELOAD-2-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <3 x i16> inreg [[IN:%.*]]) #[[ATTR0]] {
497+
; PRELOAD-2-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <3 x i16> inreg [[IN:%.*]]) #[[ATTR0]] {
498498
; PRELOAD-2-NEXT: store <3 x i16> [[IN]], ptr addrspace(1) [[OUT]], align 4
499499
; PRELOAD-2-NEXT: ret void
500500
;
501501
; PRELOAD-ALL-LABEL: define {{[^@]+}}@ptr1_v3i16_kernel
502-
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <3 x i16> inreg [[IN:%.*]]) #[[ATTR0]] {
502+
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <3 x i16> inreg [[IN:%.*]]) #[[ATTR0]] {
503503
; PRELOAD-ALL-NEXT: store <3 x i16> [[IN]], ptr addrspace(1) [[OUT]], align 4
504504
; PRELOAD-ALL-NEXT: ret void
505505
;
@@ -509,17 +509,17 @@ define amdgpu_kernel void @ptr1_v3i16_kernel(ptr addrspace(1) nocapture %out, <3
509509

510510
define amdgpu_kernel void @ptr1_v3i32_kernel(ptr addrspace(1) nocapture %out, <3 x i32> %in) {
511511
; NO-PRELOAD-LABEL: define {{[^@]+}}@ptr1_v3i32_kernel
512-
; NO-PRELOAD-SAME: (ptr addrspace(1) nocapture [[OUT:%.*]], <3 x i32> [[IN:%.*]]) #[[ATTR0]] {
512+
; NO-PRELOAD-SAME: (ptr addrspace(1) captures(none) [[OUT:%.*]], <3 x i32> [[IN:%.*]]) #[[ATTR0]] {
513513
; NO-PRELOAD-NEXT: store <3 x i32> [[IN]], ptr addrspace(1) [[OUT]], align 4
514514
; NO-PRELOAD-NEXT: ret void
515515
;
516516
; PRELOAD-2-LABEL: define {{[^@]+}}@ptr1_v3i32_kernel
517-
; PRELOAD-2-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <3 x i32> inreg [[IN:%.*]]) #[[ATTR0]] {
517+
; PRELOAD-2-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <3 x i32> inreg [[IN:%.*]]) #[[ATTR0]] {
518518
; PRELOAD-2-NEXT: store <3 x i32> [[IN]], ptr addrspace(1) [[OUT]], align 4
519519
; PRELOAD-2-NEXT: ret void
520520
;
521521
; PRELOAD-ALL-LABEL: define {{[^@]+}}@ptr1_v3i32_kernel
522-
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <3 x i32> inreg [[IN:%.*]]) #[[ATTR0]] {
522+
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <3 x i32> inreg [[IN:%.*]]) #[[ATTR0]] {
523523
; PRELOAD-ALL-NEXT: store <3 x i32> [[IN]], ptr addrspace(1) [[OUT]], align 4
524524
; PRELOAD-ALL-NEXT: ret void
525525
;
@@ -529,17 +529,17 @@ define amdgpu_kernel void @ptr1_v3i32_kernel(ptr addrspace(1) nocapture %out, <3
529529

530530
define amdgpu_kernel void @ptr1_v3f32_kernel(ptr addrspace(1) nocapture %out, <3 x float> %in) {
531531
; NO-PRELOAD-LABEL: define {{[^@]+}}@ptr1_v3f32_kernel
532-
; NO-PRELOAD-SAME: (ptr addrspace(1) nocapture [[OUT:%.*]], <3 x float> [[IN:%.*]]) #[[ATTR0]] {
532+
; NO-PRELOAD-SAME: (ptr addrspace(1) captures(none) [[OUT:%.*]], <3 x float> [[IN:%.*]]) #[[ATTR0]] {
533533
; NO-PRELOAD-NEXT: store <3 x float> [[IN]], ptr addrspace(1) [[OUT]], align 4
534534
; NO-PRELOAD-NEXT: ret void
535535
;
536536
; PRELOAD-2-LABEL: define {{[^@]+}}@ptr1_v3f32_kernel
537-
; PRELOAD-2-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <3 x float> inreg [[IN:%.*]]) #[[ATTR0]] {
537+
; PRELOAD-2-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <3 x float> inreg [[IN:%.*]]) #[[ATTR0]] {
538538
; PRELOAD-2-NEXT: store <3 x float> [[IN]], ptr addrspace(1) [[OUT]], align 4
539539
; PRELOAD-2-NEXT: ret void
540540
;
541541
; PRELOAD-ALL-LABEL: define {{[^@]+}}@ptr1_v3f32_kernel
542-
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <3 x float> inreg [[IN:%.*]]) #[[ATTR0]] {
542+
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <3 x float> inreg [[IN:%.*]]) #[[ATTR0]] {
543543
; PRELOAD-ALL-NEXT: store <3 x float> [[IN]], ptr addrspace(1) [[OUT]], align 4
544544
; PRELOAD-ALL-NEXT: ret void
545545
;
@@ -549,17 +549,17 @@ define amdgpu_kernel void @ptr1_v3f32_kernel(ptr addrspace(1) nocapture %out, <3
549549

550550
define amdgpu_kernel void @ptr1_v5i8_kernel(ptr addrspace(1) nocapture %out, <5 x i8> %in) {
551551
; NO-PRELOAD-LABEL: define {{[^@]+}}@ptr1_v5i8_kernel
552-
; NO-PRELOAD-SAME: (ptr addrspace(1) nocapture [[OUT:%.*]], <5 x i8> [[IN:%.*]]) #[[ATTR0]] {
552+
; NO-PRELOAD-SAME: (ptr addrspace(1) captures(none) [[OUT:%.*]], <5 x i8> [[IN:%.*]]) #[[ATTR0]] {
553553
; NO-PRELOAD-NEXT: store <5 x i8> [[IN]], ptr addrspace(1) [[OUT]], align 4
554554
; NO-PRELOAD-NEXT: ret void
555555
;
556556
; PRELOAD-2-LABEL: define {{[^@]+}}@ptr1_v5i8_kernel
557-
; PRELOAD-2-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <5 x i8> inreg [[IN:%.*]]) #[[ATTR0]] {
557+
; PRELOAD-2-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <5 x i8> inreg [[IN:%.*]]) #[[ATTR0]] {
558558
; PRELOAD-2-NEXT: store <5 x i8> [[IN]], ptr addrspace(1) [[OUT]], align 4
559559
; PRELOAD-2-NEXT: ret void
560560
;
561561
; PRELOAD-ALL-LABEL: define {{[^@]+}}@ptr1_v5i8_kernel
562-
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <5 x i8> inreg [[IN:%.*]]) #[[ATTR0]] {
562+
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <5 x i8> inreg [[IN:%.*]]) #[[ATTR0]] {
563563
; PRELOAD-ALL-NEXT: store <5 x i8> [[IN]], ptr addrspace(1) [[OUT]], align 4
564564
; PRELOAD-ALL-NEXT: ret void
565565
;
@@ -569,17 +569,17 @@ define amdgpu_kernel void @ptr1_v5i8_kernel(ptr addrspace(1) nocapture %out, <5
569569

570570
define amdgpu_kernel void @ptr1_v5f64_kernel(ptr addrspace(1) nocapture %out, <5 x double> %in) {
571571
; NO-PRELOAD-LABEL: define {{[^@]+}}@ptr1_v5f64_kernel
572-
; NO-PRELOAD-SAME: (ptr addrspace(1) nocapture [[OUT:%.*]], <5 x double> [[IN:%.*]]) #[[ATTR0]] {
572+
; NO-PRELOAD-SAME: (ptr addrspace(1) captures(none) [[OUT:%.*]], <5 x double> [[IN:%.*]]) #[[ATTR0]] {
573573
; NO-PRELOAD-NEXT: store <5 x double> [[IN]], ptr addrspace(1) [[OUT]], align 8
574574
; NO-PRELOAD-NEXT: ret void
575575
;
576576
; PRELOAD-2-LABEL: define {{[^@]+}}@ptr1_v5f64_kernel
577-
; PRELOAD-2-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <5 x double> [[IN:%.*]]) #[[ATTR0]] {
577+
; PRELOAD-2-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <5 x double> [[IN:%.*]]) #[[ATTR0]] {
578578
; PRELOAD-2-NEXT: store <5 x double> [[IN]], ptr addrspace(1) [[OUT]], align 8
579579
; PRELOAD-2-NEXT: ret void
580580
;
581581
; PRELOAD-ALL-LABEL: define {{[^@]+}}@ptr1_v5f64_kernel
582-
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg nocapture [[OUT:%.*]], <5 x double> [[IN:%.*]]) #[[ATTR0]] {
582+
; PRELOAD-ALL-SAME: (ptr addrspace(1) inreg captures(none) [[OUT:%.*]], <5 x double> [[IN:%.*]]) #[[ATTR0]] {
583583
; PRELOAD-ALL-NEXT: store <5 x double> [[IN]], ptr addrspace(1) [[OUT]], align 8
584584
; PRELOAD-ALL-NEXT: ret void
585585
;

0 commit comments

Comments
 (0)