@@ -594,4 +594,36 @@ define amdgpu_kernel void @preloadremainder_xyz(ptr addrspace(1) inreg %out) #0
594
594
ret void
595
595
}
596
596
597
; Kernel declared WITHOUT attribute group #0. It reads one 16-bit field of the
; implicit kernel arguments (byte offset 22 from the implicitarg pointer),
; zero-extends it to 32 bits, and stores the result through %out.
;
; In the expected code below the field is obtained with an s_load_dword at
; offset 0x1c followed by a 16-bit right shift, and the store goes through a
; higher SGPR pair than the one used as the load base.
; NOTE(review): going by the test name, offset 22 is presumably the z
; "remainder" implicit argument, and omitting #0 presumably leaves no spare
; user SGPRs to preload it into -- confirm against the AMDGPU kernarg layout.
define amdgpu_kernel void @no_free_sgprs_preloadremainder_z(ptr addrspace(1) inreg %out) {
; GFX940-LABEL: no_free_sgprs_preloadremainder_z:
; GFX940: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX940-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX940-NEXT: ; %bb.0:
; GFX940-NEXT: s_load_dword s0, s[2:3], 0x1c
; GFX940-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: v_mov_b32_e32 v1, s0
; GFX940-NEXT: global_store_dword v0, v1, s[6:7] sc0 sc1
; GFX940-NEXT: s_endpgm
;
; GFX90a-LABEL: no_free_sgprs_preloadremainder_z:
; GFX90a: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments.
; GFX90a-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0
; GFX90a-NEXT: ; %bb.0:
; GFX90a-NEXT: s_load_dword s0, s[6:7], 0x1c
; GFX90a-NEXT: v_mov_b32_e32 v0, 0
; GFX90a-NEXT: s_waitcnt lgkmcnt(0)
; GFX90a-NEXT: s_lshr_b32 s0, s0, 16
; GFX90a-NEXT: v_mov_b32_e32 v1, s0
; GFX90a-NEXT: global_store_dword v0, v1, s[10:11]
; GFX90a-NEXT: s_endpgm
  ; Base address of the implicit kernel arguments (constant address space 4).
  %implicit_args = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  ; Address of the 16-bit field located 22 bytes into the implicit arguments.
  %field_addr = getelementptr i8, ptr addrspace(4) %implicit_args, i32 22
  %field = load i16, ptr addrspace(4) %field_addr
  ; Widen to 32 bits so the value can be written with a single dword store.
  %field_ext = zext i16 %field to i32
  store i32 %field_ext, ptr addrspace(1) %out
  ret void
}
628
+
597
629
; Attribute group #0: the shared set of "amdgpu-no-*" string attributes (plus
; "uniform-work-group-size"="false") attached to kernels declared with #0,
; such as @preloadremainder_xyz. Kernels that omit #0 do not get any of them.
; NOTE(review): each "amdgpu-no-<input>" entry presumably tells the backend the
; kernel does not use that implicit input -- confirm against the AMDGPU
; backend documentation.
attributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" ="false" }
0 commit comments