Skip to content

[AMDGPU] Illegal VGPR to SGPR Copy When Argument Passing Has SGPR to VGPR Spill #130443

@shiltian

Description

@shiltian

This is a follow-up to #113782. The same input IR now fails with an illegal VGPR-to-SGPR copy.

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o -

; External callee that takes a single 16-element i32 array marked inreg.
; NOTE(review): the symbol name says "a15i32" but the type is [16 x i32];
; presumably the name was carried over from an earlier test -- confirm.
declare hidden void @external_void_func_a15i32_inreg([16 x i32] inreg)

; Reproducer: forwards its own inreg [16 x i32] argument unchanged to the
; callee. In the llc output for this function, 14 of the 16 elements are
; moved between SGPRs, while the last two are reported as
; "illegal copy v0 to s26" / "illegal copy v1 to s27".
define void @test_call_external_void_func_a15i32_inreg([16 x i32] inreg %arg0) {
  call void @external_void_func_a15i32_inreg([16 x i32] inreg %arg0)
  ret void
}
Error output
error: <unknown>:0:0: in function test_call_external_void_func_a15i32_inreg void ([16 x i32]): illegal VGPR to SGPR copy

error: <unknown>:0:0: in function test_call_external_void_func_a15i32_inreg void ([16 x i32]): illegal VGPR to SGPR copy

        ; llc output for the reproducer (target gfx900, code object v5).
        ; The function forwards 16 inreg i32 values to
        ; external_void_func_a15i32_inreg; the two "illegal copy" lines
        ; before the call are where codegen failed.
        .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
        .amdhsa_code_object_version 5
        .text
        .globl  test_call_external_void_func_a15i32_inreg ; -- Begin function test_call_external_void_func_a15i32_inreg
        .p2align        2
        .type   test_call_external_void_func_a15i32_inreg,@function
test_call_external_void_func_a15i32_inreg: ; @test_call_external_void_func_a15i32_inreg
; %bb.0:
        s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
        ; --- Prologue ---
        ; Keep the incoming s33 in s40 and set s33 = s32
        ; (presumably frame pointer / stack pointer -- confirm against the ABI).
        s_mov_b32 s40, s33
        s_mov_b32 s33, s32
        ; Save exec and enable all lanes, spill v40 to scratch at s33, then
        ; restore exec. v40 is used below as a lane-indexed save area.
        s_or_saveexec_b64 s[42:43], -1
        buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
        s_mov_b64 exec, s[42:43]
        ; v40 lane 2 = old s33, lanes 0/1 = s30/s31 (restored after the call).
        v_writelane_b32 v40, s40, 2
        s_addk_i32 s32, 0x400
        v_writelane_b32 v40, s30, 0
        v_writelane_b32 v40, s31, 1
        ; --- Callee address: s[40:41] = PC-relative address of the callee ---
        s_getpc_b64 s[40:41]
        s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4
        s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12
        ; --- Outgoing argument shuffle ---
        ; Incoming s16-s19 are moved to s0-s3, incoming s20-s29 to s16-s25.
        ; NOTE(review): s[0:3] is also the buffer resource operand of the
        ; spill above and of the reload after the call; overwriting s0-s3
        ; here looks suspicious -- confirm against the calling convention.
        s_mov_b32 s3, s19
        s_mov_b32 s2, s18
        s_mov_b32 s1, s17
        s_mov_b32 s0, s16
        s_mov_b32 s16, s20
        s_mov_b32 s17, s21
        s_mov_b32 s18, s22
        s_mov_b32 s19, s23
        s_mov_b32 s20, s24
        s_mov_b32 s21, s25
        s_mov_b32 s22, s26
        s_mov_b32 s23, s27
        s_mov_b32 s24, s28
        s_mov_b32 s25, s29
        ; BUG: the last two values are in v0/v1 and would have to be copied
        ; into s26/s27. No instruction is emitted for either copy -- only the
        ; placeholder comments below -- so this listing never writes s26/s27
        ; with those values before the call.
         ; illegal copy v0 to s26
         ; illegal copy v1 to s27
        ; Call the callee; the return address is written to s[30:31].
        s_swappc_b64 s[30:31], s[40:41]
        ; --- Epilogue ---
        ; Restore s30/s31 and the saved s33 value from v40's lanes, reset
        ; s32 from s33, reload the spilled v40 with all lanes enabled, return.
        v_readlane_b32 s31, v40, 1
        v_readlane_b32 s30, v40, 0
        s_mov_b32 s32, s33
        v_readlane_b32 s4, v40, 2
        s_or_saveexec_b64 s[6:7], -1
        buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
        s_mov_b64 exec, s[6:7]
        s_mov_b32 s33, s4
        s_waitcnt vmcnt(0)
        s_setpc_b64 s[30:31]
.Lfunc_end0:
        .size   test_call_external_void_func_a15i32_inreg, .Lfunc_end0-test_call_external_void_func_a15i32_inreg
                                        ; -- End function
        ; Per-function resource-usage symbols emitted by llc. The register
        ; counts are max() expressions over the amdgpu.max_num_* symbols that
        ; are defined further down in the .AMDGPU.gpr_maximums section.
        .set test_call_external_void_func_a15i32_inreg.num_vgpr, max(41, amdgpu.max_num_vgpr)
        .set test_call_external_void_func_a15i32_inreg.num_agpr, max(0, amdgpu.max_num_agpr)
        .set test_call_external_void_func_a15i32_inreg.numbered_sgpr, max(44, amdgpu.max_num_sgpr)
        .set test_call_external_void_func_a15i32_inreg.private_seg_size, 16
        .set test_call_external_void_func_a15i32_inreg.uses_vcc, 1
        .set test_call_external_void_func_a15i32_inreg.uses_flat_scratch, 1
        .set test_call_external_void_func_a15i32_inreg.has_dyn_sized_stack, 1
        .set test_call_external_void_func_a15i32_inreg.has_recursion, 1
        .set test_call_external_void_func_a15i32_inreg.has_indirect_call, 1
        .section        .AMDGPU.csdata,"",@progbits
; Function info:
; codeLenInByte = 192
; TotalNumSgprs: test_call_external_void_func_a15i32_inreg.numbered_sgpr+6
; NumVgprs: max(41, amdgpu.max_num_vgpr)
; ScratchSize: 16
; MemoryBound: 0
        .section        .AMDGPU.gpr_maximums,"",@progbits
        ; Values of the amdgpu.max_num_* symbols used by the max(...)
        ; expressions in the per-function .set lines.
        .set amdgpu.max_num_vgpr, 41
        .set amdgpu.max_num_agpr, 0
        .set amdgpu.max_num_sgpr, 44
        .section        .AMDGPU.csdata,"",@progbits
        .hidden external_void_func_a15i32_inreg
        .section        ".note.GNU-stack","",@progbits
        .section        ".note.GNU-stack","",@progbits
        .amdgpu_metadata
---
amdhsa.kernels:  []
amdhsa.target:   amdgcn-amd-amdhsa--gfx900
amdhsa.version:
  - 1
  - 2
...

        .end_amdgpu_metadata

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions