Skip to content

Vulkan backend fails to compile a number of shaders on Adreno #6395

@woachk

Description

@woachk

Hello,

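I tried to run llama.cpp with the Vulkan backend on an Adreno 690 (Snapdragon 8cx Gen 3) under Windows 11 version 24H2. Pipeline creation fails: for the matmul_q4_k_f32_l shader, vkCreateComputePipelines returns VK_ERROR_UNKNOWN (-13). This is the API dump I get: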

ggml_vk_create_pipeline(matmul_q4_k_f32_l, main, 3, 56, (128,128,1), specialization_constants, 128)
Thread 0, Frame 0:
vkCreateShaderModule(device, pCreateInfo, pAllocator, pShaderModule) returns VkResult VK_SUCCESS (0):
    device:                         VkDevice = 00000174669B6F50
    pCreateInfo:                    const VkShaderModuleCreateInfo* = 0000004041920B80:
        sType:                          VkStructureType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO (16)
        pNext:                          const void* = NULL
        flags:                          VkShaderModuleCreateFlags = 0
        codeSize:                       size_t = 12072
        pCode:                          const uint32_t* = SHADER DATA
    pAllocator:                     const VkAllocationCallbacks* = NULL
    pShaderModule:                  VkShaderModule* = 0000017466C7FBC0

Thread 0, Frame 0:
vkCreateDescriptorSetLayout(device, pCreateInfo, pAllocator, pSetLayout) returns VkResult VK_SUCCESS (0):
    device:                         VkDevice = 00000174669B6F50
    pCreateInfo:                    const VkDescriptorSetLayoutCreateInfo* = 0000004041920C48:
        sType:                          VkStructureType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO (32)
        pNext:                          const void* = VkDescriptorSetLayoutBindingFlagsCreateInfo
        flags:                          VkDescriptorSetLayoutCreateFlags = 0
        bindingCount:                   uint32_t = 3
        pBindings:                      const VkDescriptorSetLayoutBinding* = 000001745EDD4BD0
            pBindings[0]:                   const VkDescriptorSetLayoutBinding = 000001745EDD4BD0:
                binding:                        uint32_t = 0
                descriptorType:                 VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
                descriptorCount:                uint32_t = 1
                stageFlags:                     VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
                pImmutableSamplers:             const VkSampler* = UNUSED
            pBindings[1]:                   const VkDescriptorSetLayoutBinding = 000001745EDD4BE8:
                binding:                        uint32_t = 1
                descriptorType:                 VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
                descriptorCount:                uint32_t = 1
                stageFlags:                     VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
                pImmutableSamplers:             const VkSampler* = UNUSED
            pBindings[2]:                   const VkDescriptorSetLayoutBinding = 000001745EDD4C00:
                binding:                        uint32_t = 2
                descriptorType:                 VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
                descriptorCount:                uint32_t = 1
                stageFlags:                     VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
                pImmutableSamplers:             const VkSampler* = UNUSED
        pNext:                          VkDescriptorSetLayoutBindingFlagsCreateInfo = 0000004041920C08:
            sType:                          VkStructureType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO (1000161000)
            pNext:                          const void* = NULL
            bindingCount:                   uint32_t = 3
            pBindingFlags:                  const VkDescriptorBindingFlags* = 00000174667DE6F0
                pBindingFlags[0]:               const VkDescriptorBindingFlags = 0
                pBindingFlags[1]:               const VkDescriptorBindingFlags = 0
                pBindingFlags[2]:               const VkDescriptorBindingFlags = 0
    pAllocator:                     const VkAllocationCallbacks* = NULL
    pSetLayout:                     VkDescriptorSetLayout* = 0000017466A468F0

Thread 0, Frame 0:
vkCreatePipelineLayout(device, pCreateInfo, pAllocator, pPipelineLayout) returns VkResult VK_SUCCESS (0):
    device:                         VkDevice = 00000174669B6F50
    pCreateInfo:                    const VkPipelineLayoutCreateInfo* = 0000004041920D90:
        sType:                          VkStructureType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO (30)
        pNext:                          const void* = NULL
        flags:                          VkPipelineLayoutCreateFlags = 0
        setLayoutCount:                 uint32_t = 1
        pSetLayouts:                    const VkDescriptorSetLayout* = 0000017466B13170
            pSetLayouts[0]:                 const VkDescriptorSetLayout = 0000017466A468F0
        pushConstantRangeCount:         uint32_t = 1
        pPushConstantRanges:            const VkPushConstantRange* = 0000004041920C30
            pPushConstantRanges[0]:         const VkPushConstantRange = 0000004041920C30:
                stageFlags:                     VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
                offset:                         uint32_t = 0
                size:                           uint32_t = 56
    pAllocator:                     const VkAllocationCallbacks* = NULL
    pPipelineLayout:                VkPipelineLayout* = 0000017472C5FC60

Thread 0, Frame 0:
vkCreateComputePipelines(device, pipelineCache, createInfoCount, pCreateInfos, pAllocator, pPipelines) returns VkResult VK_ERROR_UNKNOWN (-13):
    device:                         VkDevice = 00000174669B6F50
    pipelineCache:                  VkPipelineCache = 0000000000000000
    createInfoCount:                uint32_t = 1
    pCreateInfos:                   const VkComputePipelineCreateInfo* = 0000004041920E60
        pCreateInfos[0]:                const VkComputePipelineCreateInfo = 0000004041920E60:
            sType:                          VkStructureType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO (29)
            pNext:                          const void* = NULL
            flags:                          VkPipelineCreateFlags = 0
            stage:                          VkPipelineShaderStageCreateInfo = 0000004041920E78:
                sType:                          VkStructureType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO (18)
                pNext:                          const void* = NULL
                flags:                          VkPipelineShaderStageCreateFlags = 0
                stage:                          VkShaderStageFlagBits = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
                module:                         VkShaderModule = 0000017466C7FBC0
                pName:                          const char* = "main"
                pSpecializationInfo:            const VkSpecializationInfo* = 0000004041920E00:
                    mapEntryCount:                  uint32_t = 10
                    pMapEntries:                    const VkSpecializationMapEntry* = 000001746699DEF0
                        pMapEntries[0]:                 const VkSpecializationMapEntry = 000001746699DEF0:
                            constantID:                     uint32_t = 0
                            offset:                         uint32_t = 0
                            size:                           size_t = 4
                        pMapEntries[1]:                 const VkSpecializationMapEntry = 000001746699DF00:
                            constantID:                     uint32_t = 1
                            offset:                         uint32_t = 4
                            size:                           size_t = 4
                        pMapEntries[2]:                 const VkSpecializationMapEntry = 000001746699DF10:
                            constantID:                     uint32_t = 2
                            offset:                         uint32_t = 8
                            size:                           size_t = 4
                        pMapEntries[3]:                 const VkSpecializationMapEntry = 000001746699DF20:
                            constantID:                     uint32_t = 3
                            offset:                         uint32_t = 12
                            size:                           size_t = 4
                        pMapEntries[4]:                 const VkSpecializationMapEntry = 000001746699DF30:
                            constantID:                     uint32_t = 4
                            offset:                         uint32_t = 16
                            size:                           size_t = 4
                        pMapEntries[5]:                 const VkSpecializationMapEntry = 000001746699DF40:
                            constantID:                     uint32_t = 5
                            offset:                         uint32_t = 20
                            size:                           size_t = 4
                        pMapEntries[6]:                 const VkSpecializationMapEntry = 000001746699DF50:
                            constantID:                     uint32_t = 6
                            offset:                         uint32_t = 24
                            size:                           size_t = 4
                        pMapEntries[7]:                 const VkSpecializationMapEntry = 000001746699DF60:
                            constantID:                     uint32_t = 7
                            offset:                         uint32_t = 28
                            size:                           size_t = 4
                        pMapEntries[8]:                 const VkSpecializationMapEntry = 000001746699DF70:
                            constantID:                     uint32_t = 8
                            offset:                         uint32_t = 32
                            size:                           size_t = 4
                        pMapEntries[9]:                 const VkSpecializationMapEntry = 000001746699DF80:
                            constantID:                     uint32_t = 9
                            offset:                         uint32_t = 36
                            size:                           size_t = 4
                    dataSize:                       size_t = 40
                    pData:                          const void* = 000001746531B410
            layout:                         VkPipelineLayout = 0000017472C5FC60
            basePipelineHandle:             VkPipeline = 0000000000000000
            basePipelineIndex:              int32_t = 0
    pAllocator:                     const VkAllocationCallbacks* = NULL
    pPipelines:                     VkPipeline* = 0000004041920AC8
        pPipelines[0]:                  VkPipeline = 0000000000000000

When uncommenting shaders, it turned out that the problematic ones also include dequant_q4_0, among others.

This is the same bug as #5739, which was reported on Android.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions