-
Notifications
You must be signed in to change notification settings - Fork 13.1k
Closed
Labels
Description
Hello,
I tried to run llama.cpp with Vulkan on an Adreno 690 (Snapdragon 8cx Gen 3) under Windows 11 version 24H2, and this is what I get:
ggml_vk_create_pipeline(matmul_q4_k_f32_l, main, 3, 56, (128,128,1), specialization_constants, 128)
Thread 0, Frame 0:
vkCreateShaderModule(device, pCreateInfo, pAllocator, pShaderModule) returns VkResult VK_SUCCESS (0):
device: VkDevice = 00000174669B6F50
pCreateInfo: const VkShaderModuleCreateInfo* = 0000004041920B80:
sType: VkStructureType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO (16)
pNext: const void* = NULL
flags: VkShaderModuleCreateFlags = 0
codeSize: size_t = 12072
pCode: const uint32_t* = SHADER DATA
pAllocator: const VkAllocationCallbacks* = NULL
pShaderModule: VkShaderModule* = 0000017466C7FBC0
Thread 0, Frame 0:
vkCreateDescriptorSetLayout(device, pCreateInfo, pAllocator, pSetLayout) returns VkResult VK_SUCCESS (0):
device: VkDevice = 00000174669B6F50
pCreateInfo: const VkDescriptorSetLayoutCreateInfo* = 0000004041920C48:
sType: VkStructureType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO (32)
pNext: const void* = VkDescriptorSetLayoutBindingFlagsCreateInfo
flags: VkDescriptorSetLayoutCreateFlags = 0
bindingCount: uint32_t = 3
pBindings: const VkDescriptorSetLayoutBinding* = 000001745EDD4BD0
pBindings[0]: const VkDescriptorSetLayoutBinding = 000001745EDD4BD0:
binding: uint32_t = 0
descriptorType: VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
descriptorCount: uint32_t = 1
stageFlags: VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
pImmutableSamplers: const VkSampler* = UNUSED
pBindings[1]: const VkDescriptorSetLayoutBinding = 000001745EDD4BE8:
binding: uint32_t = 1
descriptorType: VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
descriptorCount: uint32_t = 1
stageFlags: VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
pImmutableSamplers: const VkSampler* = UNUSED
pBindings[2]: const VkDescriptorSetLayoutBinding = 000001745EDD4C00:
binding: uint32_t = 2
descriptorType: VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
descriptorCount: uint32_t = 1
stageFlags: VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
pImmutableSamplers: const VkSampler* = UNUSED
pNext: VkDescriptorSetLayoutBindingFlagsCreateInfo = 0000004041920C08:
sType: VkStructureType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO (1000161000)
pNext: const void* = NULL
bindingCount: uint32_t = 3
pBindingFlags: const VkDescriptorBindingFlags* = 00000174667DE6F0
pBindingFlags[0]: const VkDescriptorBindingFlags = 0
pBindingFlags[1]: const VkDescriptorBindingFlags = 0
pBindingFlags[2]: const VkDescriptorBindingFlags = 0
pAllocator: const VkAllocationCallbacks* = NULL
pSetLayout: VkDescriptorSetLayout* = 0000017466A468F0
Thread 0, Frame 0:
vkCreatePipelineLayout(device, pCreateInfo, pAllocator, pPipelineLayout) returns VkResult VK_SUCCESS (0):
device: VkDevice = 00000174669B6F50
pCreateInfo: const VkPipelineLayoutCreateInfo* = 0000004041920D90:
sType: VkStructureType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO (30)
pNext: const void* = NULL
flags: VkPipelineLayoutCreateFlags = 0
setLayoutCount: uint32_t = 1
pSetLayouts: const VkDescriptorSetLayout* = 0000017466B13170
pSetLayouts[0]: const VkDescriptorSetLayout = 0000017466A468F0
pushConstantRangeCount: uint32_t = 1
pPushConstantRanges: const VkPushConstantRange* = 0000004041920C30
pPushConstantRanges[0]: const VkPushConstantRange = 0000004041920C30:
stageFlags: VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
offset: uint32_t = 0
size: uint32_t = 56
pAllocator: const VkAllocationCallbacks* = NULL
pPipelineLayout: VkPipelineLayout* = 0000017472C5FC60
Thread 0, Frame 0:
vkCreateComputePipelines(device, pipelineCache, createInfoCount, pCreateInfos, pAllocator, pPipelines) returns VkResult VK_ERROR_UNKNOWN (-13):
device: VkDevice = 00000174669B6F50
pipelineCache: VkPipelineCache = 0000000000000000
createInfoCount: uint32_t = 1
pCreateInfos: const VkComputePipelineCreateInfo* = 0000004041920E60
pCreateInfos[0]: const VkComputePipelineCreateInfo = 0000004041920E60:
sType: VkStructureType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO (29)
pNext: const void* = NULL
flags: VkPipelineCreateFlags = 0
stage: VkPipelineShaderStageCreateInfo = 0000004041920E78:
sType: VkStructureType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO (18)
pNext: const void* = NULL
flags: VkPipelineShaderStageCreateFlags = 0
stage: VkShaderStageFlagBits = 32 (VK_SHADER_STAGE_COMPUTE_BIT)
module: VkShaderModule = 0000017466C7FBC0
pName: const char* = "main"
pSpecializationInfo: const VkSpecializationInfo* = 0000004041920E00:
mapEntryCount: uint32_t = 10
pMapEntries: const VkSpecializationMapEntry* = 000001746699DEF0
pMapEntries[0]: const VkSpecializationMapEntry = 000001746699DEF0:
constantID: uint32_t = 0
offset: uint32_t = 0
size: size_t = 4
pMapEntries[1]: const VkSpecializationMapEntry = 000001746699DF00:
constantID: uint32_t = 1
offset: uint32_t = 4
size: size_t = 4
pMapEntries[2]: const VkSpecializationMapEntry = 000001746699DF10:
constantID: uint32_t = 2
offset: uint32_t = 8
size: size_t = 4
pMapEntries[3]: const VkSpecializationMapEntry = 000001746699DF20:
constantID: uint32_t = 3
offset: uint32_t = 12
size: size_t = 4
pMapEntries[4]: const VkSpecializationMapEntry = 000001746699DF30:
constantID: uint32_t = 4
offset: uint32_t = 16
size: size_t = 4
pMapEntries[5]: const VkSpecializationMapEntry = 000001746699DF40:
constantID: uint32_t = 5
offset: uint32_t = 20
size: size_t = 4
pMapEntries[6]: const VkSpecializationMapEntry = 000001746699DF50:
constantID: uint32_t = 6
offset: uint32_t = 24
size: size_t = 4
pMapEntries[7]: const VkSpecializationMapEntry = 000001746699DF60:
constantID: uint32_t = 7
offset: uint32_t = 28
size: size_t = 4
pMapEntries[8]: const VkSpecializationMapEntry = 000001746699DF70:
constantID: uint32_t = 8
offset: uint32_t = 32
size: size_t = 4
pMapEntries[9]: const VkSpecializationMapEntry = 000001746699DF80:
constantID: uint32_t = 9
offset: uint32_t = 36
size: size_t = 4
dataSize: size_t = 40
pData: const void* = 000001746531B410
layout: VkPipelineLayout = 0000017472C5FC60
basePipelineHandle: VkPipeline = 0000000000000000
basePipelineIndex: int32_t = 0
pAllocator: const VkAllocationCallbacks* = NULL
pPipelines: VkPipeline* = 0000004041920AC8
pPipelines[0]: VkPipeline = 0000000000000000
When uncommenting the shaders, it turned out that the problematic ones also include dequant_q4_0,
among others.
This is the same bug as #5739, which was reported on Android.
teleprint-me