Skip to content

vulkan: define all quant data structures in types.comp #10440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 76 additions & 71 deletions ggml/src/ggml-vulkan/vulkan-shaders/types.comp
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,8 @@
#endif
#endif

#if defined(DATA_A_Q4_0)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 2
#define QUANT_K_Q4_0 32
#define QUANT_R_Q4_0 2

struct block_q4_0
{
Expand All @@ -46,14 +44,15 @@ struct block_q4_0_packed16
uint16_t qs[16/2];
};

#if defined(DATA_A_Q4_0)
#define QUANT_K QUANT_K_Q4_0
#define QUANT_R QUANT_R_Q4_0
#define A_TYPE block_q4_0
#define A_TYPE_PACKED16 block_q4_0_packed16
#endif

#if defined(DATA_A_Q4_1)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 2
#define QUANT_K_Q4_1 32
#define QUANT_R_Q4_1 2

struct block_q4_1
{
Expand All @@ -69,15 +68,15 @@ struct block_q4_1_packed16
uint16_t qs[16/2];
};

#if defined(DATA_A_Q4_1)
#define QUANT_K QUANT_K_Q4_1
#define QUANT_R QUANT_R_Q4_1
#define A_TYPE block_q4_1
#define A_TYPE_PACKED16 block_q4_1_packed16
#endif

#if defined(DATA_A_Q5_0)
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define QUANT_K 32
#define QUANT_R 2
#define QUANT_K_Q5_0 32
#define QUANT_R_Q5_0 2

struct block_q5_0
{
Expand All @@ -93,15 +92,15 @@ struct block_q5_0_packed16
uint16_t qs[16/2];
};

#if defined(DATA_A_Q5_0)
#define QUANT_K QUANT_K_Q5_0
#define QUANT_R QUANT_R_Q5_0
#define A_TYPE block_q5_0
#define A_TYPE_PACKED16 block_q5_0_packed16
#endif

#if defined(DATA_A_Q5_1)
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define QUANT_K 32
#define QUANT_R 2
#define QUANT_K_Q5_1 32
#define QUANT_R_Q5_1 2

struct block_q5_1
{
Expand All @@ -119,14 +118,15 @@ struct block_q5_1_packed16
uint16_t qs[16/2];
};

#if defined(DATA_A_Q5_1)
#define QUANT_K QUANT_K_Q5_1
#define QUANT_R QUANT_R_Q5_1
#define A_TYPE block_q5_1
#define A_TYPE_PACKED16 block_q5_1_packed16
#endif

#if defined(DATA_A_Q8_0)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 1
#define QUANT_K_Q8_0 32
#define QUANT_R_Q8_0 1

struct block_q8_0
{
Expand All @@ -139,164 +139,164 @@ struct block_q8_0_packed16
uint16_t qs[32/2];
};

#if defined(DATA_A_Q8_0)
#define QUANT_K QUANT_K_Q8_0
#define QUANT_R QUANT_R_Q8_0
#define A_TYPE block_q8_0
#define A_TYPE_PACKED16 block_q8_0_packed16
#endif

// K-quants
#if defined(DATA_A_Q2_K)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256
#define QUANT_K_Q2_K 256

struct block_q2_K
{
uint8_t scales[QUANT_K/16];
uint8_t qs[QUANT_K/4];
uint8_t scales[QUANT_K_Q2_K/16];
uint8_t qs[QUANT_K_Q2_K/4];
f16vec2 d;
};

struct block_q2_K_packed16
{
uint16_t scales[QUANT_K/16/2];
uint16_t qs[QUANT_K/4/2];
uint16_t scales[QUANT_K_Q2_K/16/2];
uint16_t qs[QUANT_K_Q2_K/4/2];
f16vec2 d;
};

struct block_q2_K_packed32
{
uint32_t scales[QUANT_K/16/4];
uint32_t qs[QUANT_K/4/4];
uint32_t scales[QUANT_K_Q2_K/16/4];
uint32_t qs[QUANT_K_Q2_K/4/4];
f16vec2 d;
};

#if defined(DATA_A_Q2_K)
#define QUANT_K QUANT_K_Q2_K
#define A_TYPE block_q2_K
#define A_TYPE_PACKED16 block_q2_K_packed16
#define A_TYPE_PACKED32 block_q2_K_packed32
#endif

#if defined(DATA_A_Q3_K)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256
#define QUANT_K_Q3_K 256

struct block_q3_K
{
uint8_t hmask[QUANT_K/8];
uint8_t qs[QUANT_K/4];
uint8_t hmask[QUANT_K_Q3_K/8];
uint8_t qs[QUANT_K_Q3_K/4];
uint8_t scales[12];
float16_t d;
};

struct block_q3_K_packed16
{
uint16_t hmask[QUANT_K/8/2];
uint16_t qs[QUANT_K/4/2];
uint16_t hmask[QUANT_K_Q3_K/8/2];
uint16_t qs[QUANT_K_Q3_K/4/2];
uint16_t scales[12/2];
float16_t d;
};

#if defined(DATA_A_Q3_K)
#define QUANT_K QUANT_K_Q3_K
#define A_TYPE block_q3_K
#define A_TYPE_PACKED16 block_q3_K_packed16
#endif

#if defined(DATA_A_Q4_K)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256
#define QUANT_K_Q4_K 256

struct block_q4_K
{
f16vec2 d;
uint8_t scales[3*QUANT_K/64];
uint8_t qs[QUANT_K/2];
uint8_t scales[3*QUANT_K_Q4_K/64];
uint8_t qs[QUANT_K_Q4_K/2];
};

struct block_q4_K_packed16
{
f16vec2 d;
uint16_t scales[3*QUANT_K/64/2];
uint16_t qs[QUANT_K/2/2];
uint16_t scales[3*QUANT_K_Q4_K/64/2];
uint16_t qs[QUANT_K_Q4_K/2/2];
};

struct block_q4_K_packed32
{
f16vec2 d;
uint32_t scales[3*QUANT_K/64/4];
uint32_t qs[QUANT_K/2/4];
uint32_t scales[3*QUANT_K_Q4_K/64/4];
uint32_t qs[QUANT_K_Q4_K/2/4];
};

#if defined(DATA_A_Q4_K)
#define QUANT_K QUANT_K_Q4_K
#define A_TYPE block_q4_K
#define A_TYPE_PACKED16 block_q4_K_packed16
#define A_TYPE_PACKED32 block_q4_K_packed32
#endif

#if defined(DATA_A_Q5_K)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256
#define QUANT_K_Q5_K 256

struct block_q5_K
{
f16vec2 d;
uint8_t scales[12];
uint8_t qh[QUANT_K/8];
uint8_t qs[QUANT_K/2];
uint8_t qh[QUANT_K_Q5_K/8];
uint8_t qs[QUANT_K_Q5_K/2];
};

struct block_q5_K_packed16
{
f16vec2 d;
uint16_t scales[12/2];
uint16_t qh[QUANT_K/8/2];
uint16_t qs[QUANT_K/2/2];
uint16_t qh[QUANT_K_Q5_K/8/2];
uint16_t qs[QUANT_K_Q5_K/2/2];
};

#if defined(DATA_A_Q5_K)
#define QUANT_K QUANT_K_Q5_K
#define A_TYPE block_q5_K
#define A_TYPE_PACKED16 block_q5_K_packed16
#endif

#if defined(DATA_A_Q6_K)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256
#define QUANT_K_Q6_K 256

struct block_q6_K
{
uint8_t ql[QUANT_K/2];
uint8_t qh[QUANT_K/4];
int8_t scales[QUANT_K/16];
uint8_t ql[QUANT_K_Q6_K/2];
uint8_t qh[QUANT_K_Q6_K/4];
int8_t scales[QUANT_K_Q6_K/16];
float16_t d;
};

struct block_q6_K_packed16
{
uint16_t ql[QUANT_K/2/2];
uint16_t qh[QUANT_K/4/2];
int8_t scales[QUANT_K/16];
uint16_t ql[QUANT_K_Q6_K/2/2];
uint16_t qh[QUANT_K_Q6_K/4/2];
int8_t scales[QUANT_K_Q6_K/16];
float16_t d;
};

#if defined(DATA_A_Q6_K)
#define QUANT_K QUANT_K_Q6_K
#define A_TYPE block_q6_K
#define A_TYPE_PACKED16 block_q6_K_packed16
#endif

// IQuants

#if defined(DATA_A_IQ4_NL)
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 2
#define QUANT_K_IQ4_NL 32
#define QUANT_R_IQ4_NL 2

struct block_iq4_nl
{
float16_t d;
uint8_t qs[QUANT_K/2];
uint8_t qs[QUANT_K_IQ4_NL/2];
};

struct block_iq4_nl_packed16
{
float16_t d;
uint16_t qs[QUANT_K/2/2];
uint16_t qs[QUANT_K_IQ4_NL/2/2];
};

#define A_TYPE block_iq4_nl
#define A_TYPE_PACKED16 block_iq4_nl_packed16
#if defined(DATA_A_IQ4_NL)

const int8_t kvalues_iq4nl_const[16] = {
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
Expand All @@ -313,6 +313,11 @@ void init_iq4nl_shmem()
}
barrier();
}

#define QUANT_K QUANT_K_IQ4_NL
#define QUANT_R QUANT_R_IQ4_NL
#define A_TYPE block_iq4_nl
#define A_TYPE_PACKED16 block_iq4_nl_packed16
#endif

#endif // !defined(GGML_TYPES_COMP)
Loading