Skip to content

Commit e3ec684

Browse files
committed
reinsert cqs
1 parent d48aad3 commit e3ec684

File tree

2 files changed

+16
-0
lines changed

2 files changed

+16
-0
lines changed

include/llama.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,14 @@ extern "C" {
370370
enum llama_ftype ftype; // quantize to this llama_ftype
371371
enum ggml_type output_tensor_type; // output tensor type
372372
enum ggml_type token_embedding_type; // token embeddings tensor type
373+
enum ggml_type attn_q_type; // attention query tensor type
374+
enum ggml_type attn_k_type; // attention key tensor type
375+
enum ggml_type attn_v_type; // attention value tensor type
376+
enum ggml_type attn_qkv_type; // attention query-key-value tensor type
377+
enum ggml_type attn_output_type; // attention output tensor type
378+
enum ggml_type ffn_gate_type; // feed-forward network gate tensor type
379+
enum ggml_type ffn_down_type; // feed-forward network down tensor type
380+
enum ggml_type ffn_up_type; // feed-forward network up tensor type
373381
bool allow_requantize; // allow quantizing non-f32/f16 tensors
374382
bool quantize_output_tensor; // quantize output.weight
375383
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored

src/llama.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19981,6 +19981,14 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
1998119981
/*.ftype =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
1998219982
/*.output_tensor_type =*/ GGML_TYPE_COUNT,
1998319983
/*.token_embedding_type =*/ GGML_TYPE_COUNT,
19984+
/*.attn_q_type =*/ GGML_TYPE_COUNT,
19985+
/*.attn_k_type =*/ GGML_TYPE_COUNT,
19986+
/*.attn_v_type =*/ GGML_TYPE_COUNT,
19987+
/*.attn_qkv_type =*/ GGML_TYPE_COUNT,
19988+
/*.attn_output_type =*/ GGML_TYPE_COUNT,
19989+
/*.ffn_gate_type =*/ GGML_TYPE_COUNT,
19990+
/*.ffn_down_type =*/ GGML_TYPE_COUNT,
19991+
/*.ffn_up_type =*/ GGML_TYPE_COUNT,
1998419992
/*.allow_requantize =*/ false,
1998519993
/*.quantize_output_tensor =*/ true,
1998619994
/*.only_copy =*/ false,

0 commit comments

Comments
 (0)