File tree: 2 files changed, +16 -0 lines changed
Original file line number Diff line number Diff line change @@ -370,6 +370,14 @@ extern "C" {
370
370
enum llama_ftype ftype; // quantize to this llama_ftype
371
371
enum ggml_type output_tensor_type; // output tensor type
372
372
enum ggml_type token_embedding_type; // token embeddings tensor type
373
+ enum ggml_type attn_q_type; // attention query tensor type
374
+ enum ggml_type attn_k_type; // attention key tensor type
375
+ enum ggml_type attn_v_type; // attention value tensor type
376
+ enum ggml_type attn_qkv_type; // attention query-key-value tensor type
377
+ enum ggml_type attn_output_type; // attention output tensor type
378
+ enum ggml_type ffn_gate_type; // feedforward network gate type
379
+ enum ggml_type ffn_down_type; // feedforward network down type
380
+ enum ggml_type ffn_up_type; // feedforward network up type
373
381
bool allow_requantize; // allow quantizing non-f32/f16 tensors
374
382
bool quantize_output_tensor; // quantize output.weight
375
383
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
Original file line number Diff line number Diff line change @@ -19981,6 +19981,14 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
19981
19981
/*.ftype =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
19982
19982
/*.output_tensor_type =*/ GGML_TYPE_COUNT,
19983
19983
/*.token_embedding_type =*/ GGML_TYPE_COUNT,
19984
+ /*.attn_q_type =*/ GGML_TYPE_COUNT,
19985
+ /*.attn_k_type =*/ GGML_TYPE_COUNT,
19986
+ /*.attn_v_type =*/ GGML_TYPE_COUNT,
19987
+ /*.attn_qkv_type =*/ GGML_TYPE_COUNT,
19988
+ /*.attn_output_type =*/ GGML_TYPE_COUNT,
19989
+ /*.ffn_gate_type =*/ GGML_TYPE_COUNT,
19990
+ /*.ffn_down_type =*/ GGML_TYPE_COUNT,
19991
+ /*.ffn_up_type =*/ GGML_TYPE_COUNT,
19984
19992
/*.allow_requantize =*/ false,
19985
19993
/*.quantize_output_tensor =*/ true,
19986
19994
/*.only_copy =*/ false,
You can’t perform that action at this time.
0 commit comments