
Convert: mixed k-quant with legacy quant fallback #447

Closed · wants to merge 1 commit
33 changes: 31 additions & 2 deletions examples/cli/main.cpp
@@ -82,6 +82,7 @@ struct SDParams {
     std::string stacked_id_embeddings_path;
     std::string input_id_images_path;
     sd_type_t wtype = SD_TYPE_COUNT;
+    sd_type_t ftype = SD_TYPE_COUNT;
     std::string lora_model_dir;
     std::string output_path = "output.png";
     std::string input_path;
@@ -126,7 +127,8 @@ void print_params(SDParams params) {
     printf(" n_threads: %d\n", params.n_threads);
     printf(" mode: %s\n", modes_str[params.mode]);
     printf(" model_path: %s\n", params.model_path.c_str());
-    printf(" wtype: %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified");
+    printf(" wtype: %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified");
+    printf(" fallback_type: %s\n", params.ftype < SD_TYPE_COUNT ? sd_type_name(params.ftype) : "unspecified");
     printf(" clip_l_path: %s\n", params.clip_l_path.c_str());
     printf(" clip_g_path: %s\n", params.clip_g_path.c_str());
     printf(" t5xxl_path: %s\n", params.t5xxl_path.c_str());
@@ -190,6 +192,8 @@ void print_usage(int argc, const char* argv[]) {
     printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n");
     printf(" --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k)\n");
     printf(" If not specified, the default is the type of the weight file\n");
+    printf(" --fallback-type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0) to be used as fallback for convert\n");
+    printf(" Used only if --type is q2_k, q3_k, or q4_k. The default is the type of the weight file\n");
     printf(" --lora-model-dir [DIR] lora model directory\n");
     printf(" -i, --init-img [IMAGE] path to the input image, required by img2img\n");
     printf(" --control-image [IMAGE] path to image condition, control net\n");
@@ -355,6 +359,31 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                         type.c_str());
                 exit(1);
             }
+        } else if (arg == "--fallback-type") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            std::string type = argv[i];
+            if (type == "f32") {
+                params.ftype = SD_TYPE_F32;
+            } else if (type == "f16") {
+                params.ftype = SD_TYPE_F16;
+            } else if (type == "q4_0") {
+                params.ftype = SD_TYPE_Q4_0;
+            } else if (type == "q4_1") {
+                params.ftype = SD_TYPE_Q4_1;
+            } else if (type == "q5_0") {
+                params.ftype = SD_TYPE_Q5_0;
+            } else if (type == "q5_1") {
+                params.ftype = SD_TYPE_Q5_1;
+            } else if (type == "q8_0") {
+                params.ftype = SD_TYPE_Q8_0;
+            } else {
+                fprintf(stderr, "error: invalid fallback weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n",
+                        type.c_str());
+                exit(1);
+            }
         } else if (arg == "--lora-model-dir") {
             if (++i >= argc) {
                 invalid_arg = true;
@@ -694,7 +723,7 @@ int main(int argc, const char* argv[]) {
     }

     if (params.mode == CONVERT) {
-        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
+        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype, params.ftype);
         if (!success) {
             fprintf(stderr,
                     "convert '%s'/'%s' to '%s' failed\n",
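
To make the CLI side concrete, here is a hypothetical convert invocation (model and output file names are placeholders, not taken from this PR; the example binary is assumed to be built as `sd`). It quantizes to q4_k while letting any tensor a k-quant cannot pack fall back to q8_0 instead of being left at the file's original type:

```sh
./sd -M convert -m v1-5-pruned-emaonly.safetensors -o v1-5-q4_k.gguf --type q4_k --fallback-type q8_0
```
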
48 changes: 34 additions & 14 deletions model.cpp
@@ -1758,9 +1758,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
 bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type) {
     const std::string& name = tensor_storage.name;
     if (type != GGML_TYPE_COUNT) {
-        if (ggml_is_quantized(type) && tensor_storage.ne[0] % ggml_blck_size(type) != 0) {
-            // Pass, do not convert
-        } else if (ends_with(name, ".bias")) {
+        if (ends_with(name, ".bias")) {
             // Pass, do not convert
         } else if (ends_with(name, ".scale")) {
             // Pass, do not convert
@@ -1786,11 +1784,37 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type) {
     return false;
 }

-bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
+bool ModelLoader::tensor_can_be_converted(const TensorStorage& tensor_storage, ggml_type type) {
+    return !ggml_is_quantized(type) || tensor_storage.ne[0] % ggml_blck_size(type) == 0;
+}
+
+void ModelLoader::tensor_set_type(ggml_type& tensor_type, const TensorStorage& tensor_storage, ggml_type type, ggml_type fallback_type) {
+    if (tensor_should_be_converted(tensor_storage, type)) {
+        if (tensor_can_be_converted(tensor_storage, type)) {
+            tensor_type = type;
+        } else {
+            if ((type == GGML_TYPE_Q2_K ||
+                 type == GGML_TYPE_Q3_K ||
+                 type == GGML_TYPE_Q4_K ||
+                 type == GGML_TYPE_Q5_K ||
+                 type == GGML_TYPE_Q6_K ||
+                 type == GGML_TYPE_Q8_K) &&
+                fallback_type != GGML_TYPE_COUNT) {
+                // try to use the fallback quant instead of the k-quant
+                if (tensor_can_be_converted(tensor_storage, fallback_type)) {
+                    // fallback works
+                    tensor_type = fallback_type;
+                }
+            }
+        }
+    }
+}
+
+bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type, ggml_type fallback_type /*= GGML_TYPE_COUNT*/) {
     auto backend = ggml_backend_cpu_init();
     size_t mem_size = 1 * 1024 * 1024;  // for padding
     mem_size += tensor_storages.size() * ggml_tensor_overhead();
-    mem_size += get_params_mem_size(backend, type);
+    mem_size += get_params_mem_size(backend, type, fallback_type);
     LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
     ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false});

@@ -1800,9 +1824,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
         const std::string& name = tensor_storage.name;

         ggml_type tensor_type = tensor_storage.type;
-        if (tensor_should_be_converted(tensor_storage, type)) {
-            tensor_type = type;
-        }
+        tensor_set_type(tensor_type, tensor_storage, type, fallback_type);

         ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
         if (tensor == NULL) {
@@ -1836,7 +1858,7 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
     return success;
 }

-int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) {
+int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type, ggml_type fallback_type /*= GGML_TYPE_COUNT*/) {
     size_t alignment = 128;
     if (backend != NULL) {
         alignment = ggml_backend_get_alignment(backend);
@@ -1851,16 +1873,14 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) {
     }

     for (auto& tensor_storage : processed_tensor_storages) {
-        if (tensor_should_be_converted(tensor_storage, type)) {
-            tensor_storage.type = type;
-        }
+        tensor_set_type(tensor_storage.type, tensor_storage, type, fallback_type);
         mem_size += tensor_storage.nbytes() + alignment;
     }

     return mem_size;
 }

-bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
+bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type, sd_type_t fallback_type /*= SD_TYPE_COUNT*/) {
     ModelLoader model_loader;

     if (!model_loader.init_from_file(input_path)) {
@@ -1874,6 +1894,6 @@ bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
             return false;
         }
     }
-    bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
+    bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type, (ggml_type)fallback_type);
     return success;
 }
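
Why a fallback is needed at all: a quantized ggml type can only hold tensors whose first dimension is a multiple of its block size, and k-quants use 256-value super-blocks where the legacy quants use 32-value blocks. A minimal standalone sketch of the check behind tensor_can_be_converted() (block sizes hard-coded for illustration; the real code queries ggml_blck_size()):

```cpp
#include <cstdio>

// A tensor fits a quant type iff its first dimension is a
// multiple of that type's block size.
static bool can_hold(long ne0, int block_size) {
    return ne0 % block_size == 0;
}

int main() {
    const long ne0 = 320;  // first dimension of a hypothetical tensor
    printf("q4_k (block 256): %s\n", can_hold(ne0, 256) ? "ok" : "needs fallback");
    printf("q8_0 (block 32):  %s\n", can_hold(ne0, 32) ? "ok" : "needs fallback");
    // 320 % 256 != 0 but 320 % 32 == 0, so this tensor gets the legacy
    // fallback quant while the rest of the model stays k-quant.
    return 0;
}
```
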
8 changes: 5 additions & 3 deletions model.h
@@ -157,12 +157,14 @@ class ModelLoader {
     bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
                       ggml_backend_t backend,
                       std::set<std::string> ignore_tensors = {});
-    bool save_to_gguf_file(const std::string& file_path, ggml_type type);
+    bool save_to_gguf_file(const std::string& file_path, ggml_type type, ggml_type fallback_type = GGML_TYPE_COUNT);
     bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
-    int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
+    bool tensor_can_be_converted(const TensorStorage& tensor_storage, ggml_type type);
+    int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT, ggml_type fallback_type = GGML_TYPE_COUNT);
     ~ModelLoader() = default;
+    void tensor_set_type(ggml_type& tensor_type, const TensorStorage& tensor_storage, ggml_type type = GGML_TYPE_COUNT, ggml_type fallback_type = GGML_TYPE_COUNT);

-    static std::string load_merges();
+    static std::string load_merges();
     static std::string load_t5_tokenizer_json();
 };

2 changes: 1 addition & 1 deletion stable-diffusion.h
@@ -208,7 +208,7 @@ SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);

 SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);

-SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type);
+SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type, enum sd_type_t fallback_type = SD_TYPE_COUNT);

 SD_API uint8_t* preprocess_canny(uint8_t* img,
                                  int width,
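
A minimal sketch of calling the extended public API (paths are placeholders; the SD_TYPE_* values mirror ggml's type enum):

```cpp
#include "stable-diffusion.h"

int main() {
    // Mixed quantization: q4_k where possible, q8_0 where a k-quant cannot
    // represent the tensor. Passing SD_TYPE_COUNT as fallback_type (the
    // default) preserves the old behavior of leaving such tensors as-is.
    bool ok = convert("input.safetensors", /*vae_path=*/"", "output-q4_k.gguf",
                      SD_TYPE_Q4_K, SD_TYPE_Q8_0);
    return ok ? 0 : 1;
}
```
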