Expose type name from ggml #970

Status: Merged (1 commit, Apr 14, 2023)

examples/quantize-stats/quantize-stats.cpp (14 changes: 6 additions & 8 deletions)
@@ -16,9 +16,6 @@
 #include <unordered_map>
 #include <vector>
 
-static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" };

  [Review comment from a Contributor on the removed line above]
  Just in case anyone wonders, this ordering was a bug I introduced in #709. Thanks to designated initializers being allowed in C11, this should not happen in the future.

-static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
-
 struct quantize_stats_params {
     std::string model = "models/7B/ggml-model-f16.bin";
     bool verbose = false;
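
The review comment above credits C11 designated initializers for making this kind of ordering bug hard to reintroduce. Below is a minimal, self-contained sketch of the pattern, using a hypothetical example enum and name table rather than the real ggml definitions:

    #include <assert.h>
    #include <stdio.h>

    enum example_type {
        EXAMPLE_TYPE_F32  = 0,
        EXAMPLE_TYPE_F16  = 1,
        EXAMPLE_TYPE_Q4_0 = 2,
        EXAMPLE_TYPE_COUNT,
    };

    // Designated initializers bind each string to its enum value by name, so the
    // order in which the entries are written cannot shift a name to the wrong index.
    static const char * EXAMPLE_TYPE_NAME[EXAMPLE_TYPE_COUNT] = {
        [EXAMPLE_TYPE_Q4_0] = "q4_0",   // deliberately listed out of order
        [EXAMPLE_TYPE_F32]  = "f32",
        [EXAMPLE_TYPE_F16]  = "f16",
    };

    // Same guard style as the PR: the hard-coded count goes stale when the enum
    // grows, forcing this table to be revisited at compile time.
    static_assert(EXAMPLE_TYPE_COUNT == 3, "EXAMPLE_TYPE_NAME is outdated");

    int main(void) {
        printf("%s\n", EXAMPLE_TYPE_NAME[EXAMPLE_TYPE_F16]);   // prints "f16"
        return 0;
    }
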
@@ -224,7 +221,7 @@ int main(int argc, char ** argv) {
                 break;
             }
             int j;
-            for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], type_strs[j]) != 0; j++) {
+            for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], ggml_type_name((ggml_type) j)) != 0; j++) {
                 // find match
             }
             if (j < GGML_TYPE_COUNT) {
@@ -279,7 +276,7 @@ int main(int argc, char ** argv) {
             continue;
         }
         if (params.verbose) {
-            printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), type_strs[kv_tensor.second->type], ggml_nelements(kv_tensor.second));
+            printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), ggml_type_name(kv_tensor.second->type), ggml_nelements(kv_tensor.second));
         }
         if (kv_tensor.second->type == GGML_TYPE_F16) {
             is_f16 = true;
@@ -304,13 +301,14 @@ int main(int argc, char ** argv) {
 
     // loop through quantization types
     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
+        const ggml_type type = (ggml_type) i;
         if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
             continue;
         }
         quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
         if (qfns.quantize_row_q && qfns.dequantize_row_q) {
             if (params.verbose) {
-                printf("testing %s ...\n", type_strs[i]);
+                printf("testing %s ...\n", ggml_type_name(type));
             }
 
             error_stats global_stats {};
@@ -322,7 +320,7 @@ int main(int argc, char ** argv) {
                 if (params.verbose) {
                     printf(" %s ...\n", kv_tensor.first.c_str());
                 }
-                std::string layer_name { type_strs[i] };
+                std::string layer_name { ggml_type_name(type) };
                 layer_name += "::" + kv_tensor.first;
                 test_roundtrip_on_layer(
                         layer_name,
@@ -337,7 +335,7 @@ int main(int argc, char ** argv) {
                 );
             }
 
-            print_error_stats(type_strs[i], global_stats, params.print_histogram);
+            print_error_stats(ggml_type_name(type), global_stats, params.print_histogram);
         }
     }
 

ggml.c (17 changes: 17 additions & 0 deletions)
@@ -2671,6 +2671,18 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
 };
 static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_SIZE is outdated");
 
+
+static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
+    [GGML_TYPE_F32] = "f32",
+    [GGML_TYPE_F16] = "f16",
+    [GGML_TYPE_Q4_0] = "q4_0",
+    [GGML_TYPE_Q4_1] = "q4_1",
+    [GGML_TYPE_I8] = "i8",
+    [GGML_TYPE_I16] = "i16",
+    [GGML_TYPE_I32] = "i32",
+};
+static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_NAME is outdated");
+
 static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
     "NONE",
@@ -2895,6 +2907,11 @@ float ggml_type_sizef(enum ggml_type type) {
     return ((float)(GGML_TYPE_SIZE[type]))/GGML_BLCK_SIZE[type];
 }
 
+const char * ggml_type_name(enum ggml_type type) {
+    return GGML_TYPE_NAME[type];
+}
+
+
 size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return GGML_TYPE_SIZE[tensor->type];
 }

ggml.h (2 changes: 2 additions & 0 deletions)
@@ -354,6 +354,8 @@ int ggml_blck_size (enum ggml_type type);
 size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
 float  ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
 
+const char * ggml_type_name(enum ggml_type type);
+
 size_t ggml_element_size(const struct ggml_tensor * tensor);
 
 struct ggml_context * ggml_init(struct ggml_init_params params);
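
With the declaration exposed in ggml.h, callers can print type names without maintaining a local table. A minimal usage sketch (the loop and output formatting are illustrative, not part of this diff):

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // Walk every ggml type id and print its short name via the new accessor.
        for (int i = 0; i < GGML_TYPE_COUNT; i++) {
            printf("type %d -> %s\n", i, ggml_type_name((enum ggml_type) i));
        }
        return 0;
    }
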

llama.cpp (14 changes: 2 additions & 12 deletions)
@@ -269,16 +269,6 @@ static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
     return ret;
 }
 
-static const char * llama_format_type(enum ggml_type type) {
-    switch (type) {
-        case GGML_TYPE_F32: return "f32";
-        case GGML_TYPE_F16: return "f16";
-        case GGML_TYPE_Q4_0: return "q4_0";
-        case GGML_TYPE_Q4_1: return "q4_1";
-        default: LLAMA_ASSERT(false);
-    }
-}
-
 static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
     size_t size = ggml_type_size(type);
     for (uint32_t dim : ne) {
@@ -1582,7 +1572,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         printf("[%zu/%zu] %36s - %s, type = %6s, ",
                ++idx, model_loader->tensors_map.tensors.size(),
                tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
-               llama_format_type(tensor.type));
+               ggml_type_name(tensor.type));
 
         // This used to be a regex, but <regex> has an extreme cost to compile times.
         bool quantize = tensor.name.rfind("weight") == tensor.name.size() - 6; // ends with 'weight'?
@@ -1615,7 +1605,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
             }
         } else {
-            throw format("type %s unsupported for integer quantization", llama_format_type(tensor.type));
+            throw format("type %s unsupported for integer quantization", ggml_type_name(tensor.type));
         }
 
         printf("quantizing .. ");