From 1a06deeb14d7ba330f0c0770afeb70d8d5da6ee6 Mon Sep 17 00:00:00 2001 From: slaren <2141330+slaren@users.noreply.github.com> Date: Sun, 14 May 2023 21:37:48 +0200 Subject: [PATCH 1/2] benchmark-matmul: fix command line parsing, replace macros with functions, report results in GFLOPS --- examples/benchmark/benchmark-matmult.cpp | 57 +++++++++--------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 6117ae3abf877..27fd38cbdc4d3 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -15,7 +15,7 @@ #include #include -float tensor_sum_elements(struct ggml_tensor * tensor) { +float tensor_sum_elements(const ggml_tensor * tensor) { float sum = 0; if (tensor->type==GGML_TYPE_F32) { for (int j = 0; j < tensor->ne[1]; j++) { @@ -27,21 +27,13 @@ float tensor_sum_elements(struct ggml_tensor * tensor) { return sum; } - -/* - These are mapping to unknown - GGML_TYPE_I8, - GGML_TYPE_I16, - GGML_TYPE_I32, - GGML_TYPE_COUNT, -*/ - -#define TENSOR_TYPE_AS_STR(TYPE) TYPE == GGML_TYPE_F32 ? "FP32" : TYPE == GGML_TYPE_F16 ? "FP16" : TYPE == GGML_TYPE_Q4_0 ? "Q4_0" : TYPE == GGML_TYPE_Q4_1 ? 
"Q4_1" : "UNKNOWN" - -#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", #TENSOR, \ - TENSOR->type,TENSOR_TYPE_AS_STR(TENSOR->type),\ - (int) TENSOR->ne[0], (int) TENSOR->ne[1], (int) TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \ - { float sum = tensor_sum_elements(TENSOR); printf("Sum of tensor %s is %6.2f\n",#TENSOR, sum); } +void tensor_dump(const ggml_tensor * tensor) { + printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", "tensor", + tensor->type, ggml_type_name(tensor->type), + (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]); + float sum = tensor_sum_elements(tensor); + printf("Sum of tensor %s is %6.2f\n","tensor", sum); +} struct benchmark_params_struct { int32_t n_threads = 1; @@ -59,8 +51,6 @@ void print_usage(int /*argc*/, char ** argv, struct benchmark_params_struct para } int main(int argc, char ** argv) { - - struct benchmark_params_struct benchmark_params; bool invalid_param = false; @@ -84,11 +74,11 @@ int main(int argc, char ** argv) { print_usage(argc, argv, benchmark_params); exit(0); } - if (invalid_param) { - fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); - print_usage(argc, argv, benchmark_params); - exit(1); - } + } + if (invalid_param) { + fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); + print_usage(argc, argv, benchmark_params); + exit(1); } fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); @@ -165,12 +155,12 @@ int main(int argc, char ** argv) { gf.n_threads=benchmark_params.n_threads; printf("cgraph->n_threads=%i\n",gf.n_threads); - TENSOR_DUMP(m11); - TENSOR_DUMP(m2); + tensor_dump(m11); + tensor_dump(m2); ggml_graph_compute(ctx, &gf); - TENSOR_DUMP(gf.nodes[0]); + tensor_dump(gf.nodes[0]); printf("\n------ Test 2 - Matrix Mult via Q4_0 code 
------------------------------------------------------------------------------\n"); @@ -216,9 +206,8 @@ int main(int argc, char ** argv) { // Let's use the F32 result from above as a reference for the q4_0 multiplication float sum_of_F32_reference = tensor_sum_elements(gf.nodes[0]); - - printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; FLOPS_per_u_Second\n"); - printf("==============================================================================================\n"); + printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n"); + printf("=====================================================================================\n"); for (int i=0;i Date: Sun, 14 May 2023 21:56:03 +0200 Subject: [PATCH 2/2] fix tensor names --- examples/benchmark/benchmark-matmult.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 27fd38cbdc4d3..7d237be02112b 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -27,14 +27,16 @@ float tensor_sum_elements(const ggml_tensor * tensor) { return sum; } -void tensor_dump(const ggml_tensor * tensor) { - printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", "tensor", +void tensor_dump(const ggml_tensor * tensor, const char * name) { + printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", name, tensor->type, ggml_type_name(tensor->type), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]); float sum = tensor_sum_elements(tensor); - printf("Sum of tensor %s is %6.2f\n","tensor", sum); + printf("Sum of tensor %s is %6.2f\n", name, sum); } +#define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor) + struct benchmark_params_struct { int32_t n_threads = 1; int32_t n_iterations = 10; @@ -155,12 +157,12 
@@ int main(int argc, char ** argv) { gf.n_threads=benchmark_params.n_threads; printf("cgraph->n_threads=%i\n",gf.n_threads); - tensor_dump(m11); - tensor_dump(m2); + TENSOR_DUMP(m11); + TENSOR_DUMP(m2); ggml_graph_compute(ctx, &gf); - tensor_dump(gf.nodes[0]); + TENSOR_DUMP(gf.nodes[0]); printf("\n------ Test 2 - Matrix Mult via Q4_0 code ------------------------------------------------------------------------------\n"); @@ -224,7 +226,7 @@ int main(int argc, char ** argv) { usec,gflops); #ifdef VERBOSE_DEBUGGING - tensor_dump("res",gf31.nodes[0]) + TENSOR_DUMP(gf31.nodes[0]); #endif // Check that the matrix multiplication result is in the right ballpark