Skip to content

Commit 7d5a8c1

Browse files
committed
ggml : reduce hash table reset cost
1 parent 01aec4a commit 7d5a8c1

File tree

7 files changed

+371
-263
lines changed

7 files changed

+371
-263
lines changed

Makefile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -325,9 +325,9 @@ ifdef LLAMA_DEBUG
325325
endif
326326
else
327327
MK_CPPFLAGS += -DNDEBUG
328-
MK_CFLAGS += -O3
329-
MK_CXXFLAGS += -O3
330-
MK_NVCCFLAGS += -O3
328+
MK_CFLAGS += -O3 -g
329+
MK_CXXFLAGS += -O3 -g
330+
MK_NVCCFLAGS += -O3 -g
331331
endif
332332

333333
ifdef LLAMA_SANITIZE_THREAD

ggml/include/ggml.h

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -254,18 +254,8 @@
254254

255255
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
256256

257-
#define GGML_ASSERT(x) \
258-
do { \
259-
if (!(x)) { \
260-
fflush(stdout); \
261-
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
262-
ggml_print_backtrace(); \
263-
abort(); \
264-
} \
265-
} while (0)
266-
267257
#ifndef NDEBUG
268-
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
258+
#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
269259
#elif defined(__GNUC__)
270260
#define GGML_UNREACHABLE() __builtin_unreachable()
271261
#elif defined(_MSC_VER)
@@ -274,6 +264,16 @@
274264
#define GGML_UNREACHABLE() ((void) 0)
275265
#endif
276266

267+
#ifdef __cplusplus
268+
#define GGML_NORETURN [[noreturn]]
269+
#elif defined(_MSC_VER)
270+
#define GGML_NORETURN __declspec(noreturn)
271+
#else
272+
#define GGML_NORETURN _Noreturn
273+
#endif
274+
275+
#define GGML_ASSERT(x) if (!(x)) ggml_abort(__FILE__, __LINE__, #x)
276+
277277
// used to copy the number of elements and stride in bytes of tensors into local variables.
278278
// main purpose is to reduce code duplication and improve readability.
279279
//
@@ -322,6 +322,8 @@
322322
extern "C" {
323323
#endif
324324

325+
GGML_API GGML_NORETURN void ggml_abort(const char * file, int line, const char * expr);
326+
325327
enum ggml_status {
326328
GGML_STATUS_ALLOC_FAILED = -2,
327329
GGML_STATUS_FAILED = -1,
@@ -636,8 +638,11 @@ extern "C" {
636638
GGML_CGRAPH_EVAL_ORDER_COUNT
637639
};
638640

641+
typedef uint32_t ggml_bitset_t;
642+
639643
struct ggml_hash_set {
640644
size_t size;
645+
ggml_bitset_t * used;
641646
struct ggml_tensor ** keys;
642647
};
643648

@@ -651,7 +656,7 @@ extern "C" {
651656
struct ggml_tensor ** grads;
652657
struct ggml_tensor ** leafs;
653658

654-
struct ggml_hash_set visited_hash_table;
659+
struct ggml_hash_set visited_hash_set;
655660

656661
enum ggml_cgraph_eval_order order;
657662
};
@@ -698,8 +703,6 @@ extern "C" {
698703
GGML_API int64_t ggml_cycles(void);
699704
GGML_API int64_t ggml_cycles_per_ms(void);
700705

701-
GGML_API void ggml_print_backtrace(void);
702-
703706
// accepts a UTF-8 path, even on Windows
704707
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
705708

@@ -2005,8 +2008,8 @@ extern "C" {
20052008

20062009
// ggml_graph_plan() has to be called before ggml_graph_compute()
20072010
// when plan.work_size > 0, caller must allocate memory for plan.work_data
2008-
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
2009-
GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
2011+
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
2012+
GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
20102013
// same as ggml_graph_compute() but the work data is allocated as a part of the context
20112014
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
20122015
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);

ggml/src/ggml-alloc.c

Lines changed: 14 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -443,7 +443,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
443443
}
444444
}
445445

446-
free(galloc->hash_set.keys);
446+
ggml_hash_set_free(&galloc->hash_set);
447447
free(galloc->hash_values);
448448
free(galloc->bufts);
449449
free(galloc->buffers);
@@ -456,7 +456,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
456456
typedef struct ggml_gallocr * ggml_gallocr_t;
457457

458458
static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) {
459-
size_t i = ggml_hash_find_or_insert(galloc->hash_set, t);
459+
size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t);
460460
return &galloc->hash_values[i];
461461
}
462462

@@ -565,8 +565,8 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
565565

566566
static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
567567
// clear hash tables
568-
memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
569-
memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
568+
ggml_hash_set_reset(&galloc->hash_set);
569+
memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
570570

571571
// allocate leafs
572572
// these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
@@ -671,21 +671,19 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
671671
}
672672

673673
bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
674-
size_t hash_size = graph->visited_hash_table.size;
674+
size_t min_hash_size = graph->n_nodes + graph->n_leafs;
675+
// add 25% margin to avoid hash collisions
676+
min_hash_size += min_hash_size / 4;
675677

676678
// initialize hash table
677-
if (galloc->hash_set.size < hash_size) {
678-
free(galloc->hash_set.keys);
679-
free(galloc->hash_values);
680-
galloc->hash_set.size = hash_size;
681-
galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
682-
galloc->hash_values = calloc(hash_size, sizeof(struct hash_node));
679+
if (galloc->hash_set.size < min_hash_size) {
680+
ggml_hash_set_free(&galloc->hash_set);
681+
galloc->hash_set = ggml_hash_set_new(min_hash_size);
683682
GGML_ASSERT(galloc->hash_set.keys != NULL);
683+
684+
free(galloc->hash_values);
685+
galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size);
684686
GGML_ASSERT(galloc->hash_values != NULL);
685-
} else {
686-
// reset hash table
687-
memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size);
688-
memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
689687
}
690688

691689
// reset allocators
@@ -817,7 +815,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
817815
}
818816

819817
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
820-
ggml_backend_buffer_type_t buft = talloc->buffer_id != -1 ? galloc->bufts[talloc->buffer_id] : NULL;
818+
ggml_backend_buffer_type_t buft = galloc->bufts[talloc->buffer_id];
821819
size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node);
822820
return talloc->size_max >= node_size;
823821
}

0 commit comments

Comments
 (0)