Improve graph build time #2329

Merged 3 commits on Jul 25, 2023
ggml.c (43 changes: 32 additions & 11 deletions)
@@ -15742,6 +15742,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
     }
 }

+static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
+
+static size_t hash(void * p) {
+    return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
+}
+
+static bool hash_insert(void * hash_table[], void * p) {
+    size_t h = hash(p);
+
+    // linear probing
+    size_t i = h;
+    while (hash_table[i] != NULL && hash_table[i] != p) {
+        i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
+        if (i == h) {
+            // hash table is full
+            GGML_ASSERT(false);
+        }
+    }
+
+    if (hash_table[i] == p) {
+        return true;
+    }
+
+    // insert
+    hash_table[i] = p;
+    return false;
+}
+
 static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
     if (node->grad == NULL) {
         // this usually happens when we generate intermediate nodes from constants in the backward pass
@@ -15752,16 +15780,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
     }

     // check if already visited
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        if (cgraph->nodes[i] == node) {
-            return;
-        }
-    }
-
-    for (int i = 0; i < cgraph->n_leafs; i++) {
-        if (cgraph->leafs[i] == node) {
-            return;
-        }
-    }
+    if (hash_insert(cgraph->visited_hash_table, node)) {
+        return;
+    }

     for (int i = 0; i < GGML_MAX_SRC; ++i) {
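Note: the change above swaps the O(n) "already visited?" scans over nodes and leafs for an expected O(1) lookup in a fixed-size, open-addressing hash set keyed by tensor pointer. A minimal standalone sketch of the same technique follows; the table size and names here are illustrative, not the PR's values.

// pointer_set.c - open-addressing pointer set with linear probing (sketch)
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define TABLE_SIZE 11 // must exceed the number of insertions; primes spread keys well

// returns true if p was already present, false if it was newly inserted
static bool set_insert(void * table[], void * p) {
    size_t i = (size_t)p % TABLE_SIZE;
    const size_t start = i;
    while (table[i] != NULL && table[i] != p) {
        i = (i + 1) % TABLE_SIZE; // probe the next slot
        assert(i != start);       // table full; mirrors GGML_ASSERT(false) above
    }
    if (table[i] == p) {
        return true;              // seen before
    }
    table[i] = p;                 // first visit: record the pointer
    return false;
}

int main(void) {
    void * table[TABLE_SIZE] = { NULL };
    int a, b;
    printf("%d\n", set_insert(table, &a)); // 0: newly inserted
    printf("%d\n", set_insert(table, &b)); // 0: newly inserted
    printf("%d\n", set_insert(table, &a)); // 1: already present
    return 0;
}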
@@ -15824,6 +15844,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
         /*.nodes        =*/ { NULL },
         /*.grads        =*/ { NULL },
         /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
         /*.perf_runs    =*/ 0,
         /*.perf_cycles  =*/ 0,
         /*.perf_time_us =*/ 0,
@@ -15865,7 +15886,7 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg

         if (node->is_param) {
             GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
-            ggml_build_forward_impl(&result, node->grad, true);
+            ggml_build_forward_expand(&result, node->grad);
         }
     }

ggml.h (9 changes: 8 additions & 1 deletion)
@@ -438,7 +438,7 @@ extern "C" {

        void * extra; // extra things e.g. for ggml-cuda.cu

-        char padding[8];
+        char padding[4];
    };

    static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -459,6 +459,11 @@ extern "C" {
        void * abort_callback_data;
    };

+    // next prime after GGML_MAX_NODES
+    // #define GGML_GRAPH_HASHTABLE_SIZE 4099
+    // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
+    #define GGML_GRAPH_HASHTABLE_SIZE 8273
+
    // computation graph
    struct ggml_cgraph {
        int n_nodes;
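The table is sized to a prime comfortably above the worst-case entry count (nodes plus leafs, each capped at GGML_MAX_NODES), so the probing loop in hash_insert always finds a free slot before wrapping around. A quick sanity check of that relationship; GGML_MAX_NODES is assumed to be 4096 here (as the commented-out 4099 value suggests), since its definition is not part of this diff.

// sizing_check.c - illustrative only; mirrors the static_assert added in ggml.c
#define GGML_MAX_NODES            4096 // assumed; not shown in this diff
#define GGML_GRAPH_HASHTABLE_SIZE 8273

_Static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2,
               "table must exceed the worst-case 8192 entries, with slack");

int main(void) { return 0; }

The table itself costs GGML_GRAPH_HASHTABLE_SIZE * sizeof(void *) bytes, roughly 65 KiB with 64-bit pointers, added to every ggml_cgraph.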
@@ -468,6 +473,8 @@ extern "C" {
        struct ggml_tensor * grads[GGML_MAX_NODES];
        struct ggml_tensor * leafs[GGML_MAX_NODES];

+        void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
+
        // performance
        int perf_runs;
        int64_t perf_cycles;
llama.cpp (2 changes: 2 additions & 0 deletions)
@@ -1704,6 +1704,8 @@ static bool llama_eval_internal(
     // run the computation
     ggml_build_forward_expand(&gf, cur);

+    // fprintf(stderr, "graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf.n_nodes, gf.n_leafs);
+
 #if GGML_USE_MPI
     ggml_mpi_graph_compute_pre(lctx.ctx_mpi, &gf, n_layer);
 #endif
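The added fprintf stays commented out as an opt-in diagnostic. Below is a minimal sketch of how such a measurement could be wired up; it assumes ggml.h is available, and the helper name and t_start_us placement are illustrative, not part of this PR.

// graph_build_timing.c - sketch of the timing the commented-out line performs
#include <stdio.h>
#include "ggml.h"

static struct ggml_cgraph build_graph_timed(struct ggml_tensor * output) {
    ggml_time_init(); // initialize ggml's clock once before calling ggml_time_us()
    const int64_t t_start_us = ggml_time_us();

    struct ggml_cgraph gf = ggml_build_forward(output);

    fprintf(stderr, "graph build time: %.3f ms (%d nodes, %d leafs)\n",
            (ggml_time_us() - t_start_us) / 1000.0, gf.n_nodes, gf.n_leafs);
    return gf;
}

On large graphs such a timer should show a clear improvement over the old per-visit linear scans, which made graph construction roughly quadratic in the number of nodes.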