|
91 | 91 | #define LLAMA_ATTRIBUTE_FORMAT(...)
|
92 | 92 | #endif
|
93 | 93 |
|
| 94 | +#define LLAMA_MAX_NODES 4096 |
| 95 | + |
94 | 96 | //
|
95 | 97 | // logging
|
96 | 98 | //
|
@@ -3580,7 +3582,7 @@ struct llm_build_context {
|
3580 | 3582 | }
|
3581 | 3583 |
|
3582 | 3584 | struct ggml_cgraph * build_llama() {
|
3583 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 3585 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
3584 | 3586 |
|
3585 | 3587 | GGML_ASSERT(n_embd_head == hparams.n_rot);
|
3586 | 3588 |
|
@@ -3692,7 +3694,7 @@ struct llm_build_context {
|
3692 | 3694 | }
|
3693 | 3695 |
|
3694 | 3696 | struct ggml_cgraph * build_baichuan() {
|
3695 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 3697 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
3696 | 3698 |
|
3697 | 3699 | struct ggml_tensor * cur;
|
3698 | 3700 | struct ggml_tensor * inpL;
|
@@ -3812,7 +3814,7 @@ struct llm_build_context {
|
3812 | 3814 | }
|
3813 | 3815 |
|
3814 | 3816 | struct ggml_cgraph * build_falcon() {
|
3815 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 3817 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
3816 | 3818 |
|
3817 | 3819 | struct ggml_tensor * cur;
|
3818 | 3820 | struct ggml_tensor * inpL;
|
@@ -3934,7 +3936,7 @@ struct llm_build_context {
|
3934 | 3936 | }
|
3935 | 3937 |
|
3936 | 3938 | struct ggml_cgraph * build_starcoder() {
|
3937 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 3939 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
3938 | 3940 |
|
3939 | 3941 | struct ggml_tensor * cur;
|
3940 | 3942 | struct ggml_tensor * pos;
|
@@ -4033,7 +4035,7 @@ struct llm_build_context {
|
4033 | 4035 | }
|
4034 | 4036 |
|
4035 | 4037 | struct ggml_cgraph * build_persimmon() {
|
4036 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 4038 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
4037 | 4039 |
|
4038 | 4040 | const int64_t n_rot = n_embd_head / 2;
|
4039 | 4041 |
|
@@ -4243,7 +4245,7 @@ struct llm_build_context {
|
4243 | 4245 | }
|
4244 | 4246 |
|
4245 | 4247 | struct ggml_cgraph * build_refact() {
|
4246 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 4248 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
4247 | 4249 |
|
4248 | 4250 | struct ggml_tensor * cur;
|
4249 | 4251 | struct ggml_tensor * inpL;
|
@@ -4334,7 +4336,7 @@ struct llm_build_context {
|
4334 | 4336 | }
|
4335 | 4337 |
|
4336 | 4338 | struct ggml_cgraph * build_bloom() {
|
4337 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 4339 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
4338 | 4340 |
|
4339 | 4341 | struct ggml_tensor * cur;
|
4340 | 4342 | struct ggml_tensor * inpL;
|
@@ -4428,7 +4430,7 @@ struct llm_build_context {
|
4428 | 4430 | }
|
4429 | 4431 |
|
4430 | 4432 | struct ggml_cgraph * build_mpt() {
|
4431 |
| - struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
| 4433 | + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); |
4432 | 4434 |
|
4433 | 4435 | struct ggml_tensor * cur;
|
4434 | 4436 | struct ggml_tensor * inpL;
|
@@ -8169,7 +8171,7 @@ struct llama_context * llama_new_context_with_model(
|
8169 | 8171 | {
|
8170 | 8172 | static const size_t tensor_alignment = 32;
|
8171 | 8173 | // the compute buffer is used to store the tensor and graph structs, while the allocator buffer is used for the tensor data
|
8172 |
| - ctx->buf_compute.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead()); |
| 8174 | + ctx->buf_compute.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES + ggml_graph_overhead()); |
8173 | 8175 |
|
8174 | 8176 | // create measure allocator
|
8175 | 8177 | ctx->alloc = ggml_allocr_new_measure(tensor_alignment);
|
|
0 commit comments