File tree: 1 file changed, +9 -3 lines changed.
Columns: original file line number | diff line number | diff line change
@@ -5466,8 +5466,7 @@ static void ggml_compute_forward_dup_f16(
5466
5466
size_t id = 0 ;
5467
5467
uint8_t * dst_ptr = (uint8_t * ) dst -> data ;
5468
5468
size_t dst_row_size = nb0 * (ne00 / GGML_BLCK_SIZE [dst -> type ]);
5469
- // todo: use work buffer
5470
- float * src0_f32 = (float * ) alloca (ne00 * sizeof (float ));
5469
+ float * src0_f32 = (float * ) params -> wdata ;
5471
5470
5472
5471
for (int i03 = 0 ; i03 < ne03 ; i03 ++ ) {
5473
5472
for (int i02 = 0 ; i02 < ne02 ; i02 ++ ) {
@@ -10227,9 +10226,17 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
10227
10226
struct ggml_tensor * node = cgraph -> nodes [i ];
10228
10227
10229
10228
switch (node -> op ) {
10229
+ case GGML_OP_CPY :
10230
10230
case GGML_OP_DUP :
10231
10231
{
10232
10232
node -> n_tasks = 1 ;
10233
+
10234
+ size_t cur = 0 ;
10235
+ if (node -> type == GGML_TYPE_Q4_0 || node -> type == GGML_TYPE_Q4_1 ) {
10236
+ cur = GGML_TYPE_SIZE [GGML_TYPE_F32 ] * node -> ne [0 ];
10237
+ }
10238
+
10239
+ work_size = MAX (work_size , cur );
10233
10240
} break ;
10234
10241
case GGML_OP_ADD :
10235
10242
{
@@ -10322,7 +10329,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
10322
10329
{
10323
10330
node -> n_tasks = n_threads ;
10324
10331
} break ;
10325
- case GGML_OP_CPY :
10326
10332
case GGML_OP_CONT :
10327
10333
case GGML_OP_RESHAPE :
10328
10334
case GGML_OP_VIEW :
You can’t perform that action at this time.
0 commit comments