Skip to content

Commit d3e1886

Browse files
committed
ggml_cpy: use the work buffer instead of alloca when quantizing
1 parent 061f1ce commit d3e1886

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

ggml.c

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5466,8 +5466,7 @@ static void ggml_compute_forward_dup_f16(
54665466
size_t id = 0;
54675467
uint8_t * dst_ptr = (uint8_t *) dst->data;
54685468
size_t dst_row_size = nb0 * (ne00 / GGML_BLCK_SIZE[dst->type]);
5469-
// todo: use work buffer
5470-
float * src0_f32 = (float *) alloca(ne00 * sizeof(float));
5469+
float * src0_f32 = (float *) params->wdata;
54715470

54725471
for (int i03 = 0; i03 < ne03; i03++) {
54735472
for (int i02 = 0; i02 < ne02; i02++) {
@@ -10227,9 +10226,17 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
1022710226
struct ggml_tensor * node = cgraph->nodes[i];
1022810227

1022910228
switch (node->op) {
10229+
case GGML_OP_CPY:
1023010230
case GGML_OP_DUP:
1023110231
{
1023210232
node->n_tasks = 1;
10233+
10234+
size_t cur = 0;
10235+
if (node->type == GGML_TYPE_Q4_0 || node->type == GGML_TYPE_Q4_1) {
10236+
cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->ne[0];
10237+
}
10238+
10239+
work_size = MAX(work_size, cur);
1023310240
} break;
1023410241
case GGML_OP_ADD:
1023510242
{
@@ -10322,7 +10329,6 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
1032210329
{
1032310330
node->n_tasks = n_threads;
1032410331
} break;
10325-
case GGML_OP_CPY:
1032610332
case GGML_OP_CONT:
1032710333
case GGML_OP_RESHAPE:
1032810334
case GGML_OP_VIEW:

0 commit comments

Comments
 (0)