File tree: 1 file changed, +3 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -1479,11 +1479,12 @@ static void ggml_cuda_op_mul_mat(
1479
1479
if (src0_is_contiguous) {
1480
1480
dev[id].src0_dd = split ? (char *) src0_extra->data_device [id] : (char *) src0->data ;
1481
1481
} else {
1482
- // If src0 is not contiguous it will be copied to a temporary buffer, it may then be necessary to clear padding.
1482
+ // If src0 is not contiguous it will be copied to a temporary buffer.
1483
+ // This buffer needs to be cleared entirely because multiple regions will function as padding.
1483
1484
const size_t nbytes_data = ggml_nbytes (src0);
1484
1485
const size_t nbytes_padding = ggml_row_size (src0->type , MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
1485
1486
dev[id].src0_dd = dev[id].src0_dd_alloc .alloc (ctx.pool (id), nbytes_data + nbytes_padding);
1486
- CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd + nbytes_data , 0 , nbytes_padding, stream));
1487
+ CUDA_CHECK (cudaMemsetAsync (dev[id].src0_dd , 0 , nbytes_data + nbytes_padding, stream));
1487
1488
}
1488
1489
1489
1490
// If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
You can’t perform that action at this time.
0 commit comments