Commit 7f52cb5

CUDA: fix LoRAs
1 parent 89e8959 commit 7f52cb5

File tree: 1 file changed (+13 / -10)

ggml-cuda.cu

Lines changed: 13 additions & 10 deletions
@@ -5247,7 +5247,7 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
     if (src->backend == GGML_BACKEND_CPU) {
         kind = cudaMemcpyHostToDevice;
         src_ptr = (char *) src->data;
-    } else if (src->backend == GGML_BACKEND_GPU) {
+    } else if (src->backend == GGML_BACKEND_GPU || src->backend == GGML_BACKEND_GPU_SPLIT) {
         kind = cudaMemcpyDeviceToDevice;
         struct ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
         int id;
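
Note: tensors split by rows across multiple GPUs carry the GGML_BACKEND_GPU_SPLIT backend, and before this change only GGML_BACKEND_GPU sources took the device-to-device copy path. A minimal sketch of what that branch resolves to, assuming the usual per-device pointer table in ggml_tensor_extra_gpu (the data_device field name is taken from the conventions elsewhere in ggml-cuda.cu):

    // sketch only, not the full function: pick the current device's copy of the data
    struct ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
    int id;
    CUDA_CHECK(cudaGetDevice(&id));            // data_device is indexed by device id
    src_ptr = (char *) extra->data_device[id];
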
@@ -5289,9 +5289,7 @@ inline void ggml_cuda_op_add(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
     const float * src0_dd, const float * src1_dd, float * dst_dd, const cudaStream_t & main_stream) {

-    GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);

     const int64_t ne10 = src1->ne[0];
     const int64_t ne11 = src1->ne[1];
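
Note: the asserts restricting src0 and dst to float types are dropped. Merging a LoRA at runtime adds a delta onto existing model weights with ggml_add, and in ggml the result of an add is laid out like src0, so an F16 weight tensor produces an F16 dst that the old dst->type == GGML_TYPE_F32 assert rejected. A hedged sketch of the kind of graph this enables (tensor names are illustrative, not the repo's actual LoRA code):

    // illustrative only: merge a low-rank LoRA product into a weight tensor
    struct ggml_tensor * BA = ggml_mul_mat(ctx, lora_a, lora_b); // F32 low-rank delta
    struct ggml_tensor * r  = ggml_add(ctx, w, BA);              // w (and thus r) may be F16
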
@@ -5631,10 +5629,15 @@ inline void ggml_cuda_op_mul_mat_cublas(
     const int64_t ne0 = dst->ne[0];
     const int64_t row_diff = row_high - row_low;

-    const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(src0->type);
-    size_t src0_as;
-    float * src0_ddf_i = (float *) ggml_cuda_pool_malloc(row_diff*ne00 * sizeof(float), &src0_as);
-    to_fp32_cuda(src0_dd_i, src0_ddf_i, row_diff*ne00, stream);
+    float * src0_ddq_as_f32;
+    size_t src0_as = 0;
+
+    if (src0->type != GGML_TYPE_F32) {
+        const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(src0->type);
+        src0_ddq_as_f32 = (float *) ggml_cuda_pool_malloc(row_diff*ne00 * sizeof(float), &src0_as); // NOLINT
+        to_fp32_cuda(src0_dd_i, src0_ddq_as_f32, row_diff*ne00, stream);
+    }
+    const float * src0_ddf_i = src0->type == GGML_TYPE_F32 ? (const float *) src0_dd_i : src0_ddq_as_f32;

     int id;
     CUDA_CHECK(cudaGetDevice(&id));
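
Note: the cuBLAS path used to dequantize src0 into a pooled F32 buffer unconditionally, which breaks when src0 is already F32 (the case produced by LoRA-modified weights, for which ggml_get_to_fp32_cuda presumably has no converter). The conversion and the temporary allocation now run only for non-F32 types; the // NOLINT acknowledges that src0_ddq_as_f32 is initialized only inside the branch, and the ternary reads it only in that same case. The pattern in isolation (hypothetical helper names, not the ggml API):

    // convert to F32 only when the source is not already F32
    float * tmp = nullptr;
    size_t  tmp_size = 0;
    if (needs_convert) {
        tmp = (float *) pool_alloc(n * sizeof(float), &tmp_size);
        convert_to_f32(src, tmp, n, stream);
    }
    const float * src_f32 = needs_convert ? tmp : (const float *) src;
    // ... run the GEMM on src_f32 ...
    if (tmp_size > 0) {   // free only if the staging buffer was actually allocated
        pool_free(tmp, tmp_size);
    }
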
@@ -5651,10 +5654,11 @@ inline void ggml_cuda_op_mul_mat_cublas(
             src1_ddf_i, ne10,
             &beta, dst_dd_i, ldc));

-    ggml_cuda_pool_free(src0_ddf_i, src0_as);
+    if (src0_as > 0) {
+        ggml_cuda_pool_free(src0_ddq_as_f32, src0_as);
+    }

     (void) dst;
-    (void) src0_dd_i;
     (void) src1_ddq_i;
     (void) src1_padded_row_size;
 }
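
Note: the free is guarded by src0_as > 0, which is nonzero only when the pool allocation above actually ran, and the (void) src0_dd_i suppression is removed because src0_dd_i is now read on the F32 fast path.
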
@@ -5793,7 +5797,6 @@ static void ggml_cuda_op_flatten(const ggml_tensor * src0, const ggml_tensor * s
     const bool use_src1 = src1 != nullptr;
     const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;

-    GGML_ASSERT( src0->backend != GGML_BACKEND_GPU_SPLIT);
     GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_GPU_SPLIT);
     GGML_ASSERT( dst->backend != GGML_BACKEND_GPU_SPLIT);
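
Note: dropping the src0 assert lets the element-wise ops dispatched through this helper accept a row-split src0, which together with the ggml_cuda_cpy_tensor_2d change above is what allows a LoRA to be applied to weights spread across multiple GPUs. A sketch of how such an op is dispatched, assuming the usual wrapper pattern in this file:

    // assumed wrapper shape; the actual call site is elsewhere in ggml-cuda.cu
    static void ggml_cuda_add(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
        ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_add);
    }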
