@@ -30,34 +30,34 @@ void ggml_cuda_op_mul_mat_q(
30
30
31
31
switch (src0->type ) {
32
32
case GGML_TYPE_Q4_0:
33
- mul_mat_q_case<GGML_TYPE_Q4_0>(args, stream);
33
+ mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
34
34
break ;
35
35
case GGML_TYPE_Q4_1:
36
- mul_mat_q_case<GGML_TYPE_Q4_1>(args, stream);
36
+ mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
37
37
break ;
38
38
case GGML_TYPE_Q5_0:
39
- mul_mat_q_case<GGML_TYPE_Q5_0>(args, stream);
39
+ mul_mat_q_case<GGML_TYPE_Q5_0>(ctx, args, stream);
40
40
break ;
41
41
case GGML_TYPE_Q5_1:
42
- mul_mat_q_case<GGML_TYPE_Q5_1>(args, stream);
42
+ mul_mat_q_case<GGML_TYPE_Q5_1>(ctx, args, stream);
43
43
break ;
44
44
case GGML_TYPE_Q8_0:
45
- mul_mat_q_case<GGML_TYPE_Q8_0>(args, stream);
45
+ mul_mat_q_case<GGML_TYPE_Q8_0>(ctx, args, stream);
46
46
break ;
47
47
case GGML_TYPE_Q2_K:
48
- mul_mat_q_case<GGML_TYPE_Q2_K>(args, stream);
48
+ mul_mat_q_case<GGML_TYPE_Q2_K>(ctx, args, stream);
49
49
break ;
50
50
case GGML_TYPE_Q3_K:
51
- mul_mat_q_case<GGML_TYPE_Q3_K>(args, stream);
51
+ mul_mat_q_case<GGML_TYPE_Q3_K>(ctx, args, stream);
52
52
break ;
53
53
case GGML_TYPE_Q4_K:
54
- mul_mat_q_case<GGML_TYPE_Q4_K>(args, stream);
54
+ mul_mat_q_case<GGML_TYPE_Q4_K>(ctx, args, stream);
55
55
break ;
56
56
case GGML_TYPE_Q5_K:
57
- mul_mat_q_case<GGML_TYPE_Q5_K>(args, stream);
57
+ mul_mat_q_case<GGML_TYPE_Q5_K>(ctx, args, stream);
58
58
break ;
59
59
case GGML_TYPE_Q6_K:
60
- mul_mat_q_case<GGML_TYPE_Q6_K>(args, stream);
60
+ mul_mat_q_case<GGML_TYPE_Q6_K>(ctx, args, stream);
61
61
break ;
62
62
default :
63
63
GGML_ASSERT (false );
0 commit comments