@@ -920,14 +920,21 @@ kernel void kernel_mul_mv_q4_0_f32(
920
920
device const float * src1,
921
921
device float * dst,
922
922
constant int64_t & ne00,
923
- constant int64_t & ne01[[buffer(4 )]],
924
- constant int64_t & ne02[[buffer(5 )]],
925
- constant int64_t & ne10[[buffer(9 )]],
926
- constant int64_t & ne12[[buffer(11 )]],
927
- constant int64_t & ne0 [[buffer(15 )]],
928
- constant int64_t & ne1 [[buffer(16 )]],
929
- constant uint & r2 [[buffer(17 )]],
930
- constant uint & r3 [[buffer(18 )]],
923
+ constant int64_t & ne01,
924
+ constant int64_t & ne02,
925
+ constant uint64_t & nb00,
926
+ constant uint64_t & nb01,
927
+ constant uint64_t & nb02,
928
+ constant int64_t & ne10,
929
+ constant int64_t & ne11,
930
+ constant int64_t & ne12,
931
+ constant uint64_t & nb10,
932
+ constant uint64_t & nb11,
933
+ constant uint64_t & nb12,
934
+ constant int64_t & ne0,
935
+ constant int64_t & ne1,
936
+ constant uint & r2,
937
+ constant uint & r3,
931
938
uint3 tgpig[[threadgroup_position_in_grid]],
932
939
uint tiisg[[thread_index_in_simdgroup]],
933
940
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -939,14 +946,21 @@ kernel void kernel_mul_mv_q4_1_f32(
939
946
device const float * src1,
940
947
device float * dst,
941
948
constant int64_t & ne00,
942
- constant int64_t & ne01[[buffer(4 )]],
943
- constant int64_t & ne02[[buffer(5 )]],
944
- constant int64_t & ne10[[buffer(9 )]],
945
- constant int64_t & ne12[[buffer(11 )]],
946
- constant int64_t & ne0 [[buffer(15 )]],
947
- constant int64_t & ne1 [[buffer(16 )]],
948
- constant uint & r2 [[buffer(17 )]],
949
- constant uint & r3 [[buffer(18 )]],
949
+ constant int64_t & ne01,
950
+ constant int64_t & ne02,
951
+ constant uint64_t & nb00,
952
+ constant uint64_t & nb01,
953
+ constant uint64_t & nb02,
954
+ constant int64_t & ne10,
955
+ constant int64_t & ne11,
956
+ constant int64_t & ne12,
957
+ constant uint64_t & nb10,
958
+ constant uint64_t & nb11,
959
+ constant uint64_t & nb12,
960
+ constant int64_t & ne0,
961
+ constant int64_t & ne1,
962
+ constant uint & r2,
963
+ constant uint & r3,
950
964
uint3 tgpig[[threadgroup_position_in_grid]],
951
965
uint tiisg[[thread_index_in_simdgroup]],
952
966
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -958,14 +972,21 @@ kernel void kernel_mul_mv_q5_0_f32(
958
972
device const float * src1,
959
973
device float * dst,
960
974
constant int64_t & ne00,
961
- constant int64_t & ne01[[buffer(4 )]],
962
- constant int64_t & ne02[[buffer(5 )]],
963
- constant int64_t & ne10[[buffer(9 )]],
964
- constant int64_t & ne12[[buffer(11 )]],
965
- constant int64_t & ne0 [[buffer(15 )]],
966
- constant int64_t & ne1 [[buffer(16 )]],
967
- constant uint & r2 [[buffer(17 )]],
968
- constant uint & r3 [[buffer(18 )]],
975
+ constant int64_t & ne01,
976
+ constant int64_t & ne02,
977
+ constant uint64_t & nb00,
978
+ constant uint64_t & nb01,
979
+ constant uint64_t & nb02,
980
+ constant int64_t & ne10,
981
+ constant int64_t & ne11,
982
+ constant int64_t & ne12,
983
+ constant uint64_t & nb10,
984
+ constant uint64_t & nb11,
985
+ constant uint64_t & nb12,
986
+ constant int64_t & ne0,
987
+ constant int64_t & ne1,
988
+ constant uint & r2,
989
+ constant uint & r3,
969
990
uint3 tgpig[[threadgroup_position_in_grid]],
970
991
uint tiisg[[thread_index_in_simdgroup]],
971
992
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -977,14 +998,21 @@ kernel void kernel_mul_mv_q5_1_f32(
977
998
device const float * src1,
978
999
device float * dst,
979
1000
constant int64_t & ne00,
980
- constant int64_t & ne01[[buffer(4 )]],
981
- constant int64_t & ne02[[buffer(5 )]],
982
- constant int64_t & ne10[[buffer(9 )]],
983
- constant int64_t & ne12[[buffer(11 )]],
984
- constant int64_t & ne0 [[buffer(15 )]],
985
- constant int64_t & ne1 [[buffer(16 )]],
986
- constant uint & r2 [[buffer(17 )]],
987
- constant uint & r3 [[buffer(18 )]],
1001
+ constant int64_t & ne01,
1002
+ constant int64_t & ne02,
1003
+ constant uint64_t & nb00,
1004
+ constant uint64_t & nb01,
1005
+ constant uint64_t & nb02,
1006
+ constant int64_t & ne10,
1007
+ constant int64_t & ne11,
1008
+ constant int64_t & ne12,
1009
+ constant uint64_t & nb10,
1010
+ constant uint64_t & nb11,
1011
+ constant uint64_t & nb12,
1012
+ constant int64_t & ne0,
1013
+ constant int64_t & ne1,
1014
+ constant uint & r2,
1015
+ constant uint & r3,
988
1016
uint3 tgpig[[threadgroup_position_in_grid]],
989
1017
uint tiisg[[thread_index_in_simdgroup]],
990
1018
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -1082,8 +1110,8 @@ kernel void kernel_mul_mv_q8_0_f32(
1082
1110
constant uint64_t & nb12,
1083
1111
constant int64_t & ne0,
1084
1112
constant int64_t & ne1,
1085
- constant uint & r2 [[buffer( 17 )]] ,
1086
- constant uint & r3 [[buffer( 18 )]] ,
1113
+ constant uint & r2,
1114
+ constant uint & r3,
1087
1115
uint3 tgpig[[threadgroup_position_in_grid]],
1088
1116
uint tiisg[[thread_index_in_simdgroup]],
1089
1117
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -1189,8 +1217,8 @@ kernel void kernel_mul_mv_f32_f32(
1189
1217
constant uint64_t & nb12,
1190
1218
constant int64_t & ne0,
1191
1219
constant int64_t & ne1,
1192
- constant uint & r2 [[buffer( 17 )]] ,
1193
- constant uint & r3 [[buffer( 18 )]] ,
1220
+ constant uint & r2,
1221
+ constant uint & r3,
1194
1222
uint3 tgpig[[threadgroup_position_in_grid]],
1195
1223
uint tiisg[[thread_index_in_simdgroup]]) {
1196
1224
kernel_mul_mv_f32_f32_impl (src0, src1, dst, ne00, ne01, ne02, nb00, nb01, nb02, ne10, ne11, ne12, nb10, nb11, nb12, ne0, ne1, r2, r3, tgpig, tiisg);
@@ -1216,8 +1244,8 @@ kernel void kernel_mul_mv_f16_f16(
1216
1244
constant uint64_t & nb12,
1217
1245
constant int64_t & ne0,
1218
1246
constant int64_t & ne1,
1219
- constant uint & r2 [[buffer( 17 )]] ,
1220
- constant uint & r3 [[buffer( 18 )]] ,
1247
+ constant uint & r2,
1248
+ constant uint & r3,
1221
1249
uint3 tgpig[[threadgroup_position_in_grid]],
1222
1250
uint tiisg[[thread_index_in_simdgroup]]) {
1223
1251
@@ -1353,8 +1381,8 @@ kernel void kernel_mul_mv_f16_f32_1row(
1353
1381
constant uint64_t & nb12,
1354
1382
constant int64_t & ne0,
1355
1383
constant int64_t & ne1,
1356
- constant uint & r2 [[buffer( 17 )]] ,
1357
- constant uint & r3 [[buffer( 18 )]] ,
1384
+ constant uint & r2,
1385
+ constant uint & r3,
1358
1386
uint3 tgpig[[threadgroup_position_in_grid]],
1359
1387
uint tiisg[[thread_index_in_simdgroup]]) {
1360
1388
kernel_mul_mv_f16_f32_1row_impl (src0, src1, dst, ne00, ne01, ne02, nb00, nb01, nb02, ne10, ne11, ne12, nb10, nb11, nb12, ne0, ne1, r2, r3, tgpig, tiisg);
@@ -1459,8 +1487,8 @@ kernel void kernel_mul_mv_f16_f32(
1459
1487
constant uint64_t & nb12,
1460
1488
constant int64_t & ne0,
1461
1489
constant int64_t & ne1,
1462
- constant uint & r2 [[buffer( 17 )]] ,
1463
- constant uint & r3 [[buffer( 18 )]] ,
1490
+ constant uint & r2,
1491
+ constant uint & r3,
1464
1492
uint3 tgpig[[threadgroup_position_in_grid]],
1465
1493
uint tiisg[[thread_index_in_simdgroup]]) {
1466
1494
kernel_mul_mv_f16_f32_impl (src0, src1, dst, ne00, ne01, ne02, nb00, nb01, nb02, ne10, ne11, ne12, nb10, nb11, nb12, ne0, ne1, r2, r3, tgpig, tiisg);
@@ -1485,8 +1513,8 @@ kernel void kernel_mul_mv_f16_f32_l4(
1485
1513
constant uint64_t & nb12,
1486
1514
constant int64_t & ne0,
1487
1515
constant int64_t & ne1,
1488
- constant uint & r2 [[buffer( 17 )]] ,
1489
- constant uint & r3 [[buffer( 18 )]] ,
1516
+ constant uint & r2,
1517
+ constant uint & r3,
1490
1518
uint3 tgpig[[threadgroup_position_in_grid]],
1491
1519
uint tiisg[[thread_index_in_simdgroup]]) {
1492
1520
@@ -2576,14 +2604,21 @@ kernel void kernel_mul_mv_q2_K_f32(
2576
2604
device const float * src1,
2577
2605
device float * dst,
2578
2606
constant int64_t & ne00,
2579
- constant int64_t & ne01[[buffer(4 )]],
2580
- constant int64_t & ne02[[buffer(5 )]],
2581
- constant int64_t & ne10[[buffer(9 )]],
2582
- constant int64_t & ne12[[buffer(11 )]],
2583
- constant int64_t & ne0 [[buffer(15 )]],
2584
- constant int64_t & ne1 [[buffer(16 )]],
2585
- constant uint & r2 [[buffer(17 )]],
2586
- constant uint & r3 [[buffer(18 )]],
2607
+ constant int64_t & ne01,
2608
+ constant int64_t & ne02,
2609
+ constant uint64_t & nb00,
2610
+ constant uint64_t & nb01,
2611
+ constant uint64_t & nb02,
2612
+ constant int64_t & ne10,
2613
+ constant int64_t & ne11,
2614
+ constant int64_t & ne12,
2615
+ constant uint64_t & nb10,
2616
+ constant uint64_t & nb11,
2617
+ constant uint64_t & nb12,
2618
+ constant int64_t & ne0,
2619
+ constant int64_t & ne1,
2620
+ constant uint & r2,
2621
+ constant uint & r3,
2587
2622
uint3 tgpig[[threadgroup_position_in_grid]],
2588
2623
uint tiisg[[thread_index_in_simdgroup]],
2589
2624
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -2833,14 +2868,21 @@ kernel void kernel_mul_mv_q3_K_f32(
2833
2868
device const float * src1,
2834
2869
device float * dst,
2835
2870
constant int64_t & ne00,
2836
- constant int64_t & ne01[[buffer(4 )]],
2837
- constant int64_t & ne02[[buffer(5 )]],
2838
- constant int64_t & ne10[[buffer(9 )]],
2839
- constant int64_t & ne12[[buffer(11 )]],
2840
- constant int64_t & ne0 [[buffer(15 )]],
2841
- constant int64_t & ne1 [[buffer(16 )]],
2842
- constant uint & r2 [[buffer(17 )]],
2843
- constant uint & r3 [[buffer(18 )]],
2871
+ constant int64_t & ne01,
2872
+ constant int64_t & ne02,
2873
+ constant uint64_t & nb00,
2874
+ constant uint64_t & nb01,
2875
+ constant uint64_t & nb02,
2876
+ constant int64_t & ne10,
2877
+ constant int64_t & ne11,
2878
+ constant int64_t & ne12,
2879
+ constant uint64_t & nb10,
2880
+ constant uint64_t & nb11,
2881
+ constant uint64_t & nb12,
2882
+ constant int64_t & ne0,
2883
+ constant int64_t & ne1,
2884
+ constant uint & r2,
2885
+ constant uint & r3,
2844
2886
uint3 tgpig[[threadgroup_position_in_grid]],
2845
2887
uint tiisg[[thread_index_in_simdgroup]],
2846
2888
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -3064,14 +3106,21 @@ kernel void kernel_mul_mv_q4_K_f32(
3064
3106
device const float * src1,
3065
3107
device float * dst,
3066
3108
constant int64_t & ne00,
3067
- constant int64_t & ne01[[buffer(4 )]],
3068
- constant int64_t & ne02[[buffer(5 )]],
3069
- constant int64_t & ne10[[buffer(9 )]],
3070
- constant int64_t & ne12[[buffer(11 )]],
3071
- constant int64_t & ne0 [[buffer(15 )]],
3072
- constant int64_t & ne1 [[buffer(16 )]],
3073
- constant uint & r2 [[buffer(17 )]],
3074
- constant uint & r3 [[buffer(18 )]],
3109
+ constant int64_t & ne01,
3110
+ constant int64_t & ne02,
3111
+ constant uint64_t & nb00,
3112
+ constant uint64_t & nb01,
3113
+ constant uint64_t & nb02,
3114
+ constant int64_t & ne10,
3115
+ constant int64_t & ne11,
3116
+ constant int64_t & ne12,
3117
+ constant uint64_t & nb10,
3118
+ constant uint64_t & nb11,
3119
+ constant uint64_t & nb12,
3120
+ constant int64_t & ne0,
3121
+ constant int64_t & ne1,
3122
+ constant uint & r2,
3123
+ constant uint & r3,
3075
3124
uint3 tgpig[[threadgroup_position_in_grid]],
3076
3125
uint tiisg[[thread_index_in_simdgroup]],
3077
3126
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -3263,14 +3312,21 @@ kernel void kernel_mul_mv_q5_K_f32(
3263
3312
device const float * src1,
3264
3313
device float * dst,
3265
3314
constant int64_t & ne00,
3266
- constant int64_t & ne01[[buffer(4 )]],
3267
- constant int64_t & ne02[[buffer(5 )]],
3268
- constant int64_t & ne10[[buffer(9 )]],
3269
- constant int64_t & ne12[[buffer(11 )]],
3270
- constant int64_t & ne0 [[buffer(15 )]],
3271
- constant int64_t & ne1 [[buffer(16 )]],
3272
- constant uint & r2 [[buffer(17 )]],
3273
- constant uint & r3 [[buffer(18 )]],
3315
+ constant int64_t & ne01,
3316
+ constant int64_t & ne02,
3317
+ constant uint64_t & nb00,
3318
+ constant uint64_t & nb01,
3319
+ constant uint64_t & nb02,
3320
+ constant int64_t & ne10,
3321
+ constant int64_t & ne11,
3322
+ constant int64_t & ne12,
3323
+ constant uint64_t & nb10,
3324
+ constant uint64_t & nb11,
3325
+ constant uint64_t & nb12,
3326
+ constant int64_t & ne0,
3327
+ constant int64_t & ne1,
3328
+ constant uint & r2,
3329
+ constant uint & r3,
3274
3330
uint3 tgpig[[threadgroup_position_in_grid]],
3275
3331
uint tiisg[[thread_index_in_simdgroup]],
3276
3332
uint sgitg[[simdgroup_index_in_threadgroup]]) {
@@ -3390,14 +3446,21 @@ kernel void kernel_mul_mv_q6_K_f32(
3390
3446
device const float * src1,
3391
3447
device float * dst,
3392
3448
constant int64_t & ne00,
3393
- constant int64_t & ne01[[buffer(4 )]],
3394
- constant int64_t & ne02[[buffer(5 )]],
3395
- constant int64_t & ne10[[buffer(9 )]],
3396
- constant int64_t & ne12[[buffer(11 )]],
3397
- constant int64_t & ne0 [[buffer(15 )]],
3398
- constant int64_t & ne1 [[buffer(16 )]],
3399
- constant uint & r2 [[buffer(17 )]],
3400
- constant uint & r3 [[buffer(18 )]],
3449
+ constant int64_t & ne01,
3450
+ constant int64_t & ne02,
3451
+ constant uint64_t & nb00,
3452
+ constant uint64_t & nb01,
3453
+ constant uint64_t & nb02,
3454
+ constant int64_t & ne10,
3455
+ constant int64_t & ne11,
3456
+ constant int64_t & ne12,
3457
+ constant uint64_t & nb10,
3458
+ constant uint64_t & nb11,
3459
+ constant uint64_t & nb12,
3460
+ constant int64_t & ne0,
3461
+ constant int64_t & ne1,
3462
+ constant uint & r2,
3463
+ constant uint & r3,
3401
3464
uint3 tgpig[[threadgroup_position_in_grid]],
3402
3465
uint tiisg[[thread_index_in_simdgroup]],
3403
3466
uint sgitg[[simdgroup_index_in_threadgroup]]) {
0 commit comments