@@ -966,9 +966,9 @@ void ggml_metal_graph_compute(
966
966
const int64_t nb = ne00;
967
967
968
968
[encoder setComputePipelineState: ctx->pipeline_concat];
969
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
970
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
971
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
969
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
970
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
971
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
972
972
[encoder setBytes: &ne00 length: sizeof (ne00) atIndex: 3 ];
973
973
[encoder setBytes: &ne01 length: sizeof (ne01) atIndex: 4 ];
974
974
[encoder setBytes: &ne02 length: sizeof (ne02) atIndex: 5 ];
@@ -1031,9 +1031,9 @@ void ggml_metal_graph_compute(
1031
1031
default : GGML_ASSERT (false );
1032
1032
}
1033
1033
}
1034
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1035
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1036
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1034
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1035
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1036
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1037
1037
[encoder setBytes: &ne00 length: sizeof (ne00) atIndex: 3 ];
1038
1038
[encoder setBytes: &ne01 length: sizeof (ne01) atIndex: 4 ];
1039
1039
[encoder setBytes: &ne02 length: sizeof (ne02) atIndex: 5 ];
@@ -1085,8 +1085,8 @@ void ggml_metal_graph_compute(
1085
1085
[encoder setComputePipelineState: ctx->pipeline_scale];
1086
1086
}
1087
1087
1088
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1089
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1088
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1089
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1090
1090
[encoder setBytes: &scale length: sizeof (scale) atIndex: 2 ];
1091
1091
1092
1092
[encoder dispatchThreadgroups: MTLSizeMake (n, 1 , 1 ) threadsPerThreadgroup: MTLSizeMake (1 , 1 , 1 )];
@@ -1096,8 +1096,8 @@ void ggml_metal_graph_compute(
1096
1096
case GGML_UNARY_OP_SILU:
1097
1097
{
1098
1098
[encoder setComputePipelineState: ctx->pipeline_silu];
1099
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1100
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1099
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1100
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1101
1101
1102
1102
const int64_t n = ggml_nelements (dst);
1103
1103
GGML_ASSERT (n % 4 == 0 );
@@ -1107,8 +1107,8 @@ void ggml_metal_graph_compute(
1107
1107
case GGML_UNARY_OP_RELU:
1108
1108
{
1109
1109
[encoder setComputePipelineState: ctx->pipeline_relu];
1110
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1111
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1110
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1111
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1112
1112
1113
1113
const int64_t n = ggml_nelements (dst);
1114
1114
@@ -1117,8 +1117,8 @@ void ggml_metal_graph_compute(
1117
1117
case GGML_UNARY_OP_GELU:
1118
1118
{
1119
1119
[encoder setComputePipelineState: ctx->pipeline_gelu];
1120
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1121
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1120
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1121
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1122
1122
1123
1123
const int64_t n = ggml_nelements (dst);
1124
1124
GGML_ASSERT (n % 4 == 0 );
@@ -1136,8 +1136,8 @@ void ggml_metal_graph_compute(
1136
1136
GGML_ASSERT (ggml_is_contiguous (src0));
1137
1137
1138
1138
[encoder setComputePipelineState: ctx->pipeline_sqr];
1139
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1140
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1139
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1140
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1141
1141
1142
1142
const int64_t n = ggml_nelements (dst);
1143
1143
[encoder dispatchThreadgroups: MTLSizeMake (n, 1 , 1 ) threadsPerThreadgroup: MTLSizeMake (1 , 1 , 1 )];
@@ -1147,8 +1147,8 @@ void ggml_metal_graph_compute(
1147
1147
GGML_ASSERT (src0->nb [0 ] == ggml_type_size (src0->type ));
1148
1148
1149
1149
[encoder setComputePipelineState: ctx->pipeline_sum_rows];
1150
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1151
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1150
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1151
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1152
1152
[encoder setBytes: &ne00 length: sizeof (ne00) atIndex: 2 ];
1153
1153
[encoder setBytes: &ne01 length: sizeof (ne01) atIndex: 3 ];
1154
1154
[encoder setBytes: &ne02 length: sizeof (ne02) atIndex: 4 ];
@@ -1194,9 +1194,9 @@ void ggml_metal_graph_compute(
1194
1194
1195
1195
const float scale = ((float *) dst->op_params )[0 ];
1196
1196
1197
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1198
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1199
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1197
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1198
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1199
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1200
1200
[encoder setBytes: &ne00 length: sizeof (ne00) atIndex: 3 ];
1201
1201
[encoder setBytes: &ne01 length: sizeof (ne01) atIndex: 4 ];
1202
1202
[encoder setBytes: &ne02 length: sizeof (ne02) atIndex: 5 ];
@@ -1214,8 +1214,8 @@ void ggml_metal_graph_compute(
1214
1214
} else {
1215
1215
[encoder setComputePipelineState: ctx->pipeline_diag_mask_inf];
1216
1216
}
1217
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1218
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1217
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1218
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1219
1219
[encoder setBytes: &ne00 length: sizeof (ne00) atIndex: 2 ];
1220
1220
[encoder setBytes: &ne01 length: sizeof (ne01) atIndex: 3 ];
1221
1221
[encoder setBytes: &n_past length: sizeof (int ) atIndex: 4 ];
@@ -1288,9 +1288,9 @@ void ggml_metal_graph_compute(
1288
1288
case GGML_TYPE_Q6_K: [encoder setComputePipelineState: ctx->pipeline_mul_mm_q6_K_f32]; break ;
1289
1289
default : GGML_ASSERT (false && " MUL MAT-MAT not implemented" );
1290
1290
}
1291
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1292
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1293
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1291
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1292
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1293
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1294
1294
[encoder setBytes: &ne00 length: sizeof (ne00) atIndex: 3 ];
1295
1295
[encoder setBytes: &ne02 length: sizeof (ne02) atIndex: 4 ];
1296
1296
[encoder setBytes: &nb01 length: sizeof (nb01) atIndex: 5 ];
@@ -1405,9 +1405,9 @@ void ggml_metal_graph_compute(
1405
1405
}
1406
1406
};
1407
1407
1408
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1409
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1410
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1408
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1409
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1410
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1411
1411
[encoder setBytes: &ne00 length: sizeof (ne00) atIndex: 3 ];
1412
1412
[encoder setBytes: &ne01 length: sizeof (ne01) atIndex: 4 ];
1413
1413
[encoder setBytes: &ne02 length: sizeof (ne02) atIndex: 5 ];
@@ -1513,9 +1513,9 @@ void ggml_metal_graph_compute(
1513
1513
case GGML_TYPE_Q6_K: [encoder setComputePipelineState: ctx->pipeline_mul_mm_id_q6_K_f32]; break ;
1514
1514
default : GGML_ASSERT (false && " MUL_MAT_ID not implemented" );
1515
1515
}
1516
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1517
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1518
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1516
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1517
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1518
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1519
1519
[encoder setBytes: &ne20 length: sizeof (ne20) atIndex: 3 ];
1520
1520
[encoder setBytes: &ne22 length: sizeof (ne22) atIndex: 4 ];
1521
1521
[encoder setBytes: &nb21 length: sizeof (nb21) atIndex: 5 ];
@@ -1561,9 +1561,9 @@ void ggml_metal_graph_compute(
1561
1561
default : GGML_ASSERT (false && " not implemented" );
1562
1562
}
1563
1563
1564
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1565
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1566
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1564
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1565
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1566
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1567
1567
[encoder setBytes: &ne00 length: sizeof ( int64_t ) atIndex: 3 ];
1568
1568
[encoder setBytes: &nb01 length: sizeof (uint64_t ) atIndex: 4 ];
1569
1569
[encoder setBytes: &nb1 length: sizeof (uint64_t ) atIndex: 5 ];
@@ -1586,8 +1586,8 @@ void ggml_metal_graph_compute(
1586
1586
}
1587
1587
1588
1588
[encoder setComputePipelineState: ctx->pipeline_rms_norm];
1589
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1590
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1589
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1590
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1591
1591
[encoder setBytes: &ne00 length: sizeof ( int64_t ) atIndex: 2 ];
1592
1592
[encoder setBytes: &nb01 length: sizeof (uint64_t ) atIndex: 3 ];
1593
1593
[encoder setBytes: &eps length: sizeof ( float ) atIndex: 4 ];
@@ -1605,8 +1605,8 @@ void ggml_metal_graph_compute(
1605
1605
const int nth = MIN (256 , ne00);
1606
1606
1607
1607
[encoder setComputePipelineState: ctx->pipeline_norm];
1608
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1609
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1608
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1609
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1610
1610
[encoder setBytes: &ne00 length: sizeof ( int64_t ) atIndex: 2 ];
1611
1611
[encoder setBytes: &nb01 length: sizeof (uint64_t ) atIndex: 3 ];
1612
1612
[encoder setBytes: &eps length: sizeof ( float ) atIndex: 4 ];
@@ -1632,8 +1632,8 @@ void ggml_metal_graph_compute(
1632
1632
const float m1 = powf (2 .0f , -(max_bias / 2 .0f ) / n_heads_log2_floor);
1633
1633
1634
1634
[encoder setComputePipelineState: ctx->pipeline_alibi_f32];
1635
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1636
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1635
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1636
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1637
1637
[encoder setBytes: &ne00 length: sizeof ( int64_t ) atIndex: 2 ];
1638
1638
[encoder setBytes: &ne01 length: sizeof ( int64_t ) atIndex: 3 ];
1639
1639
[encoder setBytes: &ne02 length: sizeof ( int64_t ) atIndex: 4 ];
@@ -1682,9 +1682,9 @@ void ggml_metal_graph_compute(
1682
1682
default : GGML_ASSERT (false );
1683
1683
};
1684
1684
1685
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1686
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1687
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1685
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1686
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
1687
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 2 ];
1688
1688
[encoder setBytes: &ne00 length: sizeof ( int64_t ) atIndex: 3 ];
1689
1689
[encoder setBytes: &ne01 length: sizeof ( int64_t ) atIndex: 4 ];
1690
1690
[encoder setBytes: &ne02 length: sizeof ( int64_t ) atIndex: 5 ];
@@ -1750,8 +1750,8 @@ void ggml_metal_graph_compute(
1750
1750
default : GGML_ASSERT (false );
1751
1751
};
1752
1752
1753
- [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 0 ];
1754
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1753
+ if (id_src1) [encoder setBuffer: id_src1 offset: offs_src1 atIndex: 0 ];
1754
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1755
1755
[encoder setBytes: &ofs0 length: sizeof ( int32_t ) atIndex: 2 ];
1756
1756
[encoder setBytes: &ofs1 length: sizeof ( int32_t ) atIndex: 3 ];
1757
1757
[encoder setBytes: &IW length: sizeof ( int32_t ) atIndex: 4 ];
@@ -1781,8 +1781,8 @@ void ggml_metal_graph_compute(
1781
1781
default : GGML_ASSERT (false );
1782
1782
};
1783
1783
1784
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1785
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1784
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1785
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1786
1786
[encoder setBytes: &ne00 length: sizeof ( int64_t ) atIndex: 2 ];
1787
1787
1788
1788
[encoder dispatchThreadgroups: MTLSizeMake (1 , nrows, 1 ) threadsPerThreadgroup: MTLSizeMake (ne00, 1 , 1 )];
@@ -1822,8 +1822,8 @@ void ggml_metal_graph_compute(
1822
1822
default : GGML_ASSERT (false && " not implemented" );
1823
1823
}
1824
1824
1825
- [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1826
- [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1825
+ if (id_src0) [encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1826
+ if (id_dst) [encoder setBuffer: id_dst offset: offs_dst atIndex: 1 ];
1827
1827
[encoder setBytes: &ne00 length: sizeof ( int64_t ) atIndex: 2 ];
1828
1828
[encoder setBytes: &ne01 length: sizeof ( int64_t ) atIndex: 3 ];
1829
1829
[encoder setBytes: &ne02 length: sizeof ( int64_t ) atIndex: 4 ];
0 commit comments