Skip to content

Commit d3d40bf

Browse files
committed
Help xcode produce fmla instructions
1 parent 9d4d14c commit d3d40bf

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

sgemm.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,20 @@ inline U madd(T a, T b, U c) {
213213
return add(mul(a, b), c);
214214
}
215215

216+
// Xcode needs a little help to produce fmla instructions, so madd is
// specialized below to call the fused multiply-add intrinsics directly.
217+
#if defined(__ARM_FEATURE_FMA)
218+
template <>
219+
inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) {
220+
return vfmaq_f32(c, b, a);
221+
}
222+
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(_MSC_VER)
223+
template <>
224+
inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) {
225+
return vfmaq_f16(c, b, a);
226+
}
227+
#endif
228+
#endif
229+
216230
////////////////////////////////////////////////////////////////////////////////////////////////////
217231
// FLOATING POINT MATRIX MULTIPLICATION
218232

0 commit comments

Comments
 (0)