Skip to content

Commit fc759be

Browse files
committed
implement s390x SIMD suggested by @taronaeo
1 parent 3874a6d commit fc759be

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

ggml/src/ggml-cpu/vec.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -451,6 +451,13 @@ ggml_float ggml_vec_cvar_f32(const int n, float * y, const float * x, const floa
451 451
val = vmulq_f32(val, val);
452 452
sum += (ggml_float)vaddvq_f32(val);
453 453
}
454+
#elif defined(__VXE__) || defined(__VXE2__)
455+
for (; i + 3 < n; i += 4) {
456+
float32x4_t val = vec_sub(vec_xl(0, x + i), vec_splats(mean));
457+
vec_xst(val, 0, y + i);
458+
val = vec_mul(val, val);
459+
sum += (ggml_float)vec_hsum(val);
460+
}
454 461
#endif
455 462
for (; i < n; ++i) {
456 463
float val = x[i] - mean;

0 commit comments

Comments
 (0)