Skip to content

Commit 33f1be0

Browse files
authored
ggml : fix 32-bit ARM NEON (ggml-org#836)
* ggml : add support for 32-bit ARM
* ggml : fix
* ggml : fix
1 parent c4bec5a commit 33f1be0

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

ggml.c

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -668,6 +668,33 @@ uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
668668
return vget_high_u8(vcombine_u8(a, b));
669669
}
670670

671+
// 32-bit ARM fallback for the AArch64-only vzip1q_s8 intrinsic.
//
// Interleaves the low halves of a and b:
//   { a0, b0, a1, b1, ..., a7, b7 }
//
// ARMv7 NEON has no ZIP1/ZIP2 split, but vzipq_s8 performs the full zip and
// returns both result vectors; val[0] is the one that corresponds to ZIP1.
// (Simply concatenating the low halves would yield { a0..a7, b0..b7 }, which
// is a different lane order than the AArch64 intrinsic produces.)
int8x16_t vzip1q_s8(int8x16_t a, int8x16_t b) {
    const int8x16x2_t zipped = vzipq_s8(a, b);
    return zipped.val[0];
}
674+
675+
// 32-bit ARM fallback for the AArch64-only vzip2q_s8 intrinsic.
//
// Interleaves the high halves of a and b:
//   { a8, b8, a9, b9, ..., a15, b15 }
//
// ARMv7 NEON's vzipq_s8 performs the full zip and returns both result
// vectors; val[1] is the one that corresponds to ZIP2.
// (Simply concatenating the high halves would yield { a8..a15, b8..b15 },
// which is a different lane order than the AArch64 intrinsic produces.)
int8x16_t vzip2q_s8(int8x16_t a, int8x16_t b) {
    const int8x16x2_t zipped = vzipq_s8(a, b);
    return zipped.val[1];
}
678+
679+
// 32-bit ARM fallback for the AArch64-only vzip1q_u8 intrinsic.
//
// Interleaves the low halves of a and b:
//   { a0, b0, a1, b1, ..., a7, b7 }
//
// ARMv7 NEON's vzipq_u8 performs the full zip and returns both result
// vectors; val[0] is the one that corresponds to ZIP1.
// (Simply concatenating the low halves would yield { a0..a7, b0..b7 }, which
// is a different lane order than the AArch64 intrinsic produces.)
uint8x16_t vzip1q_u8(uint8x16_t a, uint8x16_t b) {
    const uint8x16x2_t zipped = vzipq_u8(a, b);
    return zipped.val[0];
}
682+
683+
// 32-bit ARM fallback for the AArch64-only vzip2q_u8 intrinsic.
//
// Interleaves the high halves of a and b:
//   { a8, b8, a9, b9, ..., a15, b15 }
//
// ARMv7 NEON's vzipq_u8 performs the full zip and returns both result
// vectors; val[1] is the one that corresponds to ZIP2.
// (Simply concatenating the high halves would yield { a8..a15, b8..b15 },
// which is a different lane order than the AArch64 intrinsic produces.)
uint8x16_t vzip2q_u8(uint8x16_t a, uint8x16_t b) {
    const uint8x16x2_t zipped = vzipq_u8(a, b);
    return zipped.val[1];
}
686+
687+
// Scalar model of a single FCVTNS lane: convert float to int32 rounding to
// nearest with ties to even, saturating on overflow and mapping NaN to 0.
static inline int32_t vcvtn_s32_f32_lane(float x) {
    if (x != x) {
        return 0; // NaN
    }
    if (x >= 2147483648.0f) {
        return INT32_MAX; // saturate instead of invoking UB in the cast
    }
    if (x <= -2147483648.0f) {
        return INT32_MIN;
    }
    // nearbyintf honours the current rounding mode, which is round-to-nearest,
    // ties-to-even unless the program changed it with fesetround().
    // roundf would round ties away from zero, which is vcvtaq, not vcvtnq.
    return (int32_t) nearbyintf(x);
}

// 32-bit ARM fallback for the AArch64-only vcvtnq_s32_f32 intrinsic.
//
// Converts each of the four float lanes of v to a signed 32-bit integer,
// rounding to nearest with ties to even.
int32x4_t vcvtnq_s32_f32(float32x4_t v) {
    int32x4_t res;

    res[0] = vcvtn_s32_f32_lane(vgetq_lane_f32(v, 0));
    res[1] = vcvtn_s32_f32_lane(vgetq_lane_f32(v, 1));
    res[2] = vcvtn_s32_f32_lane(vgetq_lane_f32(v, 2));
    res[3] = vcvtn_s32_f32_lane(vgetq_lane_f32(v, 3));

    return res;
}
697+
671698
#endif
672699
#endif
673700

0 commit comments

Comments
 (0)