Open
Description
Derived from: #66652
#include<stdint.h>
#include<stddef.h>
/*
 * Store the value v into each of the first l elements of a.
 *
 * a: destination array; elements a[0] .. a[l-1] are written.
 * v: 16-bit value to store.
 * l: number of elements to write; when it is 0 the loop body never runs
 *    and a is not accessed.
 */
void fill_i16(int16_t* a, int16_t v, size_t l) {
for (size_t i = 0; i < l; i++) a[i] = v;
}
riscv-clang -Os -march=rv64gcv_zba_zbb_zbs
# fill_i16(a0 = a, a1 = v, a2 = l): store the 16-bit value v to a[0..l-1].
# Uses a scalar loop when l + vlenb would not fit in 64 bits, otherwise a
# masked vector loop that covers vlenb elements per iteration.
fill_i16: # @fill_i16
# l == 0: nothing to store.
beqz a2, .LBB0_5
# a4 = ~l = 2^64 - 1 - l; a7 = vlenb (vector register length in bytes).
not a4, a2
csrr a7, vlenb
# ~l >= vlenb means l + vlenb does not wrap, so the round-up computed in
# .LBB0_3 is exact: take the vector path.
bgeu a4, a7, .LBB0_3
# Scalar fallback: one halfword store per iteration; a2 counts down to 0
# while a0 advances by 2 bytes.
.LBB0_2: # =>This Inner Loop Header: Depth=1
sh a1, 0(a0)
addi a2, a2, -1
addi a0, a0, 2
bnez a2, .LBB0_2
j .LBB0_5
.LBB0_3:
# a4 = index of the first element covered by the current iteration.
li a4, 0
# a6 = vlenb / 8 (only used to form the byte stride below).
srli a6, a7, 3
# a5 = (l + vlenb - 1) & -vlenb: l rounded up to a multiple of vlenb
# (vlenb is a power of two). The loop stops when a4 reaches this value.
neg a5, a7
add a3, a7, a2
addi a3, a3, -1
and a5, a5, a3
# vl = VLMAX for e16/m2, which is vlenb elements; v8 = v in every lane.
# The vl value written to a3 is not read again.
vsetvli a3, zero, e16, m2, ta, ma
vmv.v.x v8, a1
# a1 = (vlenb / 8) * 16 = 2 * vlenb: bytes the pointer advances per
# iteration (vlenb halfwords). The original v in a1 is no longer needed.
slli a1, a6, 4
# Keep vl, switch to e64/m8 (same SEW/LMUL ratio as e16/m2) so lane
# indices can be held as 64-bit values; v16 = 0, 1, 2, ...
vsetvli zero, zero, e64, m8, ta, ma
vid.v v16
.LBB0_4: # =>This Inner Loop Header: Depth=1
# v24 = a4 + lane index (unsigned saturating add);
# v0 = mask of lanes whose element index is below l.
vsaddu.vx v24, v16, a4
vmsltu.vx v0, v24, a2
# Masked 16-bit store: lanes at or beyond l are left unwritten.
vse16.v v8, (a0), v0.t
# Advance the element index by vlenb and the pointer by 2 * vlenb bytes.
add a4, a4, a7
add a0, a0, a1
bne a5, a4, .LBB0_4
.LBB0_5:
ret
arm-clang -Os -march=armv8-a+sve
// fill_i16(x0 = a, w1 = v, x2 = l): store the 16-bit value v to a[0..l-1]
// with a predicated SVE loop.
fill_i16: // @fill_i16
// l == 0: nothing to store.
cbz x2, .LBB0_3
// x8 = number of 16-bit elements in one SVE vector.
cnth x8
// z0 = v copied into every halfword lane; x10 = current element index.
mov z0.h, w1
mov x10, xzr
// x9 = l - x8, clamped to 0 when l < x8 (unsigned "lo" after subs).
subs x9, x2, x8
csel x9, xzr, x9, lo
// p0 = lanes i with i < l: the predicate for the first store.
whilelo p0.h, xzr, x2
.LBB0_2: // =>This Inner Loop Header: Depth=1
// Store v to the active lanes at address x0 + 2 * x10.
st1h { z0.h }, p0, [x0, x10, lsl #1]
// Predicate for the next iteration, computed before x10 is advanced:
// lanes i with x10 + i < x9, i.e. (x10 + x8) + i < l when l >= x8.
whilelo p0.h, x10, x9
add x10, x10, x8
// The flags still come from whilelo (add does not set them): branch back
// while the first lane of the new predicate is active.
b.mi .LBB0_2
.LBB0_3:
ret