Skip to content

Commit 7b5dd06

Browse files
committed
igzip/riscv64: Add RVV optimization for VLEN=128
Signed-off-by: WenLei <[email protected]>
1 parent 3d3eee7 commit 7b5dd06

File tree

3 files changed

+153
-2
lines changed

3 files changed

+153
-2
lines changed

igzip/riscv64/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,4 +30,5 @@
3030
lsrc_riscv64 += \
3131
igzip/riscv64/igzip_multibinary_riscv64_dispatcher.c \
3232
igzip/riscv64/igzip_multibinary_riscv64.S \
33-
igzip/riscv64/igzip_isal_adler32_rvv.S
33+
igzip/riscv64/igzip_isal_adler32_rvv.S \
34+
igzip/riscv64/igzip_isal_adler32_rvv128.S
Lines changed: 142 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,142 @@
1+
/**********************************************************************
2+
Copyright (c) 2025 ZTE Corporation.
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions
6+
are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of ZTE Corporation nor the names of its
14+
contributors may be used to endorse or promote products derived
15+
from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
**********************************************************************/
29+
#if HAVE_RVV

/*
 * Named constants (expanded by the C preprocessor before assembly).
 *
 * ADLER_MOD        Adler-32 modulus.
 * ADLER_DIV_MAGIC  With ADLER_DIV_SHIFT: (x * MAGIC) >> SHIFT == x / 65521
 * ADLER_DIV_SHIFT  for every x below 2^32 (65521 * MAGIC = 2^47 + 31073).
 * BLOCK_BYTES      Bytes consumed per vector iteration.
 * BLOCK_SHIFT      log2(BLOCK_BYTES).
 * NMAX_BLOCKS      Blocks accumulated between two modular reductions;
 *                  173 * 32 = 5536 bytes keeps both running sums below 2^32.
 */
#define ADLER_MOD        65521
#define ADLER_DIV_MAGIC  0x80078071
#define ADLER_DIV_SHIFT  47
#define BLOCK_BYTES      32
#define BLOCK_SHIFT      5
#define NMAX_BLOCKS      173

/*
 * uint32_t adler32_rvv128(uint32_t init, uint8_t *buf, uint64_t len)
 *
 * Adler-32 of buf[0..len) continued from the checksum `init`, using RVV.
 *
 * ABI:    RISC-V LP64 psABI, leaf function, no stack frame.
 * In:     a0 = init (A in bits 0..15, B in bits 16..31)
 *         a1 = buf
 *         a2 = len in bytes (0 is allowed)
 * Out:    a0 = (B << 16) | A, sign-extended from 32 bits as the psABI
 *              requires for 32-bit return values on RV64.
 * Clobb:  a1-a6, t0-t4, t6, v0-v5, v8-v9, v12, v16-v20, vl, vtype
 *         (all caller-saved).
 * Needs:  VLEN >= 128 so that an e8/m2 group holds one 32-byte block.
 *
 * Register roles:
 *         a3 = A (running byte sum)      a4 = B (running sum of A)
 *         a5, a6 = scratch               t0 = BLOCK_BYTES (AVL)
 *         t1 = whole blocks still to do  t2 = blocks in the current chunk
 *         t3 = ADLER_DIV_MAGIC           t4 = ADLER_MOD
 *         t6 = table address, then end pointer of the current chunk
 *         v0-v1 = per-byte weights 32..1 v4, v8 = zero reduction seeds
 */
        .text
        .align  3
        .option arch, +v
        .globl  adler32_rvv128
        .type   adler32_rvv128, @function
adler32_rvv128:
        li      t3, ADLER_DIV_MAGIC
        li      t4, ADLER_MOD
        slli    a3, a0, 48
        srli    a3, a3, 48                      // A = init & 0xffff
        srliw   a4, a0, 16                      // B = init >> 16
        li      t0, BLOCK_BYTES
        bltu    a2, t0, .Ltail_bytes            // shorter than one block: scalar only

        vsetvli zero, t0, e8, m2, ta, ma
        lla     t6, .Lfactors                   // file-local table: PC-relative, no GOT
        vle8.v  v0, (t6)                        // v0-v1 = weights 32, 31, ..., 1
        vmv.v.i v4, 0                           // seed for the byte-sum reduction
        vmv.v.i v8, 0                           // seed for the weighted-sum reduction
        srli    t1, a2, BLOCK_SHIFT             // t1 = len / 32, at least 1 here

.Lchunk_loop:
        // t2 = min(t1, NMAX_BLOCKS): blocks handled before the next reduction.
        li      t2, NMAX_BLOCKS
        bgeu    t1, t2, 1f
        mv      t2, t1
1:
        sub     t1, t1, t2
        slli    t6, t2, BLOCK_SHIFT
        add     t6, a1, t6                      // t6 = first byte after this chunk

.Lblock_loop:
        // For a block d[0..31]:  B += 32 * A + sum((32 - i) * d[i]);  A += sum(d[i]).
        vle8.v  v2, (a1)
        addi    a1, a1, BLOCK_BYTES
        slli    a5, a3, BLOCK_SHIFT
        add     a4, a4, a5                      // B += 32 * A, using A from before this block
        vwredsumu.vs v12, v2, v4                // v12[0] (16-bit) = sum of the 32 bytes
        vwmulu.vv    v16, v2, v0                // v16-v19 (16-bit) = byte * weight

        vsetvli zero, t0, e16, m4, ta, ma
        vmv.x.s a6, v12
        add     a3, a3, a6                      // A += byte sum
        vwredsumu.vs v20, v16, v8               // v20[0] (32-bit) = weighted sum

        vsetvli zero, t0, e32, m4, ta, ma
        vmv.x.s a6, v20
        add     a4, a4, a6                      // B += weighted sum

        vsetvli zero, t0, e8, m2, ta, ma        // back to byte view for the next load
        bne     a1, t6, .Lblock_loop

        // Chunk finished: bring A and B back below ADLER_MOD.
        mul     a5, a3, t3
        srli    a5, a5, ADLER_DIV_SHIFT         // a5 = A / 65521
        mul     a5, a5, t4
        sub     a3, a3, a5                      // A %= 65521
        mul     a5, a4, t3
        srli    a5, a5, ADLER_DIV_SHIFT         // a5 = B / 65521
        mul     a5, a5, t4
        sub     a4, a4, a5                      // B %= 65521
        bnez    t1, .Lchunk_loop

.Ltail_bytes:
        andi    a2, a2, BLOCK_BYTES - 1         // bytes left after the whole blocks
        beqz    a2, .Lfinalize
        add     a2, a1, a2                      // a2 = end pointer, strictly above a1

.Ltail_loop:
        lbu     a5, 0(a1)
        addi    a1, a1, 1
        add     a3, a3, a5                      // A += byte
        add     a4, a4, a3                      // B += A
        bne     a1, a2, .Ltail_loop

.Lfinalize:
        mul     a5, a3, t3
        srli    a5, a5, ADLER_DIV_SHIFT
        mul     a5, a5, t4
        sub     a3, a3, a5                      // A %= 65521
        mul     a5, a4, t3
        srli    a5, a5, ADLER_DIV_SHIFT
        mul     a5, a5, t4
        sub     a4, a4, a5                      // B %= 65521
        slliw   a4, a4, 16                      // 32-bit shift: result is sign-extended
        or      a0, a3, a4                      // A < 2^16, so the extension is kept
        ret
        .size   adler32_rvv128, .-adler32_rvv128

        .section .rodata
        .align  4
// Weight of byte i within a 32-byte block when folding it into B: 32 - i.
.Lfactors:
        .byte   32, 31, 30, 29, 28, 27, 26, 25
        .byte   24, 23, 22, 21, 20, 19, 18, 17
        .byte   16, 15, 14, 13, 12, 11, 10, 9
        .byte   8, 7, 6, 5, 4, 3, 2, 1
#endif

igzip/riscv64/igzip_multibinary_riscv64_dispatcher.c

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,22 @@
3131
extern uint32_t
3232
adler32_rvv(uint32_t, uint8_t *, uint64_t);
3333
extern uint32_t
34+
adler32_rvv128(uint32_t, uint8_t *, uint64_t);
35+
extern uint32_t
3436
adler32_base(uint32_t, uint8_t *, uint64_t);
3537

3638
DEFINE_INTERFACE_DISPATCHER(isal_adler32)
3739
{
3840
#if HAVE_RVV
3941
const unsigned long hwcap = getauxval(AT_HWCAP);
40-
if (hwcap & HWCAP_RV('V'))
42+
if (hwcap & HWCAP_RV('V')) {
43+
unsigned long vlenb;
44+
__asm__ volatile ("csrr %0, vlenb" : "=r"(vlenb));
45+
if (vlenb == 16)
46+
return adler32_rvv128;
47+
else
4148
return adler32_rvv;
49+
}
4250
else
4351
#endif
4452
return adler32_base;

0 commit comments

Comments
 (0)