diff --git a/.clang-format-ignore b/.clang-format-ignore index e2f8c370..9fdf5928 100644 --- a/.clang-format-ignore +++ b/.clang-format-ignore @@ -1,3 +1,5 @@ include/aarch64_multibinary.h include/aarch64_label.h **/aarch64/*.h + +**/riscv64/*.h diff --git a/Makefile.am b/Makefile.am index ce22ebe4..4fdcec6a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -29,10 +29,12 @@ other_tests_x86_64= other_tests_x86_32= other_tests_aarch64= other_tests_ppc64le= +other_tests_riscv64= lsrc_x86_64= lsrc_x86_32= lsrc_aarch64= lsrc_ppc64le= +lsrc_riscv64= lsrc_base_aliases= lsrc32= unit_tests32= @@ -83,6 +85,12 @@ libisal_la_SOURCES += ${lsrc_ppc64le} other_tests += ${other_tests_ppc64le} endif +if CPU_RISCV64 +ARCH=-Driscv64 +libisal_la_SOURCES += ${lsrc_riscv64} +other_tests += ${other_tests_riscv64} +endif + if CPU_UNDEFINED libisal_la_SOURCES += ${lsrc_base_aliases} endif @@ -131,6 +139,9 @@ endif if CPU_AARCH64 as_filter = $(CC) -D__ASSEMBLY__ endif +if CPU_RISCV64 + as_filter = $(CC) -D__ASSEMBLY__ +endif CCAS = $(as_filter) EXTRA_DIST += tools/yasm-filter.sh tools/nasm-filter.sh @@ -142,6 +153,9 @@ AM_CCASFLAGS = ${AM_CFLAGS} else AM_CCASFLAGS = ${yasm_args} ${INCLUDE} ${src_include} ${DEFS} ${D} endif +if CPU_RISCV64 +AM_CCASFLAGS = ${AM_CFLAGS} +endif .asm.s: @echo " MKTMP " $@; diff --git a/configure.ac b/configure.ac index 72500eec..62aae78a 100644 --- a/configure.ac +++ b/configure.ac @@ -31,11 +31,13 @@ AS_CASE([$host_cpu], [arm64], [CPU="aarch64"], [powerpc64le], [CPU="ppc64le"], [ppc64le], [CPU="ppc64le"], + [riscv64], [CPU="riscv64"], ) AM_CONDITIONAL([CPU_X86_64], [test "$CPU" = "x86_64"]) AM_CONDITIONAL([CPU_X86_32], [test "$CPU" = "x86_32"]) AM_CONDITIONAL([CPU_AARCH64], [test "$CPU" = "aarch64"]) AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = "ppc64le"]) +AM_CONDITIONAL([CPU_RISCV64], [test "$CPU" = "riscv64"]) AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"]) if test "$CPU" = "x86_64"; then diff --git a/crc/Makefile.am b/crc/Makefile.am index 
6aed74d9..5264e005 100644 --- a/crc/Makefile.am +++ b/crc/Makefile.am @@ -28,6 +28,7 @@ ######################################################################## include crc/aarch64/Makefile.am +include crc/riscv64/Makefile.am lsrc += \ crc/crc_base.c \ diff --git a/crc/riscv64/Makefile.am b/crc/riscv64/Makefile.am new file mode 100644 index 00000000..b2ea4573 --- /dev/null +++ b/crc/riscv64/Makefile.am @@ -0,0 +1,43 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +lsrc_riscv64 += \ + crc/riscv64/crc16_t10dif.S \ + crc/riscv64/crc32_gzip_refl.S \ + crc/riscv64/crc32_ieee.S \ + crc/riscv64/crc32_iscsi.S \ + crc/riscv64/crc64_ecma_norm.S \ + crc/riscv64/crc64_ecma_refl.S \ + crc/riscv64/crc64_iso_norm.S \ + crc/riscv64/crc64_iso_refl.S \ + crc/riscv64/crc64_jones_norm.S \ + crc/riscv64/crc64_jones_refl.S \ + crc/riscv64/crc64_rocksoft_norm.S \ + crc/riscv64/crc64_rocksoft_refl.S + diff --git a/crc/riscv64/crc16_t10dif.S b/crc/riscv64/crc16_t10dif.S new file mode 100644 index 00000000..c9dd1098 --- /dev/null +++ b/crc/riscv64/crc16_t10dif.S @@ -0,0 +1,97 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc32_norm_common_clmul.h" + +/* uint16_t crc16_t10dif(uint16_t init_crc, uint8_t *buf, uint64_t len) */ +.text +.align 1 +.global crc16_t10dif +.type crc16_t10dif, %function +crc16_t10dif: + /* load precomputed polynomial and barrett constant (mu) */ + ld POLY, .poly + ld MU, .mu + + /* shift 16-bit seed into the upper halfword of a 32-bit value */ + slli SEED, SEED, 16 + + /* align and fold as though we're calculating a 32-bit crc */ + crc32_norm_align + + crc_fold_loop 32 1 0 + crc32_norm_fold_reduction + + crc32_norm_excess + + /* shift the 32-bit result back down to 16 bits */ + srli SEED, SEED, 16 + ret + +/* precomputed constants */ +.poly: + .dword 0x000000018bb70000 +.mu: + .dword 0x00000001f65a57f8 +.k1: + .dword 0x00000000371d0000 +.k2: + .dword 0x0000000087e70000 +.k3: + .dword 0x000000004c1a0000 +.k4: + .dword 0x00000000fb0b0000 +.k5: + .dword 0x000000002d560000 +.k6: + .dword 0x0000000013680000 + + +/* uint16_t crc16_t10dif_copy(uint16_t seed, uint8_t * dst, uint8_t 
* src, uint64_t len) */ +/* copies len bytes from src to dst one byte at a time, then calculates the crc over src */ +.text +.align 1 +.global crc16_t10dif_copy +.type crc16_t10dif_copy, %function +crc16_t10dif_copy: + beqz a3, .memcpy_done + add t0, a2, a3 + mv t1, a2 +.memcpy_loop: + lb t2, 0(t1) + sb t2, 0(a1) + addi t1, t1, 1 + addi a1, a1, 1 + bne t1, t0, .memcpy_loop + +.memcpy_done: + /* move src and len into the buf/len argument registers (seed stays in a0), then tail-call crc16_t10dif */ + mv a1, a2 + mv a2, a3 + tail crc16_t10dif diff --git a/crc/riscv64/crc32_gzip_refl.S b/crc/riscv64/crc32_gzip_refl.S new file mode 100644 index 00000000..9241a72e --- /dev/null +++ b/crc/riscv64/crc32_gzip_refl.S @@ -0,0 +1,75 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc32_refl_common_clmul.h" + +/* uint32_t crc32_gzip_refl(uint32_t seed, uint8_t *buf, uint64_t len) */ +.text +.align 1 +.global crc32_gzip_refl +.type crc32_gzip_refl, %function +crc32_gzip_refl: + /* load precomputed polynomial and barrett constant (mu) */ + ld POLY, .poly_refl + ld MU, .mu + + /* invert (bitwise not) and zero-extend seed */ + not SEED, SEED + slli SEED, SEED, 32 + srli SEED, SEED, 32 + + /* align buffer to 64-bits, then fold */ + crc32_refl_align + + crc_fold_loop 32 0 1 + crc32_refl_fold_reduction + + /* handle any excess */ + crc32_refl_excess + + /* sign-extend and invert result */ + sext.w SEED, SEED + not SEED, SEED + ret + +/* precomputed constants */ +.poly_refl: + .dword 0x00000001db710641 +.mu: + .dword 0xb4e5b025f7011641 +.k1: + .dword 0x0000000154442bd4 +.k2: + .dword 0x00000001c6e41596 +.k3: + .dword 0x00000001751997d0 +.k4: + .dword 0x00000000ccaa009e +.k5: + .dword 0x0000000163cd6124 diff --git a/crc/riscv64/crc32_ieee.S b/crc/riscv64/crc32_ieee.S new file mode 100644 index 00000000..af6c2ef5 --- /dev/null +++ b/crc/riscv64/crc32_ieee.S @@ -0,0 +1,78 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc32_norm_common_clmul.h" + +/* uint32_t crc32_ieee(uint32_t init_crc, uint8_t *buf, uint64_t len) */ +.text +.align 1 +.global crc32_ieee +.type crc32_ieee, %function +crc32_ieee: + /* load precomputed polynomial and barrett constant (mu) */ + ld POLY, .poly + ld MU, .mu + + /* invert (bitwise not) and zero-extend seed (riscv calling convention has uint32_t + * passed in and returned sign-extended) + */ + not SEED, SEED + slli SEED, SEED, 32 + srli SEED, SEED, 32 + + /* align and fold buffer */ + crc32_norm_align + + crc_fold_loop 32 1 0 + crc32_norm_fold_reduction + + crc32_norm_excess + + /* sign-extend and invert result */ + sext.w SEED, SEED + not SEED, SEED + ret + +/* precomputed constants */ +.poly: + .dword 0x0000000104c11db7 +.mu: + .dword 0x0000000104d101df +.k1: + .dword 0x000000008833794c +.k2: + .dword 0x00000000e6228b11 +.k3: + .dword 0x00000000c5b9cd4c +.k4: + .dword 0x00000000e8a45605 +.k5: + .dword 0x00000000f200aa66 +.k6: + .dword 0x00000000490d678d diff --git a/crc/riscv64/crc32_iscsi.S b/crc/riscv64/crc32_iscsi.S new file mode 100644 index 00000000..abcf14d5 --- /dev/null +++ b/crc/riscv64/crc32_iscsi.S @@ -0,0 +1,79 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc32_refl_common_clmul.h" + +/* uint32_t crc32_iscsi(uint8_t *buffer, int len, uint32_t init_crc) */ +.text +.align 1 +.global crc32_iscsi +.type crc32_iscsi, %function +crc32_iscsi: + /* rotate arguments from (buffer, len, init_crc) to (seed, buf, len) to match common crc functions */ + mv t0, a2 + mv a2, a1 + mv a1, a0 + mv a0, t0 + + /* load precomputed polynomial and barrett constant (mu) */ + ld POLY, .poly_refl + ld MU, .mu + + /* zero-extend seed */ + slli SEED, SEED, 32 + srli SEED, SEED, 32 + + /* align buffer to 64-bits, then fold */ + crc32_refl_align + + crc_fold_loop 32 0 1 + crc32_refl_fold_reduction + + /* handle any remaining excess */ + crc32_refl_excess + + /* sign-extend result */ + sext.w SEED, SEED + ret + +/* precomputed constants */ +.poly_refl: + .dword 0x0000000105ec76f1 +.mu: + .dword 0x4869ec38dea713f1 +.k1: + .dword 0x00000000740eef02 +.k2: + .dword 0x000000009e4addf8 +.k3: + .dword 0x00000000f20c0dfe +.k4: + .dword 0x000000014cd00bd6 +.k5: + 
.dword 0x00000000dd45aab8 diff --git a/crc/riscv64/crc32_norm_common_clmul.h b/crc/riscv64/crc32_norm_common_clmul.h new file mode 100644 index 00000000..96dc1559 --- /dev/null +++ b/crc/riscv64/crc32_norm_common_clmul.h @@ -0,0 +1,198 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc_fold_common_clmul.h" + +/* folding final reduction */ +/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0) */ +/* trashes t2, t3, a5, a6 and t5, t6 */ +.macro crc32_norm_fold_reduction + /* precomputed constants */ + ld K5, .k5 + ld K6, .k6 + + /* fold remaining 128 bits into 96 */ + clmulh t2, K5, HIGH + clmul t3, K5, HIGH + srli a5, LOW, 32 + slli a6, LOW, 32 + xor HIGH, t2, a5 + xor LOW, t3, a6 + + /* fold remaining 96 bits into 64 */ + clmul t0, K6, t0 + xor t1, t1, t0 + + /* barrett's reduce the 64-bits */ + clmulh HIGH, LOW, MU + clmul HIGH, HIGH, POLY + xor SEED, HIGH, LOW + +.fold_1_done: +.endm + +/* barrett's reduction on a \bits bit-length value, returning result in seed */ +/* bits must be 32, 16 or 8 */ +/* expects SEED (a0), MU (a3) and POLY (a4) to hold corresponding values */ +/* value and seed must be zero-extended */ +/* trashes t0 and t1 */ +.macro crc32_norm_barrett_reduce value:req, bits:req + /* combine value with seed */ +.if (\bits < 32) + srli t0, SEED, (32 - \bits) + xor t0, t0, \value +.else + xor t0, SEED, \value +.endif + + slli t0, t0, 32 + clmulh t0, t0, MU + clmul t0, t0, POLY + + /* subtract seed from original for smaller sizes */ +.if (\bits < 32) + slli t1, SEED, \bits + xor t0, t0, t1 +.endif + + /* zero-extend 32-bit return value */ + slli t0, t0, 32 + srli SEED, t0, 32 +.endm + +/* align buffer to 64-bits, updating seed */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects crc32_norm_excess to be called later */ +/* trashes t0 and t1 */ +.macro crc32_norm_align + /* is buffer already 64-bit aligned (low three address bits clear)? */ + andi t0, BUF, 0b111 + beqz t0, .align_done + +.align_8: + /* is enough buffer left? */ + li t0, 1 + bltu LEN, t0, .excess_done + + /* is buffer misaligned by one byte? 
*/ + andi t0, BUF, 0b001 + beqz t0, .align_16 + + /* perform barrett's reduction on one byte */ + lbu t1, (BUF) + crc32_norm_barrett_reduce t1, 8 + addi LEN, LEN, -1 + addi BUF, BUF, 1 + +.align_16: + li t0, 2 + bltu LEN, t0, .excess_8 + + andi t0, BUF, 0b010 + beqz t0, .align_32 + + /* byte reverse the next halfword */ + lhu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 48 + + crc32_norm_barrett_reduce t1, 16 + addi LEN, LEN, -2 + addi BUF, BUF, 2 + +.align_32: + li t0, 4 + bltu LEN, t0, .excess_16 + + andi t0, BUF, 0b100 + beqz t0, .align_done + + /* byte reverse the next word */ + lwu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 32 + + crc32_norm_barrett_reduce t1, 32 + addi LEN, LEN, -4 + addi BUF, BUF, 4 + +.align_done: +.endm + +/* barrett's reduce excess buffer left following fold */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects less than 128 bits to be left in doubleword-aligned buffer */ +/* trashes t0, t1 and t3 */ +.macro crc32_norm_excess + /* is there any excess left? 
*/ + beqz LEN, .excess_done + +.excess_64: + andi t0, LEN, 0b1000 + beqz t0, .excess_32 + /* read in 64-bits and perform two 32-bit reductions */ + ld t3, (BUF) + rev8 t3, t3 + srli t1, t3, 32 + crc32_norm_barrett_reduce t1, 32 + slli t3, t3, 32 + srli t1, t3, 32 + crc32_norm_barrett_reduce t1, 32 + addi BUF, BUF, 8 + +.excess_32: + andi t0, LEN, 0b0100 + beqz t0, .excess_16 + + lwu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 32 + + crc32_norm_barrett_reduce t1, 32 + addi BUF, BUF, 4 + +.excess_16: + andi t0, LEN, 0b0010 + beqz t0, .excess_8 + + lhu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 48 + + crc32_norm_barrett_reduce t1, 16 + addi BUF, BUF, 2 + +.excess_8: + andi t0, LEN, 0b0001 + beqz t0, .excess_done + lbu t1, (BUF) + crc32_norm_barrett_reduce t1, 8 + +.excess_done: +.endm diff --git a/crc/riscv64/crc32_refl_common_clmul.h b/crc/riscv64/crc32_refl_common_clmul.h new file mode 100644 index 00000000..70c1f647 --- /dev/null +++ b/crc/riscv64/crc32_refl_common_clmul.h @@ -0,0 +1,180 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc_fold_common_clmul.h" + +/* folding reflected final reduction */ +/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0) */ +/* trashes t2, t3, a5, a6 and t5, t6 */ +.macro crc32_refl_fold_reduction + /* load precalculated constants */ + ld K4, .k4 + ld K5, .k5 + + /* fold remaining 128 bits into 96 */ + clmul t3, K4, t0 + xor t1, t3, t1 + clmulh t0, K4, t0 + + /* high = (low >> 32) | (high << 32) */ + slli t0, t0, 32 + srli t3, t1, 32 + or t0, t0, t3 + + /* fold last 96 bits into 64 */ + slli t1, t1, 32 + srli t1, t1, 32 + clmul t1, K5, t1 + xor t1, t1, t0 + + /* barrett's reduce 64 bits */ + clmul t0, MU, t1 + slli t0, t0, 32 + srli t0, t0, 32 + clmul t0, POLY, t0 + xor t0, t1, t0 + srli SEED, t0, 32 + +.fold_1_done: +.endm + +/* barrett's reduction on a \bits bit-length value, returning result in seed */ +/* bits must be 64, 32, 16 or 8 */ +/* value and seed must be zero-extended */ +.macro barrett_reduce seed:req, value:req, bits:req + /* combine value with seed */ + xor t0, \seed, \value +.if (\bits < 64) + slli t0, t0, (64 - \bits) +.endif + + /* multiply by mu, which is 
2^96 divided by our polynomial */ + clmul t0, t0, MU + +.if (\bits == 16) || (\bits == 8) + clmulh t0, t0, POLY + /* subtract from original for smaller sizes */ + srli t1, \seed, \bits + xor \seed, t0, t1 +.else + clmulh \seed, t0, POLY +.endif + +.endm + +/* align buffer to 64-bits updating seed */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects crc32_refl_excess to be called later */ +/* trashes t0 and t1 */ +.macro crc32_refl_align + /* is buffer already aligned to 64-bits? */ + andi t0, BUF, 0b111 + beqz t0, .align_done + +.align_8: + /* is enough buffer left? */ + li t0, 1 + bltu LEN, t0, .excess_done + + /* is buffer misaligned by one byte? */ + andi t0, BUF, 0b001 + beqz t0, .align_16 + + /* perform barrett's reduction on one byte */ + lbu t1, (BUF) + barrett_reduce SEED, t1, 8 + addi LEN, LEN, -1 + addi BUF, BUF, 1 + +.align_16: + li t0, 2 + bltu LEN, t0, .excess_8 + + andi t0, BUF, 0b010 + beqz t0, .align_32 + + lhu t1, (BUF) + barrett_reduce SEED, t1, 16 + addi LEN, LEN, -2 + addi BUF, BUF, 2 + +.align_32: + li t0, 4 + bltu LEN, t0, .excess_16 + + andi t0, BUF, 0b100 + beqz t0, .align_done + + lwu t1, (BUF) + barrett_reduce SEED, t1, 32 + addi LEN, LEN, -4 + addi BUF, BUF, 4 + +.align_done: +.endm + +/* barrett's reduce excess buffer left following fold */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects less than 128 bits to be left in doubleword-aligned buffer */ +/* trashes t0, t1 and t3 */ +.macro crc32_refl_excess + /* do we have any excess left? 
*/ + beqz LEN, .excess_done + + /* barrett's reduce the remaining excess */ + /* at most 15 bytes (LEN bits 3..0) are handled here */ +.excess_64: + andi t0, LEN, 0b1000 + beqz t0, .excess_32 + ld t1, (BUF) + barrett_reduce SEED, t1, 64 + addi BUF, BUF, 8 + +.excess_32: + andi t0, LEN, 0b0100 + beqz t0, .excess_16 + lwu t1, (BUF) + barrett_reduce SEED, t1, 32 + addi BUF, BUF, 4 + +.excess_16: + andi t0, LEN, 0b0010 + beqz t0, .excess_8 + lhu t1, (BUF) + barrett_reduce SEED, t1, 16 + addi BUF, BUF, 2 + +.excess_8: + andi t0, LEN, 0b0001 + beqz t0, .excess_done + lbu t1, (BUF) + barrett_reduce SEED, t1, 8 + +.excess_done: +.endm diff --git a/crc/riscv64/crc64_ecma_norm.S b/crc/riscv64/crc64_ecma_norm.S new file mode 100644 index 00000000..e7e0554f --- /dev/null +++ b/crc/riscv64/crc64_ecma_norm.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc64_norm_common_clmul.h" + +/* uint64_t crc64_ecma_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_norm crc64_ecma_norm + +/* precomputed folding constants */ +.poly: + .dword 0x42f0e1eba9ea3693 /* excludes leading 1 */ +.mu: + .dword 0x578d29d06cc4f872 /* excludes leading 1 */ +.k1: + .dword 0xddf4b6981205b83f +.k2: + .dword 0x5f6843ca540df020 +.k3: + .dword 0x4eb938a7d257740e +.k4: +.k5: + .dword 0x05f5c3c7eb52fab6 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x42f0e1eba9ea3693 + .dword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5 + .dword 0x493366450e42ecdf, 0x0bc387aea7a8da4c + .dword 0xccd2a5925d9681f9, 0x8e224479f47cb76a + .dword 0x9266cc8a1c85d9be, 0xd0962d61b56fef2d + .dword 0x17870f5d4f51b498, 0x5577eeb6e6bb820b + .dword 0xdb55aacf12c73561, 0x99a54b24bb2d03f2 + .dword 0x5eb4691841135847, 0x1c4488f3e8f96ed4 + .dword 0x663d78ff90e185ef, 0x24cd9914390bb37c + .dword 0xe3dcbb28c335e8c9, 0xa12c5ac36adfde5a + .dword 0x2f0e1eba9ea36930, 0x6dfeff5137495fa3 + .dword 0xaaefdd6dcd770416, 0xe81f3c86649d3285 + .dword 0xf45bb4758c645c51, 0xb6ab559e258e6ac2 + .dword 0x71ba77a2dfb03177, 0x334a9649765a07e4 + .dword 0xbd68d2308226b08e, 0xff9833db2bcc861d + .dword 0x388911e7d1f2dda8, 0x7a79f00c7818eb3b + .dword 0xcc7af1ff21c30bde, 0x8e8a101488293d4d + .dword 0x499b3228721766f8, 0x0b6bd3c3dbfd506b + .dword 0x854997ba2f81e701, 
0xc7b97651866bd192 + .dword 0x00a8546d7c558a27, 0x4258b586d5bfbcb4 + .dword 0x5e1c3d753d46d260, 0x1cecdc9e94ace4f3 + .dword 0xdbfdfea26e92bf46, 0x990d1f49c77889d5 + .dword 0x172f5b3033043ebf, 0x55dfbadb9aee082c + .dword 0x92ce98e760d05399, 0xd03e790cc93a650a + .dword 0xaa478900b1228e31, 0xe8b768eb18c8b8a2 + .dword 0x2fa64ad7e2f6e317, 0x6d56ab3c4b1cd584 + .dword 0xe374ef45bf6062ee, 0xa1840eae168a547d + .dword 0x66952c92ecb40fc8, 0x2465cd79455e395b + .dword 0x3821458aada7578f, 0x7ad1a461044d611c + .dword 0xbdc0865dfe733aa9, 0xff3067b657990c3a + .dword 0x711223cfa3e5bb50, 0x33e2c2240a0f8dc3 + .dword 0xf4f3e018f031d676, 0xb60301f359dbe0e5 + .dword 0xda050215ea6c212f, 0x98f5e3fe438617bc + .dword 0x5fe4c1c2b9b84c09, 0x1d14202910527a9a + .dword 0x93366450e42ecdf0, 0xd1c685bb4dc4fb63 + .dword 0x16d7a787b7faa0d6, 0x5427466c1e109645 + .dword 0x4863ce9ff6e9f891, 0x0a932f745f03ce02 + .dword 0xcd820d48a53d95b7, 0x8f72eca30cd7a324 + .dword 0x0150a8daf8ab144e, 0x43a04931514122dd + .dword 0x84b16b0dab7f7968, 0xc6418ae602954ffb + .dword 0xbc387aea7a8da4c0, 0xfec89b01d3679253 + .dword 0x39d9b93d2959c9e6, 0x7b2958d680b3ff75 + .dword 0xf50b1caf74cf481f, 0xb7fbfd44dd257e8c + .dword 0x70eadf78271b2539, 0x321a3e938ef113aa + .dword 0x2e5eb66066087d7e, 0x6cae578bcfe24bed + .dword 0xabbf75b735dc1058, 0xe94f945c9c3626cb + .dword 0x676dd025684a91a1, 0x259d31cec1a0a732 + .dword 0xe28c13f23b9efc87, 0xa07cf2199274ca14 + .dword 0x167ff3eacbaf2af1, 0x548f120162451c62 + .dword 0x939e303d987b47d7, 0xd16ed1d631917144 + .dword 0x5f4c95afc5edc62e, 0x1dbc74446c07f0bd + .dword 0xdaad56789639ab08, 0x985db7933fd39d9b + .dword 0x84193f60d72af34f, 0xc6e9de8b7ec0c5dc + .dword 0x01f8fcb784fe9e69, 0x43081d5c2d14a8fa + .dword 0xcd2a5925d9681f90, 0x8fdab8ce70822903 + .dword 0x48cb9af28abc72b6, 0x0a3b7b1923564425 + .dword 0x70428b155b4eaf1e, 0x32b26afef2a4998d + .dword 0xf5a348c2089ac238, 0xb753a929a170f4ab + .dword 0x3971ed50550c43c1, 0x7b810cbbfce67552 + .dword 0xbc902e8706d82ee7, 0xfe60cf6caf321874 + .dword 
0xe224479f47cb76a0, 0xa0d4a674ee214033 + .dword 0x67c58448141f1b86, 0x253565a3bdf52d15 + .dword 0xab1721da49899a7f, 0xe9e7c031e063acec + .dword 0x2ef6e20d1a5df759, 0x6c0603e6b3b7c1ca + .dword 0xf6fae5c07d3274cd, 0xb40a042bd4d8425e + .dword 0x731b26172ee619eb, 0x31ebc7fc870c2f78 + .dword 0xbfc9838573709812, 0xfd39626eda9aae81 + .dword 0x3a28405220a4f534, 0x78d8a1b9894ec3a7 + .dword 0x649c294a61b7ad73, 0x266cc8a1c85d9be0 + .dword 0xe17dea9d3263c055, 0xa38d0b769b89f6c6 + .dword 0x2daf4f0f6ff541ac, 0x6f5faee4c61f773f + .dword 0xa84e8cd83c212c8a, 0xeabe6d3395cb1a19 + .dword 0x90c79d3fedd3f122, 0xd2377cd44439c7b1 + .dword 0x15265ee8be079c04, 0x57d6bf0317edaa97 + .dword 0xd9f4fb7ae3911dfd, 0x9b041a914a7b2b6e + .dword 0x5c1538adb04570db, 0x1ee5d94619af4648 + .dword 0x02a151b5f156289c, 0x4051b05e58bc1e0f + .dword 0x87409262a28245ba, 0xc5b073890b687329 + .dword 0x4b9237f0ff14c443, 0x0962d61b56fef2d0 + .dword 0xce73f427acc0a965, 0x8c8315cc052a9ff6 + .dword 0x3a80143f5cf17f13, 0x7870f5d4f51b4980 + .dword 0xbf61d7e80f251235, 0xfd913603a6cf24a6 + .dword 0x73b3727a52b393cc, 0x31439391fb59a55f + .dword 0xf652b1ad0167feea, 0xb4a25046a88dc879 + .dword 0xa8e6d8b54074a6ad, 0xea16395ee99e903e + .dword 0x2d071b6213a0cb8b, 0x6ff7fa89ba4afd18 + .dword 0xe1d5bef04e364a72, 0xa3255f1be7dc7ce1 + .dword 0x64347d271de22754, 0x26c49cccb40811c7 + .dword 0x5cbd6cc0cc10fafc, 0x1e4d8d2b65facc6f + .dword 0xd95caf179fc497da, 0x9bac4efc362ea149 + .dword 0x158e0a85c2521623, 0x577eeb6e6bb820b0 + .dword 0x906fc95291867b05, 0xd29f28b9386c4d96 + .dword 0xcedba04ad0952342, 0x8c2b41a1797f15d1 + .dword 0x4b3a639d83414e64, 0x09ca82762aab78f7 + .dword 0x87e8c60fded7cf9d, 0xc51827e4773df90e + .dword 0x020905d88d03a2bb, 0x40f9e43324e99428 + .dword 0x2cffe7d5975e55e2, 0x6e0f063e3eb46371 + .dword 0xa91e2402c48a38c4, 0xebeec5e96d600e57 + .dword 0x65cc8190991cb93d, 0x273c607b30f68fae + .dword 0xe02d4247cac8d41b, 0xa2dda3ac6322e288 + .dword 0xbe992b5f8bdb8c5c, 0xfc69cab42231bacf + .dword 0x3b78e888d80fe17a, 
0x7988096371e5d7e9 + .dword 0xf7aa4d1a85996083, 0xb55aacf12c735610 + .dword 0x724b8ecdd64d0da5, 0x30bb6f267fa73b36 + .dword 0x4ac29f2a07bfd00d, 0x08327ec1ae55e69e + .dword 0xcf235cfd546bbd2b, 0x8dd3bd16fd818bb8 + .dword 0x03f1f96f09fd3cd2, 0x41011884a0170a41 + .dword 0x86103ab85a2951f4, 0xc4e0db53f3c36767 + .dword 0xd8a453a01b3a09b3, 0x9a54b24bb2d03f20 + .dword 0x5d45907748ee6495, 0x1fb5719ce1045206 + .dword 0x919735e51578e56c, 0xd367d40ebc92d3ff + .dword 0x1476f63246ac884a, 0x568617d9ef46bed9 + .dword 0xe085162ab69d5e3c, 0xa275f7c11f7768af + .dword 0x6564d5fde549331a, 0x279434164ca30589 + .dword 0xa9b6706fb8dfb2e3, 0xeb46918411358470 + .dword 0x2c57b3b8eb0bdfc5, 0x6ea7525342e1e956 + .dword 0x72e3daa0aa188782, 0x30133b4b03f2b111 + .dword 0xf7021977f9cceaa4, 0xb5f2f89c5026dc37 + .dword 0x3bd0bce5a45a6b5d, 0x79205d0e0db05dce + .dword 0xbe317f32f78e067b, 0xfcc19ed95e6430e8 + .dword 0x86b86ed5267cdbd3, 0xc4488f3e8f96ed40 + .dword 0x0359ad0275a8b6f5, 0x41a94ce9dc428066 + .dword 0xcf8b0890283e370c, 0x8d7be97b81d4019f + .dword 0x4a6acb477bea5a2a, 0x089a2aacd2006cb9 + .dword 0x14dea25f3af9026d, 0x562e43b4931334fe + .dword 0x913f6188692d6f4b, 0xd3cf8063c0c759d8 + .dword 0x5dedc41a34bbeeb2, 0x1f1d25f19d51d821 + .dword 0xd80c07cd676f8394, 0x9afce626ce85b507 diff --git a/crc/riscv64/crc64_ecma_refl.S b/crc/riscv64/crc64_ecma_refl.S new file mode 100644 index 00000000..4efa22f7 --- /dev/null +++ b/crc/riscv64/crc64_ecma_refl.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc64_refl_common_clmul.h" + +/* uint64_t crc64_ecma_refl(uint64_t init_crc, uint8_t *buf, unt64_t len) */ +crc64_func_refl crc64_ecma_refl + +/* precomputed folding constants */ +.poly: + .dword 0x92d8af2baf0e1e85 /* poly reflected, excluding leading 1 */ +.mu: + .dword 0x9c3e466c172963d5 +.k1: + .dword 0x6ae3efbb9dd441f3 +.k2: + .dword 0x081f6054a7842df4 +.k3: + .dword 0xe05dd497ca393ae4 +.k4: +.k5: + .dword 0xdabe95afc7875f40 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0xb32e4cbe03a75f6f + .dword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34 + .dword 0x7bd0c384ff8f5e33, 0xc8fe8f3afc28015c + .dword 0x8f54f5d357cffe68, 0x3c7ab96d5468a107 + .dword 0xf7a18709ff1ebc66, 0x448fcbb7fcb9e309 + .dword 0x0325b15e575e1c3d, 0xb00bfde054f94352 + .dword 0x8c71448d0091e255, 0x3f5f08330336bd3a + .dword 0x78f572daa8d1420e, 0xcbdb3e64ab761d61 + .dword 0x7d9ba13851336649, 0xceb5ed8652943926 + .dword 0x891f976ff973c612, 0x3a31dbd1fad4997d + .dword 0x064b62bcaebc387a, 0xb5652e02ad1b6715 + .dword 0xf2cf54eb06fc9821, 0x41e11855055bc74e + .dword 0x8a3a2631ae2dda2f, 0x39146a8fad8a8540 + .dword 0x7ebe1066066d7a74, 0xcd905cd805ca251b + .dword 0xf1eae5b551a2841c, 0x42c4a90b5205db73 + .dword 0x056ed3e2f9e22447, 0xb6409f5cfa457b28 + .dword 0xfb374270a266cc92, 0x48190ecea1c193fd + .dword 0x0fb374270a266cc9, 0xbc9d3899098133a6 + .dword 0x80e781f45de992a1, 0x33c9cd4a5e4ecdce + .dword 0x7463b7a3f5a932fa, 0xc74dfb1df60e6d95 + .dword 0x0c96c5795d7870f4, 0xbfb889c75edf2f9b + .dword 0xf812f32ef538d0af, 0x4b3cbf90f69f8fc0 + .dword 0x774606fda2f72ec7, 0xc4684a43a15071a8 + .dword 0x83c230aa0ab78e9c, 0x30ec7c140910d1f3 + .dword 0x86ace348f355aadb, 0x3582aff6f0f2f5b4 + .dword 0x7228d51f5b150a80, 0xc10699a158b255ef + .dword 0xfd7c20cc0cdaf4e8, 0x4e526c720f7dab87 + .dword 0x09f8169ba49a54b3, 0xbad65a25a73d0bdc + .dword 0x710d64410c4b16bd, 0xc22328ff0fec49d2 + .dword 0x85895216a40bb6e6, 0x36a71ea8a7ace989 + 
.dword 0x0adda7c5f3c4488e, 0xb9f3eb7bf06317e1 + .dword 0xfe5991925b84e8d5, 0x4d77dd2c5823b7ba + .dword 0x64b62bcaebc387a1, 0xd7986774e864d8ce + .dword 0x90321d9d438327fa, 0x231c512340247895 + .dword 0x1f66e84e144cd992, 0xac48a4f017eb86fd + .dword 0xebe2de19bc0c79c9, 0x58cc92a7bfab26a6 + .dword 0x9317acc314dd3bc7, 0x2039e07d177a64a8 + .dword 0x67939a94bc9d9b9c, 0xd4bdd62abf3ac4f3 + .dword 0xe8c76f47eb5265f4, 0x5be923f9e8f53a9b + .dword 0x1c4359104312c5af, 0xaf6d15ae40b59ac0 + .dword 0x192d8af2baf0e1e8, 0xaa03c64cb957be87 + .dword 0xeda9bca512b041b3, 0x5e87f01b11171edc + .dword 0x62fd4976457fbfdb, 0xd1d305c846d8e0b4 + .dword 0x96797f21ed3f1f80, 0x2557339fee9840ef + .dword 0xee8c0dfb45ee5d8e, 0x5da24145464902e1 + .dword 0x1a083bacedaefdd5, 0xa9267712ee09a2ba + .dword 0x955cce7fba6103bd, 0x267282c1b9c65cd2 + .dword 0x61d8f8281221a3e6, 0xd2f6b4961186fc89 + .dword 0x9f8169ba49a54b33, 0x2caf25044a02145c + .dword 0x6b055fede1e5eb68, 0xd82b1353e242b407 + .dword 0xe451aa3eb62a1500, 0x577fe680b58d4a6f + .dword 0x10d59c691e6ab55b, 0xa3fbd0d71dcdea34 + .dword 0x6820eeb3b6bbf755, 0xdb0ea20db51ca83a + .dword 0x9ca4d8e41efb570e, 0x2f8a945a1d5c0861 + .dword 0x13f02d374934a966, 0xa0de61894a93f609 + .dword 0xe7741b60e174093d, 0x545a57dee2d35652 + .dword 0xe21ac88218962d7a, 0x5134843c1b317215 + .dword 0x169efed5b0d68d21, 0xa5b0b26bb371d24e + .dword 0x99ca0b06e7197349, 0x2ae447b8e4be2c26 + .dword 0x6d4e3d514f59d312, 0xde6071ef4cfe8c7d + .dword 0x15bb4f8be788911c, 0xa6950335e42fce73 + .dword 0xe13f79dc4fc83147, 0x521135624c6f6e28 + .dword 0x6e6b8c0f1807cf2f, 0xdd45c0b11ba09040 + .dword 0x9aefba58b0476f74, 0x29c1f6e6b3e0301b + .dword 0xc96c5795d7870f42, 0x7a421b2bd420502d + .dword 0x3de861c27fc7af19, 0x8ec62d7c7c60f076 + .dword 0xb2bc941128085171, 0x0192d8af2baf0e1e + .dword 0x4638a2468048f12a, 0xf516eef883efae45 + .dword 0x3ecdd09c2899b324, 0x8de39c222b3eec4b + .dword 0xca49e6cb80d9137f, 0x7967aa75837e4c10 + .dword 0x451d1318d716ed17, 0xf6335fa6d4b1b278 + .dword 0xb199254f7f564d4c, 
0x02b769f17cf11223 + .dword 0xb4f7f6ad86b4690b, 0x07d9ba1385133664 + .dword 0x4073c0fa2ef4c950, 0xf35d8c442d53963f + .dword 0xcf273529793b3738, 0x7c0979977a9c6857 + .dword 0x3ba3037ed17b9763, 0x888d4fc0d2dcc80c + .dword 0x435671a479aad56d, 0xf0783d1a7a0d8a02 + .dword 0xb7d247f3d1ea7536, 0x04fc0b4dd24d2a59 + .dword 0x3886b22086258b5e, 0x8ba8fe9e8582d431 + .dword 0xcc0284772e652b05, 0x7f2cc8c92dc2746a + .dword 0x325b15e575e1c3d0, 0x8175595b76469cbf + .dword 0xc6df23b2dda1638b, 0x75f16f0cde063ce4 + .dword 0x498bd6618a6e9de3, 0xfaa59adf89c9c28c + .dword 0xbd0fe036222e3db8, 0x0e21ac88218962d7 + .dword 0xc5fa92ec8aff7fb6, 0x76d4de52895820d9 + .dword 0x317ea4bb22bfdfed, 0x8250e80521188082 + .dword 0xbe2a516875702185, 0x0d041dd676d77eea + .dword 0x4aae673fdd3081de, 0xf9802b81de97deb1 + .dword 0x4fc0b4dd24d2a599, 0xfceef8632775faf6 + .dword 0xbb44828a8c9205c2, 0x086ace348f355aad + .dword 0x34107759db5dfbaa, 0x873e3be7d8faa4c5 + .dword 0xc094410e731d5bf1, 0x73ba0db070ba049e + .dword 0xb86133d4dbcc19ff, 0x0b4f7f6ad86b4690 + .dword 0x4ce50583738cb9a4, 0xffcb493d702be6cb + .dword 0xc3b1f050244347cc, 0x709fbcee27e418a3 + .dword 0x3735c6078c03e797, 0x841b8ab98fa4b8f8 + .dword 0xadda7c5f3c4488e3, 0x1ef430e13fe3d78c + .dword 0x595e4a08940428b8, 0xea7006b697a377d7 + .dword 0xd60abfdbc3cbd6d0, 0x6524f365c06c89bf + .dword 0x228e898c6b8b768b, 0x91a0c532682c29e4 + .dword 0x5a7bfb56c35a3485, 0xe955b7e8c0fd6bea + .dword 0xaeffcd016b1a94de, 0x1dd181bf68bdcbb1 + .dword 0x21ab38d23cd56ab6, 0x9285746c3f7235d9 + .dword 0xd52f0e859495caed, 0x6601423b97329582 + .dword 0xd041dd676d77eeaa, 0x636f91d96ed0b1c5 + .dword 0x24c5eb30c5374ef1, 0x97eba78ec690119e + .dword 0xab911ee392f8b099, 0x18bf525d915feff6 + .dword 0x5f1528b43ab810c2, 0xec3b640a391f4fad + .dword 0x27e05a6e926952cc, 0x94ce16d091ce0da3 + .dword 0xd3646c393a29f297, 0x604a2087398eadf8 + .dword 0x5c3099ea6de60cff, 0xef1ed5546e415390 + .dword 0xa8b4afbdc5a6aca4, 0x1b9ae303c601f3cb + .dword 0x56ed3e2f9e224471, 0xe5c372919d851b1e + .dword 
0xa26908783662e42a, 0x114744c635c5bb45 + .dword 0x2d3dfdab61ad1a42, 0x9e13b115620a452d + .dword 0xd9b9cbfcc9edba19, 0x6a978742ca4ae576 + .dword 0xa14cb926613cf817, 0x1262f598629ba778 + .dword 0x55c88f71c97c584c, 0xe6e6c3cfcadb0723 + .dword 0xda9c7aa29eb3a624, 0x69b2361c9d14f94b + .dword 0x2e184cf536f3067f, 0x9d36004b35545910 + .dword 0x2b769f17cf112238, 0x9858d3a9ccb67d57 + .dword 0xdff2a94067518263, 0x6cdce5fe64f6dd0c + .dword 0x50a65c93309e7c0b, 0xe388102d33392364 + .dword 0xa4226ac498dedc50, 0x170c267a9b79833f + .dword 0xdcd7181e300f9e5e, 0x6ff954a033a8c131 + .dword 0x28532e49984f3e05, 0x9b7d62f79be8616a + .dword 0xa707db9acf80c06d, 0x14299724cc279f02 + .dword 0x5383edcd67c06036, 0xe0ada17364673f59 diff --git a/crc/riscv64/crc64_iso_norm.S b/crc/riscv64/crc64_iso_norm.S new file mode 100644 index 00000000..435851ce --- /dev/null +++ b/crc/riscv64/crc64_iso_norm.S @@ -0,0 +1,178 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc64_norm_common_clmul.h" + +/* uint64_t crc64_iso_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_norm crc64_iso_norm + +/* precomputed folding constants */ +.poly: +.mu: + .dword 0x000000000000001b /* excludes leading 1 */ +.k1: + .dword 0x0000001b1b001b1b +.k2: + .dword 0x0000000101000101 +.k3: + .dword 0x0000000000001db7 +.k4: +.k5: + .dword 0x0000000000000145 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x000000000000001b + .dword 0x0000000000000036, 0x000000000000002d + .dword 0x000000000000006c, 0x0000000000000077 + .dword 0x000000000000005a, 0x0000000000000041 + .dword 0x00000000000000d8, 0x00000000000000c3 + .dword 0x00000000000000ee, 0x00000000000000f5 + .dword 0x00000000000000b4, 0x00000000000000af + .dword 0x0000000000000082, 0x0000000000000099 + .dword 0x00000000000001b0, 0x00000000000001ab + .dword 0x0000000000000186, 0x000000000000019d + .dword 0x00000000000001dc, 0x00000000000001c7 + .dword 0x00000000000001ea, 0x00000000000001f1 + .dword 0x0000000000000168, 0x0000000000000173 + .dword 0x000000000000015e, 0x0000000000000145 + .dword 0x0000000000000104, 0x000000000000011f + .dword 0x0000000000000132, 0x0000000000000129 + .dword 0x0000000000000360, 0x000000000000037b + .dword 0x0000000000000356, 0x000000000000034d + .dword 0x000000000000030c, 0x0000000000000317 + .dword 0x000000000000033a, 
0x0000000000000321 + .dword 0x00000000000003b8, 0x00000000000003a3 + .dword 0x000000000000038e, 0x0000000000000395 + .dword 0x00000000000003d4, 0x00000000000003cf + .dword 0x00000000000003e2, 0x00000000000003f9 + .dword 0x00000000000002d0, 0x00000000000002cb + .dword 0x00000000000002e6, 0x00000000000002fd + .dword 0x00000000000002bc, 0x00000000000002a7 + .dword 0x000000000000028a, 0x0000000000000291 + .dword 0x0000000000000208, 0x0000000000000213 + .dword 0x000000000000023e, 0x0000000000000225 + .dword 0x0000000000000264, 0x000000000000027f + .dword 0x0000000000000252, 0x0000000000000249 + .dword 0x00000000000006c0, 0x00000000000006db + .dword 0x00000000000006f6, 0x00000000000006ed + .dword 0x00000000000006ac, 0x00000000000006b7 + .dword 0x000000000000069a, 0x0000000000000681 + .dword 0x0000000000000618, 0x0000000000000603 + .dword 0x000000000000062e, 0x0000000000000635 + .dword 0x0000000000000674, 0x000000000000066f + .dword 0x0000000000000642, 0x0000000000000659 + .dword 0x0000000000000770, 0x000000000000076b + .dword 0x0000000000000746, 0x000000000000075d + .dword 0x000000000000071c, 0x0000000000000707 + .dword 0x000000000000072a, 0x0000000000000731 + .dword 0x00000000000007a8, 0x00000000000007b3 + .dword 0x000000000000079e, 0x0000000000000785 + .dword 0x00000000000007c4, 0x00000000000007df + .dword 0x00000000000007f2, 0x00000000000007e9 + .dword 0x00000000000005a0, 0x00000000000005bb + .dword 0x0000000000000596, 0x000000000000058d + .dword 0x00000000000005cc, 0x00000000000005d7 + .dword 0x00000000000005fa, 0x00000000000005e1 + .dword 0x0000000000000578, 0x0000000000000563 + .dword 0x000000000000054e, 0x0000000000000555 + .dword 0x0000000000000514, 0x000000000000050f + .dword 0x0000000000000522, 0x0000000000000539 + .dword 0x0000000000000410, 0x000000000000040b + .dword 0x0000000000000426, 0x000000000000043d + .dword 0x000000000000047c, 0x0000000000000467 + .dword 0x000000000000044a, 0x0000000000000451 + .dword 0x00000000000004c8, 0x00000000000004d3 + .dword 
0x00000000000004fe, 0x00000000000004e5 + .dword 0x00000000000004a4, 0x00000000000004bf + .dword 0x0000000000000492, 0x0000000000000489 + .dword 0x0000000000000d80, 0x0000000000000d9b + .dword 0x0000000000000db6, 0x0000000000000dad + .dword 0x0000000000000dec, 0x0000000000000df7 + .dword 0x0000000000000dda, 0x0000000000000dc1 + .dword 0x0000000000000d58, 0x0000000000000d43 + .dword 0x0000000000000d6e, 0x0000000000000d75 + .dword 0x0000000000000d34, 0x0000000000000d2f + .dword 0x0000000000000d02, 0x0000000000000d19 + .dword 0x0000000000000c30, 0x0000000000000c2b + .dword 0x0000000000000c06, 0x0000000000000c1d + .dword 0x0000000000000c5c, 0x0000000000000c47 + .dword 0x0000000000000c6a, 0x0000000000000c71 + .dword 0x0000000000000ce8, 0x0000000000000cf3 + .dword 0x0000000000000cde, 0x0000000000000cc5 + .dword 0x0000000000000c84, 0x0000000000000c9f + .dword 0x0000000000000cb2, 0x0000000000000ca9 + .dword 0x0000000000000ee0, 0x0000000000000efb + .dword 0x0000000000000ed6, 0x0000000000000ecd + .dword 0x0000000000000e8c, 0x0000000000000e97 + .dword 0x0000000000000eba, 0x0000000000000ea1 + .dword 0x0000000000000e38, 0x0000000000000e23 + .dword 0x0000000000000e0e, 0x0000000000000e15 + .dword 0x0000000000000e54, 0x0000000000000e4f + .dword 0x0000000000000e62, 0x0000000000000e79 + .dword 0x0000000000000f50, 0x0000000000000f4b + .dword 0x0000000000000f66, 0x0000000000000f7d + .dword 0x0000000000000f3c, 0x0000000000000f27 + .dword 0x0000000000000f0a, 0x0000000000000f11 + .dword 0x0000000000000f88, 0x0000000000000f93 + .dword 0x0000000000000fbe, 0x0000000000000fa5 + .dword 0x0000000000000fe4, 0x0000000000000fff + .dword 0x0000000000000fd2, 0x0000000000000fc9 + .dword 0x0000000000000b40, 0x0000000000000b5b + .dword 0x0000000000000b76, 0x0000000000000b6d + .dword 0x0000000000000b2c, 0x0000000000000b37 + .dword 0x0000000000000b1a, 0x0000000000000b01 + .dword 0x0000000000000b98, 0x0000000000000b83 + .dword 0x0000000000000bae, 0x0000000000000bb5 + .dword 0x0000000000000bf4, 
0x0000000000000bef + .dword 0x0000000000000bc2, 0x0000000000000bd9 + .dword 0x0000000000000af0, 0x0000000000000aeb + .dword 0x0000000000000ac6, 0x0000000000000add + .dword 0x0000000000000a9c, 0x0000000000000a87 + .dword 0x0000000000000aaa, 0x0000000000000ab1 + .dword 0x0000000000000a28, 0x0000000000000a33 + .dword 0x0000000000000a1e, 0x0000000000000a05 + .dword 0x0000000000000a44, 0x0000000000000a5f + .dword 0x0000000000000a72, 0x0000000000000a69 + .dword 0x0000000000000820, 0x000000000000083b + .dword 0x0000000000000816, 0x000000000000080d + .dword 0x000000000000084c, 0x0000000000000857 + .dword 0x000000000000087a, 0x0000000000000861 + .dword 0x00000000000008f8, 0x00000000000008e3 + .dword 0x00000000000008ce, 0x00000000000008d5 + .dword 0x0000000000000894, 0x000000000000088f + .dword 0x00000000000008a2, 0x00000000000008b9 + .dword 0x0000000000000990, 0x000000000000098b + .dword 0x00000000000009a6, 0x00000000000009bd + .dword 0x00000000000009fc, 0x00000000000009e7 + .dword 0x00000000000009ca, 0x00000000000009d1 + .dword 0x0000000000000948, 0x0000000000000953 + .dword 0x000000000000097e, 0x0000000000000965 + .dword 0x0000000000000924, 0x000000000000093f + .dword 0x0000000000000912, 0x0000000000000909 diff --git a/crc/riscv64/crc64_iso_refl.S b/crc/riscv64/crc64_iso_refl.S new file mode 100644 index 00000000..3f88dfc2 --- /dev/null +++ b/crc/riscv64/crc64_iso_refl.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc64_refl_common_clmul.h" + +/* uint64_t crc64_iso_refl(uint64_t init_crc, uint8_t *buf, unt64_t len) */ +crc64_func_refl crc64_iso_refl + +/* precomputed folding constants */ +.poly: + .dword 0xb000000000000001 /* poly reflected, excluding leading 1 */ +.mu: + .dword 0xb000000000000001 +.k1: + .dword 0x01b001b1b0000001 +.k2: + .dword 0xb100010100000001 +.k3: + .dword 0x6b70000000000001 +.k4: +.k5: + .dword 0xf500000000000001 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x01b0000000000000 + .dword 0x0360000000000000, 0x02d0000000000000 + .dword 0x06c0000000000000, 0x0770000000000000 + .dword 0x05a0000000000000, 0x0410000000000000 + .dword 0x0d80000000000000, 0x0c30000000000000 + .dword 0x0ee0000000000000, 0x0f50000000000000 + .dword 0x0b40000000000000, 0x0af0000000000000 + .dword 0x0820000000000000, 0x0990000000000000 + .dword 0x1b00000000000000, 0x1ab0000000000000 + .dword 0x1860000000000000, 0x19d0000000000000 + .dword 0x1dc0000000000000, 0x1c70000000000000 + .dword 0x1ea0000000000000, 0x1f10000000000000 + .dword 0x1680000000000000, 0x1730000000000000 + .dword 0x15e0000000000000, 0x1450000000000000 + .dword 0x1040000000000000, 0x11f0000000000000 + .dword 0x1320000000000000, 0x1290000000000000 + .dword 0x3600000000000000, 0x37b0000000000000 + .dword 0x3560000000000000, 0x34d0000000000000 + .dword 0x30c0000000000000, 0x3170000000000000 + .dword 0x33a0000000000000, 0x3210000000000000 + .dword 0x3b80000000000000, 0x3a30000000000000 + .dword 0x38e0000000000000, 0x3950000000000000 + .dword 0x3d40000000000000, 0x3cf0000000000000 + .dword 0x3e20000000000000, 0x3f90000000000000 + .dword 0x2d00000000000000, 0x2cb0000000000000 + .dword 0x2e60000000000000, 0x2fd0000000000000 + .dword 0x2bc0000000000000, 0x2a70000000000000 + .dword 0x28a0000000000000, 0x2910000000000000 + .dword 0x2080000000000000, 0x2130000000000000 + .dword 0x23e0000000000000, 0x2250000000000000 + 
.dword 0x2640000000000000, 0x27f0000000000000 + .dword 0x2520000000000000, 0x2490000000000000 + .dword 0x6c00000000000000, 0x6db0000000000000 + .dword 0x6f60000000000000, 0x6ed0000000000000 + .dword 0x6ac0000000000000, 0x6b70000000000000 + .dword 0x69a0000000000000, 0x6810000000000000 + .dword 0x6180000000000000, 0x6030000000000000 + .dword 0x62e0000000000000, 0x6350000000000000 + .dword 0x6740000000000000, 0x66f0000000000000 + .dword 0x6420000000000000, 0x6590000000000000 + .dword 0x7700000000000000, 0x76b0000000000000 + .dword 0x7460000000000000, 0x75d0000000000000 + .dword 0x71c0000000000000, 0x7070000000000000 + .dword 0x72a0000000000000, 0x7310000000000000 + .dword 0x7a80000000000000, 0x7b30000000000000 + .dword 0x79e0000000000000, 0x7850000000000000 + .dword 0x7c40000000000000, 0x7df0000000000000 + .dword 0x7f20000000000000, 0x7e90000000000000 + .dword 0x5a00000000000000, 0x5bb0000000000000 + .dword 0x5960000000000000, 0x58d0000000000000 + .dword 0x5cc0000000000000, 0x5d70000000000000 + .dword 0x5fa0000000000000, 0x5e10000000000000 + .dword 0x5780000000000000, 0x5630000000000000 + .dword 0x54e0000000000000, 0x5550000000000000 + .dword 0x5140000000000000, 0x50f0000000000000 + .dword 0x5220000000000000, 0x5390000000000000 + .dword 0x4100000000000000, 0x40b0000000000000 + .dword 0x4260000000000000, 0x43d0000000000000 + .dword 0x47c0000000000000, 0x4670000000000000 + .dword 0x44a0000000000000, 0x4510000000000000 + .dword 0x4c80000000000000, 0x4d30000000000000 + .dword 0x4fe0000000000000, 0x4e50000000000000 + .dword 0x4a40000000000000, 0x4bf0000000000000 + .dword 0x4920000000000000, 0x4890000000000000 + .dword 0xd800000000000000, 0xd9b0000000000000 + .dword 0xdb60000000000000, 0xdad0000000000000 + .dword 0xdec0000000000000, 0xdf70000000000000 + .dword 0xdda0000000000000, 0xdc10000000000000 + .dword 0xd580000000000000, 0xd430000000000000 + .dword 0xd6e0000000000000, 0xd750000000000000 + .dword 0xd340000000000000, 0xd2f0000000000000 + .dword 0xd020000000000000, 
0xd190000000000000 + .dword 0xc300000000000000, 0xc2b0000000000000 + .dword 0xc060000000000000, 0xc1d0000000000000 + .dword 0xc5c0000000000000, 0xc470000000000000 + .dword 0xc6a0000000000000, 0xc710000000000000 + .dword 0xce80000000000000, 0xcf30000000000000 + .dword 0xcde0000000000000, 0xcc50000000000000 + .dword 0xc840000000000000, 0xc9f0000000000000 + .dword 0xcb20000000000000, 0xca90000000000000 + .dword 0xee00000000000000, 0xefb0000000000000 + .dword 0xed60000000000000, 0xecd0000000000000 + .dword 0xe8c0000000000000, 0xe970000000000000 + .dword 0xeba0000000000000, 0xea10000000000000 + .dword 0xe380000000000000, 0xe230000000000000 + .dword 0xe0e0000000000000, 0xe150000000000000 + .dword 0xe540000000000000, 0xe4f0000000000000 + .dword 0xe620000000000000, 0xe790000000000000 + .dword 0xf500000000000000, 0xf4b0000000000000 + .dword 0xf660000000000000, 0xf7d0000000000000 + .dword 0xf3c0000000000000, 0xf270000000000000 + .dword 0xf0a0000000000000, 0xf110000000000000 + .dword 0xf880000000000000, 0xf930000000000000 + .dword 0xfbe0000000000000, 0xfa50000000000000 + .dword 0xfe40000000000000, 0xfff0000000000000 + .dword 0xfd20000000000000, 0xfc90000000000000 + .dword 0xb400000000000000, 0xb5b0000000000000 + .dword 0xb760000000000000, 0xb6d0000000000000 + .dword 0xb2c0000000000000, 0xb370000000000000 + .dword 0xb1a0000000000000, 0xb010000000000000 + .dword 0xb980000000000000, 0xb830000000000000 + .dword 0xbae0000000000000, 0xbb50000000000000 + .dword 0xbf40000000000000, 0xbef0000000000000 + .dword 0xbc20000000000000, 0xbd90000000000000 + .dword 0xaf00000000000000, 0xaeb0000000000000 + .dword 0xac60000000000000, 0xadd0000000000000 + .dword 0xa9c0000000000000, 0xa870000000000000 + .dword 0xaaa0000000000000, 0xab10000000000000 + .dword 0xa280000000000000, 0xa330000000000000 + .dword 0xa1e0000000000000, 0xa050000000000000 + .dword 0xa440000000000000, 0xa5f0000000000000 + .dword 0xa720000000000000, 0xa690000000000000 + .dword 0x8200000000000000, 0x83b0000000000000 + .dword 
0x8160000000000000, 0x80d0000000000000 + .dword 0x84c0000000000000, 0x8570000000000000 + .dword 0x87a0000000000000, 0x8610000000000000 + .dword 0x8f80000000000000, 0x8e30000000000000 + .dword 0x8ce0000000000000, 0x8d50000000000000 + .dword 0x8940000000000000, 0x88f0000000000000 + .dword 0x8a20000000000000, 0x8b90000000000000 + .dword 0x9900000000000000, 0x98b0000000000000 + .dword 0x9a60000000000000, 0x9bd0000000000000 + .dword 0x9fc0000000000000, 0x9e70000000000000 + .dword 0x9ca0000000000000, 0x9d10000000000000 + .dword 0x9480000000000000, 0x9530000000000000 + .dword 0x97e0000000000000, 0x9650000000000000 + .dword 0x9240000000000000, 0x93f0000000000000 + .dword 0x9120000000000000, 0x9090000000000000 diff --git a/crc/riscv64/crc64_jones_norm.S b/crc/riscv64/crc64_jones_norm.S new file mode 100644 index 00000000..7bc81942 --- /dev/null +++ b/crc/riscv64/crc64_jones_norm.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+#include "crc64_norm_common_clmul.h"
+
+/* uint64_t crc64_jones_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */
+crc64_func_norm crc64_jones_norm /* function body is emitted by the macro from crc64_norm_common_clmul.h */
+
+/* precomputed folding constants; .poly, .mu and .k5 are loaded by the macros in crc64_norm_common_clmul.h */
+.poly:
+	.dword 0xad93d23594c935a9 /* excludes leading 1 */
+.mu:
+	.dword 0xddf3eeb298be6cf8 /* excludes leading 1 */
+.k1: /* NOTE(review): .k1-.k4 are not referenced in this file; presumably read by crc_fold_loop - confirm */
+	.dword 0x13c961588f27f643
+.k2:
+	.dword 0x4e501e58ca43d25e
+.k3:
+	.dword 0x698b74157cfbd736
+.k4: /* no data of its own: .k4 and .k5 both label the dword below */
+.k5:
+	.dword 0x4445ed2750017038
+
+/* lookup table: 256 dwords (128 rows of two), indexed by an 8-bit value; entry 1 repeats the .poly value */
+.crc64_table:
+	.dword 0x0000000000000000, 0xad93d23594c935a9
+	.dword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52
+	.dword 0x40fb3e88ee7f885f, 0xed68ecbd7ab6bdf6
+	.dword 0xb64f48d65324d6a4, 0x1bdc9ae3c7ede30d
+	.dword 0x81f67d11dcff10be, 0x2c65af2448362517
+	.dword 0x77420b4f61a44e45, 0xdad1d97af56d7bec
+	.dword 0xc10d4399328098e1, 0x6c9e91aca649ad48
+	.dword 0x37b935c78fdbc61a, 0x9a2ae7f21b12f3b3
+	.dword 0xae7f28162d3714d5, 0x03ecfa23b9fe217c
+	.dword 0x58cb5e48906c4a2e, 0xf5588c7d04a57f87
+	.dword 0xee84169ec3489c8a, 0x4317c4ab5781a923
+	.dword 0x183060c07e13c271, 0xb5a3b2f5eadaf7d8
+	.dword 0x2f895507f1c8046b, 0x821a8732650131c2
+	.dword 0xd93d23594c935a90, 0x74aef16cd85a6f39
+	.dword 0x6f726b8f1fb78c34, 0xc2e1b9ba8b7eb99d
+	.dword 0x99c61dd1a2ecd2cf, 0x3455cfe43625e766
+	.dword 0xf16d8219cea71c03, 0x5cfe502c5a6e29aa
+	.dword 0x07d9f44773fc42f8, 0xaa4a2672e7357751
+	.dword 0xb196bc9120d8945c, 0x1c056ea4b411a1f5
+	.dword 0x4722cacf9d83caa7, 0xeab118fa094aff0e
+	.dword 0x709bff0812580cbd, 0xdd082d3d86913914
+	.dword 0x862f8956af035246, 0x2bbc5b633bca67ef
+	.dword 0x3060c180fc2784e2, 0x9df313b568eeb14b
+	.dword 0xc6d4b7de417cda19, 0x6b4765ebd5b5efb0
+	.dword 0x5f12aa0fe39008d6, 0xf281783a77593d7f
+	.dword 0xa9a6dc515ecb562d, 0x04350e64ca026384
+	.dword 0x1fe994870def8089, 0xb27a46b29926b520
+	.dword 0xe95de2d9b0b4de72, 0x44ce30ec247debdb
+	.dword 0xdee4d71e3f6f1868, 0x7377052baba62dc1
+	.dword 0x2850a14082344693, 0x85c3737516fd733a
+	.dword 0x9e1fe996d1109037, 0x338c3ba345d9a59e
+	.dword 0x68ab9fc86c4bcecc, 0xc5384dfdf882fb65
+	.dword 0x4f48d60609870daf, 0xe2db04339d4e3806
+	.dword 0xb9fca058b4dc5354, 0x146f726d201566fd
+	.dword 0x0fb3e88ee7f885f0, 0xa2203abb7331b059
+	.dword 0xf9079ed05aa3db0b, 0x54944ce5ce6aeea2
+	.dword 0xcebeab17d5781d11, 0x632d792241b128b8
+	.dword 0x380add49682343ea, 0x95990f7cfcea7643
+	.dword 0x8e45959f3b07954e, 0x23d647aaafcea0e7
+	.dword 0x78f1e3c1865ccbb5, 0xd56231f41295fe1c
+	.dword 0xe137fe1024b0197a, 0x4ca42c25b0792cd3
+	.dword 0x1783884e99eb4781, 0xba105a7b0d227228
+	.dword 0xa1ccc098cacf9125, 0x0c5f12ad5e06a48c
+	.dword 0x5778b6c67794cfde, 0xfaeb64f3e35dfa77
+	.dword 0x60c18301f84f09c4, 0xcd5251346c863c6d
+	.dword 0x9675f55f4514573f, 0x3be6276ad1dd6296
+	.dword 0x203abd891630819b, 0x8da96fbc82f9b432
+	.dword 0xd68ecbd7ab6bdf60, 0x7b1d19e23fa2eac9
+	.dword 0xbe25541fc72011ac, 0x13b6862a53e92405
+	.dword 0x489122417a7b4f57, 0xe502f074eeb27afe
+	.dword 0xfede6a97295f99f3, 0x534db8a2bd96ac5a
+	.dword 0x086a1cc99404c708, 0xa5f9cefc00cdf2a1
+	.dword 0x3fd3290e1bdf0112, 0x9240fb3b8f1634bb
+	.dword 0xc9675f50a6845fe9, 0x64f48d65324d6a40
+	.dword 0x7f281786f5a0894d, 0xd2bbc5b36169bce4
+	.dword 0x899c61d848fbd7b6, 0x240fb3eddc32e21f
+	.dword 0x105a7c09ea170579, 0xbdc9ae3c7ede30d0
+	.dword 0xe6ee0a57574c5b82, 0x4b7dd862c3856e2b
+	.dword 0x50a1428104688d26, 0xfd3290b490a1b88f
+	.dword 0xa61534dfb933d3dd, 0x0b86e6ea2dfae674
+	.dword 0x91ac011836e815c7, 0x3c3fd32da221206e
+	.dword 0x671877468bb34b3c, 0xca8ba5731f7a7e95
+	.dword 0xd1573f90d8979d98, 0x7cc4eda54c5ea831
+	.dword 0x27e349ce65ccc363, 0x8a709bfbf105f6ca
+	.dword 0x9e91ac0c130e1b5e, 0x33027e3987c72ef7
+	.dword 0x6825da52ae5545a5, 0xc5b608673a9c700c
+	.dword 0xde6a9284fd719301, 0x73f940b169b8a6a8
+	.dword 0x28dee4da402acdfa, 0x854d36efd4e3f853
+	.dword 0x1f67d11dcff10be0, 0xb2f403285b383e49
+	.dword 0xe9d3a74372aa551b, 0x44407576e66360b2
+	.dword 0x5f9cef95218e83bf, 0xf20f3da0b547b616
+	.dword 0xa92899cb9cd5dd44, 0x04bb4bfe081ce8ed
+	.dword 0x30ee841a3e390f8b, 0x9d7d562faaf03a22
+	.dword 0xc65af24483625170, 0x6bc9207117ab64d9
+	.dword 0x7015ba92d04687d4, 0xdd8668a7448fb27d
+	.dword 0x86a1cccc6d1dd92f, 0x2b321ef9f9d4ec86
+	.dword 0xb118f90be2c61f35, 0x1c8b2b3e760f2a9c
+	.dword 0x47ac8f555f9d41ce, 0xea3f5d60cb547467
+	.dword 0xf1e3c7830cb9976a, 0x5c7015b69870a2c3
+	.dword 0x0757b1ddb1e2c991, 0xaac463e8252bfc38
+	.dword 0x6ffc2e15dda9075d, 0xc26ffc20496032f4
+	.dword 0x9948584b60f259a6, 0x34db8a7ef43b6c0f
+	.dword 0x2f07109d33d68f02, 0x8294c2a8a71fbaab
+	.dword 0xd9b366c38e8dd1f9, 0x7420b4f61a44e450
+	.dword 0xee0a5304015617e3, 0x43998131959f224a
+	.dword 0x18be255abc0d4918, 0xb52df76f28c47cb1
+	.dword 0xaef16d8cef299fbc, 0x0362bfb97be0aa15
+	.dword 0x58451bd25272c147, 0xf5d6c9e7c6bbf4ee
+	.dword 0xc1830603f09e1388, 0x6c10d43664572621
+	.dword 0x3737705d4dc54d73, 0x9aa4a268d90c78da
+	.dword 0x8178388b1ee19bd7, 0x2cebeabe8a28ae7e
+	.dword 0x77cc4ed5a3bac52c, 0xda5f9ce03773f085
+	.dword 0x40757b122c610336, 0xede6a927b8a8369f
+	.dword 0xb6c10d4c913a5dcd, 0x1b52df7905f36864
+	.dword 0x008e459ac21e8b69, 0xad1d97af56d7bec0
+	.dword 0xf63a33c47f45d592, 0x5ba9e1f1eb8ce03b
+	.dword 0xd1d97a0a1a8916f1, 0x7c4aa83f8e402358
+	.dword 0x276d0c54a7d2480a, 0x8afede61331b7da3
+	.dword 0x91224482f4f69eae, 0x3cb196b7603fab07
+	.dword 0x679632dc49adc055, 0xca05e0e9dd64f5fc
+	.dword 0x502f071bc676064f, 0xfdbcd52e52bf33e6
+	.dword 0xa69b71457b2d58b4, 0x0b08a370efe46d1d
+	.dword 0x10d4399328098e10, 0xbd47eba6bcc0bbb9
+	.dword 0xe6604fcd9552d0eb, 0x4bf39df8019be542
+	.dword 0x7fa6521c37be0224, 0xd2358029a377378d
+	.dword 0x891224428ae55cdf, 0x2481f6771e2c6976
+	.dword 0x3f5d6c94d9c18a7b, 0x92cebea14d08bfd2
+	.dword 0xc9e91aca649ad480, 0x647ac8fff053e129
+	.dword 0xfe502f0deb41129a, 0x53c3fd387f882733
+	.dword 0x08e45953561a4c61, 0xa5778b66c2d379c8
+	.dword 0xbeab1185053e9ac5, 0x1338c3b091f7af6c
+	.dword 0x481f67dbb865c43e, 0xe58cb5ee2cacf197
+	.dword 0x20b4f813d42e0af2, 0x8d272a2640e73f5b
+	.dword 0xd6008e4d69755409, 0x7b935c78fdbc61a0
+	.dword 0x604fc69b3a5182ad, 0xcddc14aeae98b704
+	.dword 0x96fbb0c5870adc56, 0x3b6862f013c3e9ff
+	.dword 0xa142850208d11a4c, 0x0cd157379c182fe5
+	.dword 0x57f6f35cb58a44b7, 0xfa6521692143711e
+	.dword 0xe1b9bb8ae6ae9213, 0x4c2a69bf7267a7ba
+	.dword 0x170dcdd45bf5cce8, 0xba9e1fe1cf3cf941
+	.dword 0x8ecbd005f9191e27, 0x235802306dd02b8e
+	.dword 0x787fa65b444240dc, 0xd5ec746ed08b7575
+	.dword 0xce30ee8d17669678, 0x63a33cb883afa3d1
+	.dword 0x388498d3aa3dc883, 0x95174ae63ef4fd2a
+	.dword 0x0f3dad1425e60e99, 0xa2ae7f21b12f3b30
+	.dword 0xf989db4a98bd5062, 0x541a097f0c7465cb
+	.dword 0x4fc6939ccb9986c6, 0xe25541a95f50b36f
+	.dword 0xb972e5c276c2d83d, 0x14e137f7e20bed94
diff --git a/crc/riscv64/crc64_jones_refl.S b/crc/riscv64/crc64_jones_refl.S
new file mode 100644
index 00000000..73ad9546
--- /dev/null
+++ b/crc/riscv64/crc64_jones_refl.S
@@ -0,0 +1,179 @@
+########################################################################
+# Copyright(c) 2024 ByteDance All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+########################################################################
+
+#include "crc64_refl_common_clmul.h"
+
+/* uint64_t crc64_jones_refl(uint64_t init_crc, uint8_t *buf, uint64_t len) */
+crc64_func_refl crc64_jones_refl /* function body is emitted by the macro from crc64_refl_common_clmul.h */
+
+/* precomputed folding constants; .poly, .mu and .k5 are loaded by the macros in crc64_refl_common_clmul.h */
+.poly:
+	.dword 0x2b5926535897936b /* poly reflected, excluding leading 1 */
+.mu:
+	.dword 0x3e6cfa329aef9f77
+.k1: /* NOTE(review): .k1-.k4 are not referenced in this file; presumably read by crc_fold_loop - confirm */
+	.dword 0xaf86efb16d9ab4fb
+.k2:
+	.dword 0xf49784a634f014e4
+.k3:
+	.dword 0xd9d7be7d505da32c
+.k4: /* no data of its own: .k4 and .k5 both label the dword below */
+.k5:
+	.dword 0x381d0015c96f4444
+
+/* lookup table: 256 dwords (128 rows of two), indexed by an 8-bit value */
+.crc64_table:
+	.dword 0x0000000000000000, 0x7ad870c830358979
+	.dword 0xf5b0e190606b12f2, 0x8f689158505e9b8b
+	.dword 0xc038e5739841b68f, 0xbae095bba8743ff6
+	.dword 0x358804e3f82aa47d, 0x4f50742bc81f2d04
+	.dword 0xab28ecb46814fe75, 0xd1f09c7c5821770c
+	.dword 0x5e980d24087fec87, 0x24407dec384a65fe
+	.dword 0x6b1009c7f05548fa, 0x11c8790fc060c183
+	.dword 0x9ea0e857903e5a08, 0xe478989fa00bd371
+	.dword 0x7d08ff3b88be6f81, 0x07d08ff3b88be6f8
+	.dword 0x88b81eabe8d57d73, 0xf2606e63d8e0f40a
+	.dword 0xbd301a4810ffd90e, 0xc7e86a8020ca5077
+	.dword 0x4880fbd87094cbfc, 0x32588b1040a14285
+	.dword 0xd620138fe0aa91f4, 0xacf86347d09f188d
+	.dword 0x2390f21f80c18306, 0x594882d7b0f40a7f
+	.dword 0x1618f6fc78eb277b, 0x6cc0863448deae02
+	.dword 0xe3a8176c18803589, 0x997067a428b5bcf0
+	.dword 0xfa11fe77117cdf02, 0x80c98ebf2149567b
+	.dword 0x0fa11fe77117cdf0, 0x75796f2f41224489
+	.dword 0x3a291b04893d698d, 0x40f16bccb908e0f4
+	.dword 0xcf99fa94e9567b7f, 0xb5418a5cd963f206
+	.dword 0x513912c379682177, 0x2be1620b495da80e
+	.dword 0xa489f35319033385, 0xde51839b2936bafc
+	.dword 0x9101f7b0e12997f8, 0xebd98778d11c1e81
+	.dword 0x64b116208142850a, 0x1e6966e8b1770c73
+	.dword 0x8719014c99c2b083, 0xfdc17184a9f739fa
+	.dword 0x72a9e0dcf9a9a271, 0x08719014c99c2b08
+	.dword 0x4721e43f0183060c, 0x3df994f731b68f75
+	.dword 0xb29105af61e814fe, 0xc849756751dd9d87
+	.dword 0x2c31edf8f1d64ef6, 0x56e99d30c1e3c78f
+	.dword 0xd9810c6891bd5c04, 0xa3597ca0a188d57d
+	.dword 0xec09088b6997f879, 0x96d1784359a27100
+	.dword 0x19b9e91b09fcea8b, 0x636199d339c963f2
+	.dword 0xdf7adabd7a6e2d6f, 0xa5a2aa754a5ba416
+	.dword 0x2aca3b2d1a053f9d, 0x50124be52a30b6e4
+	.dword 0x1f423fcee22f9be0, 0x659a4f06d21a1299
+	.dword 0xeaf2de5e82448912, 0x902aae96b271006b
+	.dword 0x74523609127ad31a, 0x0e8a46c1224f5a63
+	.dword 0x81e2d7997211c1e8, 0xfb3aa75142244891
+	.dword 0xb46ad37a8a3b6595, 0xceb2a3b2ba0eecec
+	.dword 0x41da32eaea507767, 0x3b024222da65fe1e
+	.dword 0xa2722586f2d042ee, 0xd8aa554ec2e5cb97
+	.dword 0x57c2c41692bb501c, 0x2d1ab4dea28ed965
+	.dword 0x624ac0f56a91f461, 0x1892b03d5aa47d18
+	.dword 0x97fa21650afae693, 0xed2251ad3acf6fea
+	.dword 0x095ac9329ac4bc9b, 0x7382b9faaaf135e2
+	.dword 0xfcea28a2faafae69, 0x8632586aca9a2710
+	.dword 0xc9622c4102850a14, 0xb3ba5c8932b0836d
+	.dword 0x3cd2cdd162ee18e6, 0x460abd1952db919f
+	.dword 0x256b24ca6b12f26d, 0x5fb354025b277b14
+	.dword 0xd0dbc55a0b79e09f, 0xaa03b5923b4c69e6
+	.dword 0xe553c1b9f35344e2, 0x9f8bb171c366cd9b
+	.dword 0x10e3202993385610, 0x6a3b50e1a30ddf69
+	.dword 0x8e43c87e03060c18, 0xf49bb8b633338561
+	.dword 0x7bf329ee636d1eea, 0x012b592653589793
+	.dword 0x4e7b2d0d9b47ba97, 0x34a35dc5ab7233ee
+	.dword 0xbbcbcc9dfb2ca865, 0xc113bc55cb19211c
+	.dword 0x5863dbf1e3ac9dec, 0x22bbab39d3991495
+	.dword 0xadd33a6183c78f1e, 0xd70b4aa9b3f20667
+	.dword 0x985b3e827bed2b63, 0xe2834e4a4bd8a21a
+	.dword 0x6debdf121b863991, 0x1733afda2bb3b0e8
+	.dword 0xf34b37458bb86399, 0x8993478dbb8deae0
+	.dword 0x06fbd6d5ebd3716b, 0x7c23a61ddbe6f812
+	.dword 0x3373d23613f9d516, 0x49aba2fe23cc5c6f
+	.dword 0xc6c333a67392c7e4, 0xbc1b436e43a74e9d
+	.dword 0x95ac9329ac4bc9b5, 0xef74e3e19c7e40cc
+	.dword 0x601c72b9cc20db47, 0x1ac40271fc15523e
+	.dword 0x5594765a340a7f3a, 0x2f4c0692043ff643
+	.dword 0xa02497ca54616dc8, 0xdafce7026454e4b1
+	.dword 0x3e847f9dc45f37c0, 0x445c0f55f46abeb9
+	.dword 0xcb349e0da4342532, 0xb1eceec59401ac4b
+	.dword 0xfebc9aee5c1e814f, 0x8464ea266c2b0836
+	.dword 0x0b0c7b7e3c7593bd, 0x71d40bb60c401ac4
+	.dword 0xe8a46c1224f5a634, 0x927c1cda14c02f4d
+	.dword 0x1d148d82449eb4c6, 0x67ccfd4a74ab3dbf
+	.dword 0x289c8961bcb410bb, 0x5244f9a98c8199c2
+	.dword 0xdd2c68f1dcdf0249, 0xa7f41839ecea8b30
+	.dword 0x438c80a64ce15841, 0x3954f06e7cd4d138
+	.dword 0xb63c61362c8a4ab3, 0xcce411fe1cbfc3ca
+	.dword 0x83b465d5d4a0eece, 0xf96c151de49567b7
+	.dword 0x76048445b4cbfc3c, 0x0cdcf48d84fe7545
+	.dword 0x6fbd6d5ebd3716b7, 0x15651d968d029fce
+	.dword 0x9a0d8ccedd5c0445, 0xe0d5fc06ed698d3c
+	.dword 0xaf85882d2576a038, 0xd55df8e515432941
+	.dword 0x5a3569bd451db2ca, 0x20ed197575283bb3
+	.dword 0xc49581ead523e8c2, 0xbe4df122e51661bb
+	.dword 0x3125607ab548fa30, 0x4bfd10b2857d7349
+	.dword 0x04ad64994d625e4d, 0x7e7514517d57d734
+	.dword 0xf11d85092d094cbf, 0x8bc5f5c11d3cc5c6
+	.dword 0x12b5926535897936, 0x686de2ad05bcf04f
+	.dword 0xe70573f555e26bc4, 0x9ddd033d65d7e2bd
+	.dword 0xd28d7716adc8cfb9, 0xa85507de9dfd46c0
+	.dword 0x273d9686cda3dd4b, 0x5de5e64efd965432
+	.dword 0xb99d7ed15d9d8743, 0xc3450e196da80e3a
+	.dword 0x4c2d9f413df695b1, 0x36f5ef890dc31cc8
+	.dword 0x79a59ba2c5dc31cc, 0x037deb6af5e9b8b5
+	.dword 0x8c157a32a5b7233e, 0xf6cd0afa9582aa47
+	.dword 0x4ad64994d625e4da, 0x300e395ce6106da3
+	.dword 0xbf66a804b64ef628, 0xc5bed8cc867b7f51
+	.dword 0x8aeeace74e645255, 0xf036dc2f7e51db2c
+	.dword 0x7f5e4d772e0f40a7, 0x05863dbf1e3ac9de
+	.dword 0xe1fea520be311aaf, 0x9b26d5e88e0493d6
+	.dword 0x144e44b0de5a085d, 0x6e963478ee6f8124
+	.dword 0x21c640532670ac20, 0x5b1e309b16452559
+	.dword 0xd476a1c3461bbed2, 0xaeaed10b762e37ab
+	.dword 0x37deb6af5e9b8b5b, 0x4d06c6676eae0222
+	.dword 0xc26e573f3ef099a9, 0xb8b627f70ec510d0
+	.dword 0xf7e653dcc6da3dd4, 0x8d3e2314f6efb4ad
+	.dword 0x0256b24ca6b12f26, 0x788ec2849684a65f
+	.dword 0x9cf65a1b368f752e, 0xe62e2ad306bafc57
+	.dword 0x6946bb8b56e467dc, 0x139ecb4366d1eea5
+	.dword 0x5ccebf68aecec3a1, 0x2616cfa09efb4ad8
+	.dword 0xa97e5ef8cea5d153, 0xd3a62e30fe90582a
+	.dword 0xb0c7b7e3c7593bd8, 0xca1fc72bf76cb2a1
+	.dword 0x45775673a732292a, 0x3faf26bb9707a053
+	.dword 0x70ff52905f188d57, 0x0a2722586f2d042e
+	.dword 0x854fb3003f739fa5, 0xff97c3c80f4616dc
+	.dword 0x1bef5b57af4dc5ad, 0x61372b9f9f784cd4
+	.dword 0xee5fbac7cf26d75f, 0x9487ca0fff135e26
+	.dword 0xdbd7be24370c7322, 0xa10fceec0739fa5b
+	.dword 0x2e675fb4576761d0, 0x54bf2f7c6752e8a9
+	.dword 0xcdcf48d84fe75459, 0xb71738107fd2dd20
+	.dword 0x387fa9482f8c46ab, 0x42a7d9801fb9cfd2
+	.dword 0x0df7adabd7a6e2d6, 0x772fdd63e7936baf
+	.dword 0xf8474c3bb7cdf024, 0x829f3cf387f8795d
+	.dword 0x66e7a46c27f3aa2c, 0x1c3fd4a417c62355
+	.dword 0x935745fc4798b8de, 0xe98f353477ad31a7
+	.dword 0xa6df411fbfb21ca3, 0xdc0731d78f8795da
+	.dword 0x536fa08fdfd90e51, 0x29b7d047efec8728
diff --git a/crc/riscv64/crc64_norm_common_clmul.h b/crc/riscv64/crc64_norm_common_clmul.h
new file mode 100644
index 00000000..30533efd
--- /dev/null
+++ b/crc/riscv64/crc64_norm_common_clmul.h
@@ -0,0 +1,104 @@
+########################################################################
+# Copyright(c) 2024 ByteDance All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of ByteDance Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+#include "crc_fold_common_clmul.h"
+
+/* folding final reduction: turns the folded 128-bit value into the 64-bit crc */
+/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0) */
+.macro crc64_norm_fold_reduction
+	/* precomputed constants */
+	ld K5, .k5
+
+	clmulh t2, K5, HIGH		/* t2 = upper half of the carry-less product K5 * HIGH */
+	xor t2, t2, LOW			/* fold LOW into that upper half */
+	clmul LOW, K5, HIGH		/* LOW = lower half of the same product */
+
+	/* as the mu and poly constants are 65-bits long, stored missing their
+	 * leading 1, multiplication requires a clmul(h) and xor operation
+	 */
+	clmulh t3, MU, t2		/* upper half of MU * t2 ... */
+	xor t3, t3, t2			/* ... plus t2 for the implicit leading 1 of mu */
+	clmul t2, POLY, t3		/* lower half of POLY * t3 */
+	xor SEED, t2, LOW		/* SEED = reduced 64-bit value */
+.fold_1_done:	/* fixed label (no \@): expand this macro once per file. NOTE(review): presumably a crc_fold_loop branch target - confirm */
+.endm
+
+/* calculate crc64 of a misaligned buffer using a table */
+/* \len is the register holding how many bytes to read */
+/* expects SEED (a0) and BUF (a1) to hold corresponding values */
+/* updates values of SEED and BUF */
+/* trashes t0, t1, t2 and t3 */
+.macro crc64_norm_table len:req
+	beqz \len, .table_done_\@	/* nothing to do for a zero length */
+	add t1, BUF, \len		/* t1 = end pointer (one past the last byte) */
+	la t0, .crc64_table		/* t0 = base address of the lookup table */
+.table_loop_\@:
+	lbu t2, (BUF)			/* next input byte */
+	srli t3, SEED, 56		/* top byte of the running crc */
+	addi BUF, BUF, 1
+	xor t2, t2, t3			/* table index = input byte ^ top crc byte */
+	slli t2, t2, 3			/* scale the index by 8: entries are dwords */
+	add t2, t2, t0
+	ld t3, (t2)			/* t3 = table entry */
+	slli SEED, SEED, 8		/* crc = (crc << 8) ^ entry */
+	xor SEED, SEED, t3
+	bne BUF, t1, .table_loop_\@	/* loop until the end pointer is reached */
+.table_done_\@:
+.endm
+
+/* define a function to calculate a crc64 norm hash; arguments arrive in SEED, BUF and LEN */
+.macro crc64_func_norm name:req
+.text
+.align 1
+.global \name
+.type \name\(), %function
+\name\():
+	/* load precomputed constants */
+	ld POLY, .poly
+	ld MU, .mu
+
+	not SEED, SEED			/* invert seed */
+
+	/* align and fold buffer to 64-bits */
+	neg t4, BUF			/* t4 = (-BUF) & 7: bytes up to the next 8-byte boundary */
+	andi t4, t4, 0b111		/* (BUF & 7 would be the offset past the previous boundary) */
+	bltu LEN, t4, .excess		/* buffer ends before the boundary: table only */
+	crc64_norm_table t4		/* consume the unaligned head byte by byte */
+	sub LEN, LEN, t4
+
+	crc_fold_loop 64 1 0
+	crc64_norm_fold_reduction
+
+.excess:
+	crc64_norm_table LEN		/* whatever is left in LEN goes through the table */
+
+	/* invert result */
+	not SEED, SEED
+	ret
+.endm
diff --git a/crc/riscv64/crc64_refl_common_clmul.h b/crc/riscv64/crc64_refl_common_clmul.h
new file mode 100644
index 00000000..af45d2cb
--- /dev/null
+++ b/crc/riscv64/crc64_refl_common_clmul.h
@@ -0,0 +1,104 @@
+########################################################################
+# Copyright(c) 2024 ByteDance All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of ByteDance Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+#include "crc_fold_common_clmul.h"
+
+/* folding final reduction: turns the folded 128-bit value into the 64-bit crc */
+/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0) */
+.macro crc64_refl_fold_reduction
+	/* precomputed constants */
+	ld K5, .k5
+
+	clmulh t2, K5, HIGH		/* t2 = upper half of the carry-less product K5 * HIGH */
+	clmul HIGH, K5, HIGH		/* HIGH = lower half of the same product */
+	xor LOW, HIGH, LOW		/* fold that lower half into LOW */
+
+	clmul LOW, MU, LOW		/* LOW = lower half of MU * LOW */
+	/* as poly constant is 65-bits long missing the leading 1,
+	 * multiplication requires a clmul(h) and xor
+	 */
+	clmulh HIGH, POLY, LOW		/* upper half of POLY * LOW ... */
+	xor LOW, HIGH, LOW		/* ... plus LOW for the implicit leading 1 of poly */
+	xor SEED, LOW, t2		/* SEED = reduced 64-bit value */
+.fold_1_done:	/* fixed label (no \@): expand this macro once per file. NOTE(review): presumably a crc_fold_loop branch target - confirm */
+.endm
+
+/* calculate crc64 of a misaligned buffer using a table */
+/* \len is the register holding how many bytes to read */
+/* expects SEED (a0) and BUF (a1) to hold corresponding values */
+/* updates values of SEED and BUF */
+/* trashes t0, t1, t2 and t3 */
+.macro crc64_refl_table len:req
+	beqz \len, .table_done_\@	/* nothing to do for a zero length */
+	add t1, BUF, \len		/* t1 = end pointer (one past the last byte) */
+	la t0, .crc64_table		/* t0 = base address of the lookup table */
+.table_loop_\@:
+	lbu t2, (BUF)			/* next input byte */
+	andi t3, SEED, 0xff		/* low byte of the running crc */
+	addi BUF, BUF, 1
+	xor t2, t2, t3			/* table index = input byte ^ low crc byte */
+	slli t2, t2, 3			/* scale the index by 8: entries are dwords */
+	add t2, t2, t0
+	ld t3, (t2)			/* t3 = table entry */
+	srli SEED, SEED, 8		/* crc = (crc >> 8) ^ entry */
+	xor SEED, SEED, t3
+	bne BUF, t1, .table_loop_\@	/* loop until the end pointer is reached */
+.table_done_\@:
+.endm
+
+/* define a function to calculate a crc64 refl hash; arguments arrive in SEED, BUF and LEN */
+.macro crc64_func_refl name:req
+.text
+.align 1
+.global \name
+.type \name\(), %function
+\name\():
+	/* load precomputed constants */
+	ld POLY, .poly
+	ld MU, .mu
+
+	not SEED, SEED			/* invert seed */
+
+	/* align and fold buffer to 64-bits */
+	neg t4, BUF			/* t4 = (-BUF) & 7: bytes up to the next 8-byte boundary */
+	andi t4, t4, 0b111		/* (BUF & 7 would be the offset past the previous boundary) */
+	bltu LEN, t4, .excess		/* buffer ends before the boundary: table only */
+	crc64_refl_table t4		/* consume the unaligned head byte by byte */
+	sub LEN, LEN, t4
+
+	crc_fold_loop 64 0 1
+	crc64_refl_fold_reduction
+
+.excess:
+	crc64_refl_table LEN		/* whatever is left in LEN goes through the table */
+
+	/* invert result */
+	not SEED, SEED
+	ret
+.endm
diff --git a/crc/riscv64/crc64_rocksoft_norm.S b/crc/riscv64/crc64_rocksoft_norm.S
new file mode 100644
index 00000000..003fd023
--- /dev/null
+++ b/crc/riscv64/crc64_rocksoft_norm.S
@@ -0,0 +1,179 @@
+########################################################################
+# Copyright(c) 2024 ByteDance All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of ByteDance Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+#include "crc64_norm_common_clmul.h"
+
+/* uint64_t crc64_rocksoft_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */
+crc64_func_norm crc64_rocksoft_norm /* function body is emitted by the macro from crc64_norm_common_clmul.h */
+
+/* precomputed folding constants; .poly, .mu and .k5 are loaded by the macros in crc64_norm_common_clmul.h */
+.poly:
+	.dword 0xad93d23594c93659 /* excludes leading 1 */
+.mu:
+	.dword 0xddf3eeb298be6fc8 /* excludes leading 1 */
+.k1: /* NOTE(review): .k1-.k4 are not referenced in this file; presumably read by crc_fold_loop - confirm */
+	.dword 0xa42a30f19b669860
+.k2:
+	.dword 0xb4414e6a0488488c
+.k3:
+	.dword 0x08578ba97f0476ae
+.k4: /* no data of its own: .k4 and .k5 both label the dword below */
+.k5:
+	.dword 0x6b08c948f0dd2f08
+
+/* lookup table: 256 dwords (128 rows of two), indexed by an 8-bit value; entry 1 repeats the .poly value */
+.crc64_table:
+	.dword 0x0000000000000000, 0xad93d23594c93659
+	.dword 0xf6b4765ebd5b5aeb, 0x5b27a46b29926cb2
+	.dword 0x40fb3e88ee7f838f, 0xed68ecbd7ab6b5d6
+	.dword 0xb64f48d65324d964, 0x1bdc9ae3c7edef3d
+	.dword 0x81f67d11dcff071e, 0x2c65af2448363147
+	.dword 0x77420b4f61a45df5, 0xdad1d97af56d6bac
+	.dword 0xc10d439932808491, 0x6c9e91aca649b2c8
+	.dword 0x37b935c78fdbde7a, 0x9a2ae7f21b12e823
+	.dword 0xae7f28162d373865, 0x03ecfa23b9fe0e3c
+	.dword 0x58cb5e48906c628e, 0xf5588c7d04a554d7
+	.dword 0xee84169ec348bbea, 0x4317c4ab57818db3
+	.dword 0x183060c07e13e101, 0xb5a3b2f5eadad758
+	.dword 0x2f895507f1c83f7b, 0x821a873265010922
+	.dword 0xd93d23594c936590, 0x74aef16cd85a53c9
+	.dword 0x6f726b8f1fb7bcf4, 0xc2e1b9ba8b7e8aad
+	.dword 0x99c61dd1a2ece61f, 0x3455cfe43625d046
+	.dword 0xf16d8219cea74693, 0x5cfe502c5a6e70ca
+	.dword 0x07d9f44773fc1c78, 0xaa4a2672e7352a21
+	.dword 0xb196bc9120d8c51c, 0x1c056ea4b411f345
+	.dword 0x4722cacf9d839ff7, 0xeab118fa094aa9ae
+	.dword 0x709bff081258418d, 0xdd082d3d869177d4
+	.dword 0x862f8956af031b66, 0x2bbc5b633bca2d3f
+	.dword 0x3060c180fc27c202, 0x9df313b568eef45b
+	.dword 0xc6d4b7de417c98e9, 0x6b4765ebd5b5aeb0
+	.dword 0x5f12aa0fe3907ef6, 0xf281783a775948af
+	.dword 0xa9a6dc515ecb241d, 0x04350e64ca021244
+	.dword 0x1fe994870deffd79, 0xb27a46b29926cb20
+	.dword 0xe95de2d9b0b4a792, 0x44ce30ec247d91cb
+	.dword 0xdee4d71e3f6f79e8, 0x7377052baba64fb1
+	.dword 0x2850a14082342303, 0x85c3737516fd155a
+	.dword 0x9e1fe996d110fa67, 0x338c3ba345d9cc3e
+	.dword 0x68ab9fc86c4ba08c, 0xc5384dfdf88296d5
+	.dword 0x4f48d6060987bb7f, 0xe2db04339d4e8d26
+	.dword 0xb9fca058b4dce194, 0x146f726d2015d7cd
+	.dword 0x0fb3e88ee7f838f0, 0xa2203abb73310ea9
+	.dword 0xf9079ed05aa3621b, 0x54944ce5ce6a5442
+	.dword 0xcebeab17d578bc61, 0x632d792241b18a38
+	.dword 0x380add496823e68a, 0x95990f7cfcead0d3
+	.dword 0x8e45959f3b073fee, 0x23d647aaafce09b7
+	.dword 0x78f1e3c1865c6505, 0xd56231f41295535c
+	.dword 0xe137fe1024b0831a, 0x4ca42c25b079b543
+	.dword 0x1783884e99ebd9f1, 0xba105a7b0d22efa8
+	.dword 0xa1ccc098cacf0095, 0x0c5f12ad5e0636cc
+	.dword 0x5778b6c677945a7e, 0xfaeb64f3e35d6c27
+	.dword 0x60c18301f84f8404, 0xcd5251346c86b25d
+	.dword 0x9675f55f4514deef, 0x3be6276ad1dde8b6
+	.dword 0x203abd891630078b, 0x8da96fbc82f931d2
+	.dword 0xd68ecbd7ab6b5d60, 0x7b1d19e23fa26b39
+	.dword 0xbe25541fc720fdec, 0x13b6862a53e9cbb5
+	.dword 0x489122417a7ba707, 0xe502f074eeb2915e
+	.dword 0xfede6a97295f7e63, 0x534db8a2bd96483a
+	.dword 0x086a1cc994042488, 0xa5f9cefc00cd12d1
+	.dword 0x3fd3290e1bdffaf2, 0x9240fb3b8f16ccab
+	.dword 0xc9675f50a684a019, 0x64f48d65324d9640
+	.dword 0x7f281786f5a0797d, 0xd2bbc5b361694f24
+	.dword 0x899c61d848fb2396, 0x240fb3eddc3215cf
+	.dword 0x105a7c09ea17c589, 0xbdc9ae3c7edef3d0
+	.dword 0xe6ee0a57574c9f62, 0x4b7dd862c385a93b
+	.dword 0x50a1428104684606, 0xfd3290b490a1705f
+	.dword 0xa61534dfb9331ced, 0x0b86e6ea2dfa2ab4
+	.dword 0x91ac011836e8c297, 0x3c3fd32da221f4ce
+	.dword 0x671877468bb3987c, 0xca8ba5731f7aae25
+	.dword 0xd1573f90d8974118, 0x7cc4eda54c5e7741
+	.dword 0x27e349ce65cc1bf3, 0x8a709bfbf1052daa
+	.dword 0x9e91ac0c130f76fe, 0x33027e3987c640a7
+	.dword 0x6825da52ae542c15, 0xc5b608673a9d1a4c
+	.dword 0xde6a9284fd70f571, 0x73f940b169b9c328
+	.dword 0x28dee4da402baf9a, 0x854d36efd4e299c3
+	.dword 0x1f67d11dcff071e0, 0xb2f403285b3947b9
+	.dword 0xe9d3a74372ab2b0b, 0x44407576e6621d52
+	.dword 0x5f9cef95218ff26f, 0xf20f3da0b546c436
+	.dword 0xa92899cb9cd4a884, 0x04bb4bfe081d9edd
+	.dword 0x30ee841a3e384e9b, 0x9d7d562faaf178c2
+	.dword 0xc65af24483631470, 0x6bc9207117aa2229
+	.dword 0x7015ba92d047cd14, 0xdd8668a7448efb4d
+	.dword 0x86a1cccc6d1c97ff, 0x2b321ef9f9d5a1a6
+	.dword 0xb118f90be2c74985, 0x1c8b2b3e760e7fdc
+	.dword 0x47ac8f555f9c136e, 0xea3f5d60cb552537
+	.dword 0xf1e3c7830cb8ca0a, 0x5c7015b69871fc53
+	.dword 0x0757b1ddb1e390e1, 0xaac463e8252aa6b8
+	.dword 0x6ffc2e15dda8306d, 0xc26ffc2049610634
+	.dword 0x9948584b60f36a86, 0x34db8a7ef43a5cdf
+	.dword 0x2f07109d33d7b3e2, 0x8294c2a8a71e85bb
+	.dword 0xd9b366c38e8ce909, 0x7420b4f61a45df50
+	.dword 0xee0a530401573773, 0x43998131959e012a
+	.dword 0x18be255abc0c6d98, 0xb52df76f28c55bc1
+	.dword 0xaef16d8cef28b4fc, 0x0362bfb97be182a5
+	.dword 0x58451bd25273ee17, 0xf5d6c9e7c6bad84e
+	.dword 0xc1830603f09f0808, 0x6c10d43664563e51
+	.dword 0x3737705d4dc452e3, 0x9aa4a268d90d64ba
+	.dword 0x8178388b1ee08b87, 0x2cebeabe8a29bdde
+	.dword 0x77cc4ed5a3bbd16c, 0xda5f9ce03772e735
+	.dword 0x40757b122c600f16, 0xede6a927b8a9394f
+	.dword 0xb6c10d4c913b55fd, 0x1b52df7905f263a4
+	.dword 0x008e459ac21f8c99, 0xad1d97af56d6bac0
+	.dword 0xf63a33c47f44d672, 0x5ba9e1f1eb8de02b
+	.dword 0xd1d97a0a1a88cd81, 0x7c4aa83f8e41fbd8
+	.dword 0x276d0c54a7d3976a, 0x8afede61331aa133
+	.dword 0x91224482f4f74e0e, 0x3cb196b7603e7857
+	.dword 0x679632dc49ac14e5, 0xca05e0e9dd6522bc
+	.dword 0x502f071bc677ca9f, 0xfdbcd52e52befcc6
+	.dword 0xa69b71457b2c9074, 0x0b08a370efe5a62d
+	.dword 0x10d4399328084910, 0xbd47eba6bcc17f49
+	.dword 0xe6604fcd955313fb, 0x4bf39df8019a25a2
+	.dword 0x7fa6521c37bff5e4, 0xd2358029a376c3bd
+	.dword 0x891224428ae4af0f, 0x2481f6771e2d9956
+	.dword 0x3f5d6c94d9c0766b, 0x92cebea14d094032
+	.dword 0xc9e91aca649b2c80, 0x647ac8fff0521ad9
+	.dword 0xfe502f0deb40f2fa, 0x53c3fd387f89c4a3
+	.dword 0x08e45953561ba811, 0xa5778b66c2d29e48
+	.dword 0xbeab1185053f7175, 0x1338c3b091f6472c
+	.dword 0x481f67dbb8642b9e, 0xe58cb5ee2cad1dc7
+	.dword 0x20b4f813d42f8b12, 0x8d272a2640e6bd4b
+	.dword 0xd6008e4d6974d1f9, 0x7b935c78fdbde7a0
+	.dword 0x604fc69b3a50089d, 0xcddc14aeae993ec4
+	.dword 0x96fbb0c5870b5276, 0x3b6862f013c2642f
+	.dword 0xa142850208d08c0c, 0x0cd157379c19ba55
+	.dword 0x57f6f35cb58bd6e7, 0xfa6521692142e0be
+	.dword 0xe1b9bb8ae6af0f83, 0x4c2a69bf726639da
+	.dword 0x170dcdd45bf45568, 0xba9e1fe1cf3d6331
+	.dword 0x8ecbd005f918b377, 0x235802306dd1852e
+	.dword 0x787fa65b4443e99c, 0xd5ec746ed08adfc5
+	.dword 0xce30ee8d176730f8, 0x63a33cb883ae06a1
+	.dword 0x388498d3aa3c6a13, 0x95174ae63ef55c4a
+	.dword 0x0f3dad1425e7b469, 0xa2ae7f21b12e8230
+	.dword 0xf989db4a98bcee82, 0x541a097f0c75d8db
+	.dword 0x4fc6939ccb9837e6, 0xe25541a95f5101bf
+	.dword 0xb972e5c276c36d0d, 0x14e137f7e20a5b54
diff --git a/crc/riscv64/crc64_rocksoft_refl.S b/crc/riscv64/crc64_rocksoft_refl.S
new file mode 100644
index 00000000..97e7cc9b
--- /dev/null
+++ b/crc/riscv64/crc64_rocksoft_refl.S
@@ -0,0 +1,179 @@
+########################################################################
+# Copyright(c) 2024 ByteDance All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc64_refl_common_clmul.h" + +/* uint64_t crc64_rocksoft_refl(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_refl crc64_rocksoft_refl + +/* precomputed folding constants */ +.poly: + .dword 0x34d926535897936b /* poly reflected, excluding leading 1 */ +.mu: + .dword 0x27ecfa329aef9f77 +.k1: + .dword 0x0c32cdb31e18a84a +.k2: + .dword 0x62242240ace5045a +.k3: + .dword 0xeadc41fd2ba3d420 +.k4: +.k5: + .dword 0x21e9761e252621ac + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x7f6ef0c830358979 + .dword 0xfedde190606b12f2, 0x81b31158505e9b8b + .dword 0xc962e5739841b68f, 0xb60c15bba8743ff6 + .dword 0x37bf04e3f82aa47d, 0x48d1f42bc81f2d04 + .dword 0xa61cecb46814fe75, 0xd9721c7c5821770c + .dword 0x58c10d24087fec87, 0x27affdec384a65fe + .dword 0x6f7e09c7f05548fa, 0x1010f90fc060c183 + .dword 0x91a3e857903e5a08, 0xeecd189fa00bd371 + .dword 0x78e0ff3b88be6f81, 0x078e0ff3b88be6f8 + .dword 0x863d1eabe8d57d73, 0xf953ee63d8e0f40a + .dword 0xb1821a4810ffd90e, 0xceecea8020ca5077 + .dword 0x4f5ffbd87094cbfc, 0x30310b1040a14285 + .dword 0xdefc138fe0aa91f4, 0xa192e347d09f188d + .dword 0x2021f21f80c18306, 0x5f4f02d7b0f40a7f + .dword 0x179ef6fc78eb277b, 0x68f0063448deae02 + .dword 0xe943176c18803589, 0x962de7a428b5bcf0 + .dword 0xf1c1fe77117cdf02, 0x8eaf0ebf2149567b + .dword 0x0f1c1fe77117cdf0, 0x7072ef2f41224489 + .dword 0x38a31b04893d698d, 0x47cdebccb908e0f4 + .dword 0xc67efa94e9567b7f, 0xb9100a5cd963f206 + .dword 0x57dd12c379682177, 0x28b3e20b495da80e + .dword 0xa900f35319033385, 0xd66e039b2936bafc + .dword 0x9ebff7b0e12997f8, 0xe1d10778d11c1e81 + .dword 0x606216208142850a, 0x1f0ce6e8b1770c73 + .dword 0x8921014c99c2b083, 0xf64ff184a9f739fa + .dword 0x77fce0dcf9a9a271, 0x08921014c99c2b08 + .dword 0x4043e43f0183060c, 0x3f2d14f731b68f75 + .dword 0xbe9e05af61e814fe, 0xc1f0f56751dd9d87 + .dword 0x2f3dedf8f1d64ef6, 0x50531d30c1e3c78f + .dword 0xd1e00c6891bd5c04,
0xae8efca0a188d57d + .dword 0xe65f088b6997f879, 0x9931f84359a27100 + .dword 0x1882e91b09fcea8b, 0x67ec19d339c963f2 + .dword 0xd75adabd7a6e2d6f, 0xa8342a754a5ba416 + .dword 0x29873b2d1a053f9d, 0x56e9cbe52a30b6e4 + .dword 0x1e383fcee22f9be0, 0x6156cf06d21a1299 + .dword 0xe0e5de5e82448912, 0x9f8b2e96b271006b + .dword 0x71463609127ad31a, 0x0e28c6c1224f5a63 + .dword 0x8f9bd7997211c1e8, 0xf0f5275142244891 + .dword 0xb824d37a8a3b6595, 0xc74a23b2ba0eecec + .dword 0x46f932eaea507767, 0x3997c222da65fe1e + .dword 0xafba2586f2d042ee, 0xd0d4d54ec2e5cb97 + .dword 0x5167c41692bb501c, 0x2e0934dea28ed965 + .dword 0x66d8c0f56a91f461, 0x19b6303d5aa47d18 + .dword 0x980521650afae693, 0xe76bd1ad3acf6fea + .dword 0x09a6c9329ac4bc9b, 0x76c839faaaf135e2 + .dword 0xf77b28a2faafae69, 0x8815d86aca9a2710 + .dword 0xc0c42c4102850a14, 0xbfaadc8932b0836d + .dword 0x3e19cdd162ee18e6, 0x41773d1952db919f + .dword 0x269b24ca6b12f26d, 0x59f5d4025b277b14 + .dword 0xd846c55a0b79e09f, 0xa72835923b4c69e6 + .dword 0xeff9c1b9f35344e2, 0x90973171c366cd9b + .dword 0x1124202993385610, 0x6e4ad0e1a30ddf69 + .dword 0x8087c87e03060c18, 0xffe938b633338561 + .dword 0x7e5a29ee636d1eea, 0x0134d92653589793 + .dword 0x49e52d0d9b47ba97, 0x368bddc5ab7233ee + .dword 0xb738cc9dfb2ca865, 0xc8563c55cb19211c + .dword 0x5e7bdbf1e3ac9dec, 0x21152b39d3991495 + .dword 0xa0a63a6183c78f1e, 0xdfc8caa9b3f20667 + .dword 0x97193e827bed2b63, 0xe877ce4a4bd8a21a + .dword 0x69c4df121b863991, 0x16aa2fda2bb3b0e8 + .dword 0xf86737458bb86399, 0x8709c78dbb8deae0 + .dword 0x06bad6d5ebd3716b, 0x79d4261ddbe6f812 + .dword 0x3105d23613f9d516, 0x4e6b22fe23cc5c6f + .dword 0xcfd833a67392c7e4, 0xb0b6c36e43a74e9d + .dword 0x9a6c9329ac4bc9b5, 0xe50263e19c7e40cc + .dword 0x64b172b9cc20db47, 0x1bdf8271fc15523e + .dword 0x530e765a340a7f3a, 0x2c608692043ff643 + .dword 0xadd397ca54616dc8, 0xd2bd67026454e4b1 + .dword 0x3c707f9dc45f37c0, 0x431e8f55f46abeb9 + .dword 0xc2ad9e0da4342532, 0xbdc36ec59401ac4b + .dword 0xf5129aee5c1e814f, 0x8a7c6a266c2b0836 + .dword 
0x0bcf7b7e3c7593bd, 0x74a18bb60c401ac4 + .dword 0xe28c6c1224f5a634, 0x9de29cda14c02f4d + .dword 0x1c518d82449eb4c6, 0x633f7d4a74ab3dbf + .dword 0x2bee8961bcb410bb, 0x548079a98c8199c2 + .dword 0xd53368f1dcdf0249, 0xaa5d9839ecea8b30 + .dword 0x449080a64ce15841, 0x3bfe706e7cd4d138 + .dword 0xba4d61362c8a4ab3, 0xc52391fe1cbfc3ca + .dword 0x8df265d5d4a0eece, 0xf29c951de49567b7 + .dword 0x732f8445b4cbfc3c, 0x0c41748d84fe7545 + .dword 0x6bad6d5ebd3716b7, 0x14c39d968d029fce + .dword 0x95708ccedd5c0445, 0xea1e7c06ed698d3c + .dword 0xa2cf882d2576a038, 0xdda178e515432941 + .dword 0x5c1269bd451db2ca, 0x237c997575283bb3 + .dword 0xcdb181ead523e8c2, 0xb2df7122e51661bb + .dword 0x336c607ab548fa30, 0x4c0290b2857d7349 + .dword 0x04d364994d625e4d, 0x7bbd94517d57d734 + .dword 0xfa0e85092d094cbf, 0x856075c11d3cc5c6 + .dword 0x134d926535897936, 0x6c2362ad05bcf04f + .dword 0xed9073f555e26bc4, 0x92fe833d65d7e2bd + .dword 0xda2f7716adc8cfb9, 0xa54187de9dfd46c0 + .dword 0x24f29686cda3dd4b, 0x5b9c664efd965432 + .dword 0xb5517ed15d9d8743, 0xca3f8e196da80e3a + .dword 0x4b8c9f413df695b1, 0x34e26f890dc31cc8 + .dword 0x7c339ba2c5dc31cc, 0x035d6b6af5e9b8b5 + .dword 0x82ee7a32a5b7233e, 0xfd808afa9582aa47 + .dword 0x4d364994d625e4da, 0x3258b95ce6106da3 + .dword 0xb3eba804b64ef628, 0xcc8558cc867b7f51 + .dword 0x8454ace74e645255, 0xfb3a5c2f7e51db2c + .dword 0x7a894d772e0f40a7, 0x05e7bdbf1e3ac9de + .dword 0xeb2aa520be311aaf, 0x944455e88e0493d6 + .dword 0x15f744b0de5a085d, 0x6a99b478ee6f8124 + .dword 0x224840532670ac20, 0x5d26b09b16452559 + .dword 0xdc95a1c3461bbed2, 0xa3fb510b762e37ab + .dword 0x35d6b6af5e9b8b5b, 0x4ab846676eae0222 + .dword 0xcb0b573f3ef099a9, 0xb465a7f70ec510d0 + .dword 0xfcb453dcc6da3dd4, 0x83daa314f6efb4ad + .dword 0x0269b24ca6b12f26, 0x7d0742849684a65f + .dword 0x93ca5a1b368f752e, 0xeca4aad306bafc57 + .dword 0x6d17bb8b56e467dc, 0x12794b4366d1eea5 + .dword 0x5aa8bf68aecec3a1, 0x25c64fa09efb4ad8 + .dword 0xa4755ef8cea5d153, 0xdb1bae30fe90582a + .dword 0xbcf7b7e3c7593bd8, 
0xc399472bf76cb2a1 + .dword 0x422a5673a732292a, 0x3d44a6bb9707a053 + .dword 0x759552905f188d57, 0x0afba2586f2d042e + .dword 0x8b48b3003f739fa5, 0xf42643c80f4616dc + .dword 0x1aeb5b57af4dc5ad, 0x6585ab9f9f784cd4 + .dword 0xe436bac7cf26d75f, 0x9b584a0fff135e26 + .dword 0xd389be24370c7322, 0xace74eec0739fa5b + .dword 0x2d545fb4576761d0, 0x523aaf7c6752e8a9 + .dword 0xc41748d84fe75459, 0xbb79b8107fd2dd20 + .dword 0x3acaa9482f8c46ab, 0x45a459801fb9cfd2 + .dword 0x0d75adabd7a6e2d6, 0x721b5d63e7936baf + .dword 0xf3a84c3bb7cdf024, 0x8cc6bcf387f8795d + .dword 0x620ba46c27f3aa2c, 0x1d6554a417c62355 + .dword 0x9cd645fc4798b8de, 0xe3b8b53477ad31a7 + .dword 0xab69411fbfb21ca3, 0xd407b1d78f8795da + .dword 0x55b4a08fdfd90e51, 0x2ada5047efec8728 diff --git a/crc/riscv64/crc_fold_common_clmul.h b/crc/riscv64/crc_fold_common_clmul.h new file mode 100644 index 00000000..b3a0b5b1 --- /dev/null +++ b/crc/riscv64/crc_fold_common_clmul.h @@ -0,0 +1,342 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+#define SEED a0
+#define BUF a1
+#define LEN a2
+#define POLY a3 /* a3 is also the fold-by-four end pointer; POLY is spilled and restored around that loop */
+#define MU a4 /* a4 is also X3K2HIGH; MU is spilled and restored around the fold-by-four loop */
+#define K1 t5 /* K1/K3/K5 share t5: only one of them is live at a time */
+#define K2 t6 /* K2/K4/K6 share t6: only one of them is live at a time */
+#define K3 t5
+#define K4 t6
+#define K5 t5
+#define K6 t6
+
+#define X3HIGH t0
+#define HIGH t0
+#define X3LOW t1
+#define LOW t1
+
+#define X2HIGH t2
+#define X2LOW a5
+#define X1HIGH a6
+#define X1LOW a7
+#define X0HIGH t3
+#define X0LOW t4
+
+#define BUF3HIGH s4
+#define BUF3LOW s5
+#define BUF2HIGH s6
+#define BUF2LOW s7
+#define BUF1HIGH s8
+#define BUF1LOW s9
+#define BUF0HIGH s10
+#define BUF0LOW s11
+
+#define X3K1LOW ra /* ra is spilled and restored around the fold-by-four loop */
+#define X3K2HIGH a4 /* shares a4 with MU; gp is deliberately not used (psABI: procedures must not modify gp) */
+#define X2K1LOW a2 /* shares a2 with LEN; tp is deliberately not used (psABI: procedures must not modify tp) */
+#define X2K2HIGH s0
+#define X1K1LOW s1
+#define X1K2HIGH a0 /* reuses SEED's register; the seed is xored in before the loop starts */
+#define X0K1LOW s2
+#define X0K2HIGH s3
+
+/* crc_fold_loop: repeated fold-by-four (64 bytes per iteration, taken when LEN >= 128) followed by fold-by-one (16 bytes per iteration) */
+/* takes parameter \bits, bit length of polynomial (32 or 64); \reflected is a boolean selecting which of clmul/clmulh gives the low/high product word */
+/* \endianswap is a boolean parameter, controlling whether an endianness swap is
+ * needed (true for norm crc on little-endian cpu, false for refl crc) */
+/* expects SEED (a0), BUF (a1) and LEN (a2) to hold those values; labels .k1-.k4 must be defined by the including file */
+/* expects BUF is doubleword-aligned */
+/* returns 128-bit result in HIGH:LOW (t0:t1); when LEN < 16 nothing is folded and control goes to .fold_1_done instead */
+/* returns updated buffer ptr & length in BUF and LEN */
+/* trashes all caller-saved registers except POLY and MU (a3/a4); ra, a3, a4 and s0-s11 are spilled to a 144-byte frame and restored; gp/tp are never written */
+.macro crc_fold_loop bits:req endianswap:req reflected:req
+
+	/* for a reflected crc, clmulh gets low word and vice-versa */
+.macro clmul_low rd:req, rs1:req, rs2:req
+.if !\reflected
+	clmul \rd, \rs1, \rs2
+.else
+	clmulh \rd, \rs1, \rs2
+.endif
+.endm
+.macro clmul_high rd:req, rs1:req, rs2:req
+.if !\reflected
+	clmulh \rd, \rs1, \rs2
+.else
+	clmul \rd, \rs1, \rs2
+.endif
+.endm
+
+	/* does enough buffer exist for a 4-fold? */
+	li t0, 128
+	bltu LEN, t0, .fold_1
+
+	/* spill every register reused as scratch below: ra, a3 (POLY), a4 (MU) and s0-s11; LEN is spilled once its final value is known */
+	addi sp, sp, -144 /* 16 slots at 8..128(sp); frame size is a multiple of 16 so sp keeps the alignment the psABI requires */
+	sd a3, 128(sp)
+	sd ra, 120(sp)
+	sd a4, 112(sp) /* MU */
+	sd s0, 96(sp)
+	sd s1, 88(sp)
+	sd s2, 80(sp)
+	sd s3, 72(sp)
+	sd s4, 64(sp)
+	sd s5, 56(sp)
+	sd s6, 48(sp)
+	sd s7, 40(sp)
+	sd s8, 32(sp)
+	sd s9, 24(sp)
+	sd s10, 16(sp)
+	sd s11, 8(sp)
+
+	/* load initial 4 128-bit chunks */
+	ld X3HIGH, 0(BUF)
+	ld X3LOW, 8(BUF)
+	ld X2HIGH, 16(BUF)
+	ld X2LOW, 24(BUF)
+	ld X1HIGH, 32(BUF)
+	ld X1LOW, 40(BUF)
+	ld X0HIGH, 48(BUF)
+	ld X0LOW, 56(BUF)
+
+	addi BUF, BUF, 64
+	addi LEN, LEN, -64
+
+	/* endianness swap */
+.if \endianswap
+	rev8 X3HIGH, X3HIGH
+	rev8 X3LOW, X3LOW
+	rev8 X2HIGH, X2HIGH
+	rev8 X2LOW, X2LOW
+	rev8 X1HIGH, X1HIGH
+	rev8 X1LOW, X1LOW
+	rev8 X0HIGH, X0HIGH
+	rev8 X0LOW, X0LOW
+.endif
+
+	/* xor in seed */
+.if (\bits != 64) && \endianswap
+	slli SEED, SEED, 64 - \bits
+.endif
+	xor X3HIGH, X3HIGH, SEED
+
+	/* load constants */
+	ld K1, .k1
+	ld K2, .k2
+
+	/* calculate how far we'll fold until (a3 = BUF + LEN rounded down to 64) and load LEN with the amount left */
+	srli a3, LEN, 6
+	slli a3, a3, 6
+	add a3, BUF, a3
+	andi LEN, LEN, 0x3f
+	sd LEN, 104(sp) /* a2 doubles as X2K1LOW inside the loop; the remaining length is reloaded afterwards */
+
+.align 3
+.fold_4_loop:
+	/* carryless multiply each high doubleword by k1, get 128-bit result */
+	/* interleave fetching next 4 128-bit chunks */
+	clmul_low X3K1LOW, K1, X3HIGH
+	ld BUF3HIGH, 0(BUF)
+	clmul_low X2K1LOW, K1, X2HIGH
+	ld BUF3LOW, 8(BUF)
+	clmul_low X1K1LOW, K1, X1HIGH
+	ld BUF2HIGH, 16(BUF)
+	clmul_low X0K1LOW, K1, X0HIGH
+	ld BUF2LOW, 24(BUF)
+	clmul_high X3HIGH, K1, X3HIGH
+	ld BUF1HIGH, 32(BUF)
+	clmul_high X2HIGH, K1, X2HIGH
+	ld BUF1LOW, 40(BUF)
+	clmul_high X1HIGH, K1, X1HIGH
+	ld BUF0HIGH, 48(BUF)
+	clmul_high X0HIGH, K1, X0HIGH
+	ld BUF0LOW, 56(BUF)
+
+	addi BUF, BUF, 64
+
+	/* endianness swap */
+.if \endianswap
+	rev8 BUF3HIGH, BUF3HIGH
+	rev8 BUF3LOW, BUF3LOW
+	rev8 BUF2HIGH, BUF2HIGH
+	rev8 BUF2LOW, BUF2LOW
+	rev8 BUF1HIGH, BUF1HIGH
+	rev8 BUF1LOW, BUF1LOW
+	rev8 BUF0HIGH, BUF0HIGH
+	rev8 BUF0LOW, BUF0LOW
+.endif
+
+	/* carryless multiply each low doubleword by k2 */
+	clmul_high X3K2HIGH, K2, X3LOW
+	clmul_high X2K2HIGH, K2, X2LOW
+	clmul_high X1K2HIGH, K2, X1LOW
+	clmul_high X0K2HIGH, K2, X0LOW
+	clmul_low X3LOW, K2, X3LOW
+	clmul_low X2LOW, K2, X2LOW
+	clmul_low X1LOW, K2, X1LOW
+	clmul_low X0LOW, K2, X0LOW
+
+	/* xor results together */
+	xor BUF3LOW, BUF3LOW, X3K1LOW
+	xor BUF2LOW, BUF2LOW, X2K1LOW
+	xor BUF1LOW, BUF1LOW, X1K1LOW
+	xor BUF0LOW, BUF0LOW, X0K1LOW
+	xor X3HIGH, BUF3HIGH, X3HIGH
+	xor X2HIGH, BUF2HIGH, X2HIGH
+	xor X1HIGH, BUF1HIGH, X1HIGH
+	xor X0HIGH, BUF0HIGH, X0HIGH
+	xor X3LOW, X3LOW, BUF3LOW
+	xor X2LOW, X2LOW, BUF2LOW
+	xor X1LOW, X1LOW, BUF1LOW
+	xor X0LOW, X0LOW, BUF0LOW
+	xor X3HIGH, X3K2HIGH, X3HIGH
+	xor X2HIGH, X2K2HIGH, X2HIGH
+	xor X1HIGH, X1K2HIGH, X1HIGH
+	xor X0HIGH, X0K2HIGH, X0HIGH
+
+	bne BUF, a3, .fold_4_loop
+
+	/* we've four folded as much as we can, fold-by-one values in regs */
+	/* load fold-by-one constants */
+	ld K3, .k3
+	ld K4, .k4
+
+	clmul_high s0, K3, X3HIGH
+	clmul_low s1, K3, X3HIGH
+	clmul_high s2, K4, X3LOW
+	clmul_low s3, K4, X3LOW
+	xor HIGH, X2HIGH, s0
+	xor HIGH, HIGH, s2
+	xor LOW, X2LOW, s1
+	xor LOW, LOW, s3
+
+	clmul_high s0, K3, HIGH
+	clmul_low s1, K3, HIGH
+	clmul_high s2, K4, LOW
+	clmul_low s3, K4, LOW
+	xor HIGH, X1HIGH, s0
+	xor HIGH, HIGH, s2
+	xor LOW, X1LOW, s1
+	xor LOW, LOW, s3
+
+	clmul_high s0, K3, HIGH
+	clmul_low s1, K3, HIGH
+	clmul_high s2, K4, LOW
+	clmul_low s3, K4, LOW
+	xor HIGH, X0HIGH, s0
+	xor HIGH, HIGH, s2
+	xor LOW, X0LOW, s1
+	xor LOW, LOW, s3
+
+	/* pop register values saved on stack (also brings back MU and the remaining LEN) */
+	ld a3, 128(sp)
+	ld ra, 120(sp)
+	ld a4, 112(sp)
+	ld LEN, 104(sp)
+	ld s0, 96(sp)
+	ld s1, 88(sp)
+	ld s2, 80(sp)
+	ld s3, 72(sp)
+	ld s4, 64(sp)
+	ld s5, 56(sp)
+	ld s6, 48(sp)
+	ld s7, 40(sp)
+	ld s8, 32(sp)
+	ld s9, 24(sp)
+	ld s10, 16(sp)
+	ld s11, 8(sp)
+	addi sp, sp, 144
+
+	/* load fold loop constant, check if any more 1-folding to do */
+	li t4, 16
+	bgeu LEN, t4, .fold_1_loop
+	/* else jump straight to end */
+	j .fold_1_cleanup
+
+.fold_1:
+	li t4, 16 /* kept throughout loop */
+	/* handle case where not enough buffer to do any fold */
+	/* .fold_1_done must be defined by the crc32/64 fold reduction macro */
+	bltu LEN, t4, .fold_1_done
+
+	/* load in initial values and xor with seed */
+	ld HIGH, 0(BUF)
+.if \endianswap
+	rev8 HIGH, HIGH
+.endif
+
+.if (\bits != 64) && \endianswap
+	slli SEED, SEED, 64 - \bits
+.endif
+	xor HIGH, HIGH, SEED
+
+	ld LOW, 8(BUF)
+.if \endianswap
+	rev8 LOW, LOW
+.endif
+
+	addi LEN, LEN, -16
+	addi BUF, BUF, 16
+
+	bltu LEN, t4, .fold_1_cleanup
+
+	/* precomputed constants */
+	ld K3, .k3
+	ld K4, .k4
+.fold_1_loop:
+	/* multiply high and low by constants to get two 128-bit results */
+	clmul_high t2, K3, HIGH
+	clmul_low t3, K3, HIGH
+	clmul_high a5, K4, LOW
+	clmul_low a6, K4, LOW
+
+	/* load next 128-bits of buffer */
+	ld HIGH, 0(BUF)
+	ld LOW, 8(BUF)
+.if \endianswap
+	rev8 HIGH, HIGH
+	rev8 LOW, LOW
+.endif
+
+	addi LEN, LEN, -16
+	addi BUF, BUF, 16
+
+	/* fold in values with xor */
+	xor HIGH, HIGH, t2
+	xor HIGH, HIGH, a5
+	xor LOW, LOW, t3
+	xor LOW, LOW, a6
+
+	bgeu LEN, t4, .fold_1_loop
+
+.fold_1_cleanup:
+.endm
diff --git a/erasure_code/Makefile.am b/erasure_code/Makefile.am
index 8f334462..15bb4265 100644
--- a/erasure_code/Makefile.am
+++ b/erasure_code/Makefile.am
@@ -34,6 +34,7 @@ include erasure_code/ppc64le/Makefile.am lsrc +=
erasure_code/ec_base.c lsrc_base_aliases += erasure_code/ec_base_aliases.c +lsrc_riscv64 += erasure_code/ec_base_aliases.c lsrc_x86_64 += \ erasure_code/ec_highlevel_func.c \ erasure_code/gf_vect_mul_sse.asm \ diff --git a/igzip/Makefile.am b/igzip/Makefile.am index bec359ab..01622914 100644 --- a/igzip/Makefile.am +++ b/igzip/Makefile.am @@ -39,6 +39,7 @@ lsrc += igzip/igzip.c \ lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c lsrc_ppc64le += igzip/igzip_base_aliases.c igzip/proc_heap_base.c +lsrc_riscv64 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \ igzip/aarch64/igzip_multibinary_arm64.S \ diff --git a/mem/Makefile.am b/mem/Makefile.am index a49fc0c5..6c537de4 100644 --- a/mem/Makefile.am +++ b/mem/Makefile.am @@ -33,6 +33,7 @@ lsrc += mem/mem_zero_detect_base.c lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c lsrc_ppc64le += mem/mem_zero_detect_base_aliases.c +lsrc_riscv64 += mem/mem_zero_detect_base_aliases.c lsrc_x86_64 += mem/mem_zero_detect_avx512.asm \ mem/mem_zero_detect_avx2.asm \ diff --git a/raid/Makefile.am b/raid/Makefile.am index 854f258e..63ab6a21 100644 --- a/raid/Makefile.am +++ b/raid/Makefile.am @@ -33,6 +33,7 @@ lsrc += raid/raid_base.c lsrc_base_aliases += raid/raid_base_aliases.c lsrc_ppc64le += raid/raid_base_aliases.c +lsrc_riscv64 += raid/raid_base_aliases.c lsrc_x86_64 += \ raid/xor_gen_sse.asm \