Skip to content

Commit a38b0a0

Browse files
jvarela-jumpptaffet-jumpriptl
authored
vinyl: xxhash-r39 AVX512DQ implementation (#7206)
Co-authored-by: Philip Taffet <[email protected]> Co-authored-by: Richard Patel <[email protected]>
1 parent 6a6861d commit a38b0a0

File tree

7 files changed

+291
-9
lines changed

7 files changed

+291
-9
lines changed

src/discof/restore/fd_snapwh_tile.c

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ handle_data_frag( fd_snapwh_t * ctx,
174174
FD_CRIT( fd_ulong_is_aligned( (ulong)rem, FD_VINYL_BSTREAM_BLOCK_SZ ), "misaligned write request" );
175175
FD_CRIT( fd_ulong_is_aligned( rem_sz, FD_VINYL_BSTREAM_BLOCK_SZ ), "misaligned write request" );
176176

177+
#define PAIR_HASH_N (8)
178+
179+
uchar * pair[PAIR_HASH_N];
180+
ulong pair_sz[PAIR_HASH_N];
181+
ulong pair_cnt = 0UL;
177182
while( rem_sz ) {
178183
FD_CRIT( rem_sz>=FD_VINYL_BSTREAM_BLOCK_SZ, "corrupted bstream block" );
179184
fd_vinyl_bstream_phdr_t * phdr = (fd_vinyl_bstream_phdr_t *)rem;
@@ -182,10 +187,11 @@ handle_data_frag( fd_snapwh_t * ctx,
182187
switch( ctl_type ) {
183188

184189
case FD_VINYL_BSTREAM_CTL_TYPE_PAIR: {
185-
ulong val_esz = fd_vinyl_bstream_ctl_sz( ctl );
186-
ulong block_sz = fd_vinyl_bstream_pair_sz( val_esz );
187-
FD_CRIT( rem_sz>=block_sz, "corrupted bstream pair" );
188-
fd_vinyl_bstream_pair_hash( io_seed, (fd_vinyl_bstream_block_t *)rem );
190+
pair[ pair_cnt ] = rem;
191+
ulong val_esz = fd_vinyl_bstream_ctl_sz( ctl );
192+
ulong block_sz = fd_vinyl_bstream_pair_sz( val_esz );
193+
pair_sz[ pair_cnt ] = block_sz;
194+
pair_cnt += 1UL;
189195
rem += block_sz;
190196
rem_sz -= block_sz;
191197
break;
@@ -199,9 +205,43 @@ handle_data_frag( fd_snapwh_t * ctx,
199205

200206
default:
201207
FD_LOG_CRIT(( "unexpected vinyl bstream block ctl=%016lx", ctl ));
208+
}
202209

210+
if( FD_UNLIKELY( ( pair_cnt==PAIR_HASH_N ) || ( !rem_sz ) ) ) {
211+
# if FD_HAS_AVX512 && defined(__AVX512DQ__)
212+
ulong h_seed[PAIR_HASH_N];
213+
ulong h_trail[PAIR_HASH_N];
214+
ulong h_block[PAIR_HASH_N];
215+
void const * h_tin [PAIR_HASH_N];
216+
ulong h_tinsz[PAIR_HASH_N] = {0};
217+
void const * h_bin [PAIR_HASH_N];
218+
ulong h_binsz[PAIR_HASH_N] = {0};
219+
for( ulong i=0UL; i<pair_cnt; i++ ) {
220+
h_seed[ i ] = io_seed;
221+
fd_vinyl_bstream_pair_zero( (fd_vinyl_bstream_block_t *)pair[ i ] );
222+
h_tin [ i ] = pair [ i ] + FD_VINYL_BSTREAM_BLOCK_SZ;
223+
h_tinsz[ i ] = pair_sz[ i ] - FD_VINYL_BSTREAM_BLOCK_SZ;
224+
h_bin [ i ] = pair [ i ];
225+
h_binsz[ i ] = FD_VINYL_BSTREAM_BLOCK_SZ;
226+
}
227+
fd_vinyl_bstream_hash_batch8( h_seed, h_trail, h_tin, h_tinsz );
228+
fd_vinyl_bstream_hash_batch8( h_trail, h_block, h_bin, h_binsz );
229+
for( ulong i=0UL; i<pair_cnt; i++ ) {
230+
fd_vinyl_bstream_block_t * ftr = (fd_vinyl_bstream_block_t *)( pair[ i ]+pair_sz[ i ]-FD_VINYL_BSTREAM_BLOCK_SZ );
231+
ftr->ftr.hash_trail = h_trail[ i ];
232+
ftr->ftr.hash_blocks = h_block[ i ];
233+
}
234+
# else
235+
(void)pair_sz;
236+
for( ulong hash_i=0UL; hash_i<pair_cnt; hash_i++ ) {
237+
fd_vinyl_bstream_pair_hash( io_seed, (fd_vinyl_bstream_block_t *)pair[ hash_i ] );
238+
}
239+
# endif
240+
pair_cnt = 0UL;
203241
}
204242
}
243+
244+
#undef PAIR_HASH_N
205245
}
206246

207247
static int

src/discof/restore/utils/fd_vinyl_io_wd.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,16 +124,16 @@ fd_vinyl_io_wd_append( fd_vinyl_io_t * io,
124124
fd_vinyl_io_wd_t * wd = (fd_vinyl_io_wd_t *)io; /* Note: io must be non-NULL to have even been called */
125125
uchar const * src = (uchar const *)_src;
126126

127-
wd_buf_t * buf = wd->buf_append;
128-
FD_CRIT( buf, "no append buf" );
129-
FD_CRIT( buf->state==WD_BUF_APPEND, "corrupt wd_buf" );
130-
FD_CRIT( src>=buf->buf && (src+sz)<=buf->buf+wd->wr_mtu, "alloc invalidated" );
131-
132127
/* Validate the input args. */
133128

134129
ulong seq_future = wd->base->seq_future;
135130
if( FD_UNLIKELY( !sz ) ) return seq_future;
136131

132+
wd_buf_t * buf = wd->buf_append;
133+
FD_CRIT( buf, "no append buf" );
134+
FD_CRIT( buf->state==WD_BUF_APPEND, "corrupt wd_buf" );
135+
FD_CRIT( src>=buf->buf && (src+sz)<=buf->buf+wd->wr_mtu, "alloc invalidated" );
136+
137137
int bad_src = !src || src<wd->wr_chunk0 || (src+sz)>wd->wr_chunk1;
138138
int bad_align = !fd_ulong_is_aligned( (ulong)src, FD_VINYL_BSTREAM_BLOCK_SZ );
139139
int bad_sz = !fd_ulong_is_aligned( sz, FD_VINYL_BSTREAM_BLOCK_SZ );

src/vinyl/bstream/Local.mk

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
$(call add-hdrs,fd_vinyl_bstream.h)
22
$(call add-objs,fd_vinyl_bstream,fd_vinyl)
3+
ifdef FD_HAS_AVX512
4+
$(call add-objs,fd_hash_avx512dq,fd_vinyl)
5+
$(call make-unit-test,test_hash_avx512dq,test_hash_avx512dq,fd_vinyl fd_tango fd_util)
6+
$(call run-unit-test,test_hash_avx512dq)
7+
endif
38
$(call make-unit-test,test_vinyl_bstream,test_vinyl_bstream,fd_vinyl fd_tango fd_util)
49
$(call run-unit-test,test_vinyl_bstream)
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
#include "fd_vinyl_bstream.h"
2+
#if !FD_HAS_AVX512
3+
#error "fd_hash_avx512dq requires AVX-512"
4+
#endif
5+
6+
#include "../../util/simd/fd_avx512.h"
7+
#include "../../util/simd/fd_avx.h"
8+
9+
/* Bases t/f just on the leading bit of c */
10+
#define w_if( c, t, f ) _mm256_castpd_si256( _mm256_blendv_pd( _mm256_castsi256_pd( (f) ), \
11+
_mm256_castsi256_pd( (t) ), \
12+
_mm256_castsi256_pd( (c) ) ) )
13+
14+
/* Given (a0, a1, a2, a3) and (b0, b1, b2, b3), constructs
15+
( c0?a0:a1, c1?b0:b1, c2?a2:a3, c3?b2:b3 ) */
16+
#define wv_shuffle( c0,c1,c2,c3, a, b ) _mm256_castpd_si256( _mm256_shuffle_pd( _mm256_castsi256_pd( (a) ), \
17+
_mm256_castsi256_pd( (b) ), \
18+
((!c0)<<0)|((!c1)<<1)|((!c2)<<2)|((!c3)<<3) ) )
19+
#define wv_mul(a,b) _mm256_mullo_epi64( (a), (b) ) /* [ a0 *b0 a1 *b1 ... a3 *b3 ] */
20+
21+
FD_FN_PURE void
22+
fd_vinyl_bstream_hash_batch8( ulong const * FD_RESTRICT seed_,
23+
ulong * FD_RESTRICT out,
24+
void const * FD_RESTRICT * FD_RESTRICT buf_,
25+
ulong const * FD_RESTRICT sz_ ) {
26+
#define C1 (11400714785074694791UL)
27+
#define C2 (14029467366897019727UL)
28+
#define C3 ( 1609587929392839161UL)
29+
#define C4 ( 9650029242287828579UL)
30+
#define C5 ( 2870177450012600261UL)
31+
32+
wv_t const CC1 = wv_bcast( C1 );
33+
wv_t const CC2 = wv_bcast( C2 );
34+
wv_t const CWV = wv( C1+C2, C2, 0UL, -C1 );
35+
36+
/* Vi = ( w_i, x_i, y_i, z_i ) */
37+
wv_t V0 = wv_add( wv_bcast( seed_[0] ), CWV ); uchar const * p0 = (uchar const *)buf_[0];
38+
wv_t V1 = wv_add( wv_bcast( seed_[1] ), CWV ); uchar const * p1 = (uchar const *)buf_[1];
39+
wv_t V2 = wv_add( wv_bcast( seed_[2] ), CWV ); uchar const * p2 = (uchar const *)buf_[2];
40+
wv_t V3 = wv_add( wv_bcast( seed_[3] ), CWV ); uchar const * p3 = (uchar const *)buf_[3];
41+
wv_t V4 = wv_add( wv_bcast( seed_[4] ), CWV ); uchar const * p4 = (uchar const *)buf_[4];
42+
wv_t V5 = wv_add( wv_bcast( seed_[5] ), CWV ); uchar const * p5 = (uchar const *)buf_[5];
43+
wv_t V6 = wv_add( wv_bcast( seed_[6] ), CWV ); uchar const * p6 = (uchar const *)buf_[6];
44+
wv_t V7 = wv_add( wv_bcast( seed_[7] ), CWV ); uchar const * p7 = (uchar const *)buf_[7];
45+
46+
ulong max_sz = 0UL;
47+
for( ulong i=0UL; i<8UL; i++ ) max_sz = fd_ulong_max( max_sz, sz_[i] );
48+
49+
wwv_t rem_sz = wwv_ldu( sz_ );
50+
wwv_t sub512 = wwv_bcast( 512UL );
51+
52+
for( ulong j_outer=0UL; j_outer<max_sz; j_outer+=512UL ) {
53+
/* not_done has one bit per lane, and we need to convert it to one
54+
mask per lane. We'll extract and invert the kth bit with a shift
55+
and mask, then add 0xFF to it, which has the effect of
56+
broadcasting the inverted bit: 0x00 + 0xFF = 0xFF, whereas 0x01 +
57+
0xFF = 0x00. */
58+
__mmask8 not_done = _mm512_cmpneq_epi64_mask( rem_sz, wwv_zero() );
59+
/* Do effectively a saturating subtract */
60+
rem_sz = _mm512_mask_sub_epi64( rem_sz, not_done, rem_sz, sub512 );
61+
__mmask8 k0 = _kadd_mask8( 0xFF, _kandn_mask8( not_done, 0x01 ) );
62+
__mmask8 k1 = _kadd_mask8( 0xFF, _kandn_mask8( _kshiftri_mask8( not_done, 1 ), 0x01 ) );
63+
__mmask8 k2 = _kadd_mask8( 0xFF, _kandn_mask8( _kshiftri_mask8( not_done, 2 ), 0x01 ) );
64+
__mmask8 k3 = _kadd_mask8( 0xFF, _kandn_mask8( _kshiftri_mask8( not_done, 3 ), 0x01 ) );
65+
__mmask8 k4 = _kadd_mask8( 0xFF, _kandn_mask8( _kshiftri_mask8( not_done, 4 ), 0x01 ) );
66+
__mmask8 k5 = _kadd_mask8( 0xFF, _kandn_mask8( _kshiftri_mask8( not_done, 5 ), 0x01 ) );
67+
__mmask8 k6 = _kadd_mask8( 0xFF, _kandn_mask8( _kshiftri_mask8( not_done, 6 ), 0x01 ) );
68+
__mmask8 k7 = _kadd_mask8( 0xFF, _kandn_mask8( _kshiftri_mask8( not_done, 7 ), 0x01 ) );
69+
70+
71+
for( ulong j=j_outer; j<j_outer+512UL; j+=32UL ) {
72+
V0 = _mm256_mask_mullo_epi64( V0, k0, wv_rol( wv_add( V0, wv_mul( CC2, _mm256_maskz_loadu_epi64( k0, p0+j ) ) ), 31 ), CC1 );
73+
V1 = _mm256_mask_mullo_epi64( V1, k1, wv_rol( wv_add( V1, wv_mul( CC2, _mm256_maskz_loadu_epi64( k1, p1+j ) ) ), 31 ), CC1 );
74+
V2 = _mm256_mask_mullo_epi64( V2, k2, wv_rol( wv_add( V2, wv_mul( CC2, _mm256_maskz_loadu_epi64( k2, p2+j ) ) ), 31 ), CC1 );
75+
V3 = _mm256_mask_mullo_epi64( V3, k3, wv_rol( wv_add( V3, wv_mul( CC2, _mm256_maskz_loadu_epi64( k3, p3+j ) ) ), 31 ), CC1 );
76+
V4 = _mm256_mask_mullo_epi64( V4, k4, wv_rol( wv_add( V4, wv_mul( CC2, _mm256_maskz_loadu_epi64( k4, p4+j ) ) ), 31 ), CC1 );
77+
V5 = _mm256_mask_mullo_epi64( V5, k5, wv_rol( wv_add( V5, wv_mul( CC2, _mm256_maskz_loadu_epi64( k5, p5+j ) ) ), 31 ), CC1 );
78+
V6 = _mm256_mask_mullo_epi64( V6, k6, wv_rol( wv_add( V6, wv_mul( CC2, _mm256_maskz_loadu_epi64( k6, p6+j ) ) ), 31 ), CC1 );
79+
V7 = _mm256_mask_mullo_epi64( V7, k7, wv_rol( wv_add( V7, wv_mul( CC2, _mm256_maskz_loadu_epi64( k7, p7+j ) ) ), 31 ), CC1 );
80+
}
81+
}
82+
83+
/* In preparation for the final steps, we need to transpose
84+
everything. Start by renaming to make the transpose more clear. */
85+
wv_t w0x0y0z0 = V0; wv_t w1x1y1z1 = V1; wv_t w2x2y2z2 = V2; wv_t w3x3y3z3 = V3;
86+
wv_t w4x4y4z4 = V4; wv_t w5x5y5z5 = V5; wv_t w6x6y6z6 = V6; wv_t w7x7y7z7 = V7;
87+
88+
wv_t w0w1y0y1 = wv_shuffle( 1, 1, 1, 1, w0x0y0z0, w1x1y1z1 ); wv_t w4w5y4y5 = wv_shuffle( 1, 1, 1, 1, w4x4y4z4, w5x5y5z5 );
89+
wv_t x0x1z0z1 = wv_shuffle( 0, 0, 0, 0, w0x0y0z0, w1x1y1z1 ); wv_t x4x5z4z5 = wv_shuffle( 0, 0, 0, 0, w4x4y4z4, w5x5y5z5 );
90+
wv_t w2w3y2y3 = wv_shuffle( 1, 1, 1, 1, w2x2y2z2, w3x3y3z3 ); wv_t w6w7y6y7 = wv_shuffle( 1, 1, 1, 1, w6x6y6z6, w7x7y7z7 );
91+
wv_t x2x3z2z3 = wv_shuffle( 0, 0, 0, 0, w2x2y2z2, w3x3y3z3 ); wv_t x6x7z6z7 = wv_shuffle( 0, 0, 0, 0, w6x6y6z6, w7x7y7z7 );
92+
93+
/* On Zen 4, _mm256_inserti128_si256 is only 1 cycle. On Intel, it's
94+
3, and on Zen 5, it is 2. On the whole, it's still better than
95+
_mm256_permute2x128_si256, so we'll use it where we can. */
96+
wv_t w0w1w2w3 = _mm256_inserti128_si256( w0w1y0y1, _mm256_castsi256_si128( w2w3y2y3 ), 1 );
97+
wv_t x0x1x2x3 = _mm256_inserti128_si256( x0x1z0z1, _mm256_castsi256_si128( x2x3z2z3 ), 1 );
98+
99+
wv_t y0y1y2y3 = _mm256_permute2x128_si256( w0w1y0y1, w2w3y2y3, 0x31 );
100+
wv_t z0z1z2z3 = _mm256_permute2x128_si256( x0x1z0z1, x2x3z2z3, 0x31 );
101+
102+
wv_t w4w5w6w7 = _mm256_inserti128_si256( w4w5y4y5, _mm256_castsi256_si128( w6w7y6y7 ), 1 );
103+
wv_t x4x5x6x7 = _mm256_inserti128_si256( x4x5z4z5, _mm256_castsi256_si128( x6x7z6z7 ), 1 );
104+
wv_t y4y5y6y7 = _mm256_permute2x128_si256( w4w5y4y5, w6w7y6y7, 0x31 );
105+
wv_t z4z5z6z7 = _mm256_permute2x128_si256( x4x5z4z5, x6x7z6z7, 0x31 );
106+
107+
wwv_t w0to7 = _mm512_inserti32x8( _mm512_castsi256_si512( w0w1w2w3 ), w4w5w6w7, 1 );
108+
wwv_t x0to7 = _mm512_inserti32x8( _mm512_castsi256_si512( x0x1x2x3 ), x4x5x6x7, 1 );
109+
wwv_t y0to7 = _mm512_inserti32x8( _mm512_castsi256_si512( y0y1y2y3 ), y4y5y6y7, 1 );
110+
wwv_t z0to7 = _mm512_inserti32x8( _mm512_castsi256_si512( z0z1z2z3 ), z4z5z6z7, 1 );
111+
112+
wwv_t h = wwv_add(
113+
wwv_add( wwv_rol( w0to7, 1 ), wwv_rol( x0to7, 7 ) ),
114+
wwv_add( wwv_rol( y0to7, 12 ), wwv_rol( z0to7, 18 ) )
115+
);
116+
117+
wwv_t const CCC1 = wwv_bcast( C1 );
118+
wwv_t const CCC2 = wwv_bcast( C2 );
119+
wwv_t const CCC3 = wwv_bcast( C3 );
120+
wwv_t const CCC4 = wwv_bcast( C4 );
121+
122+
w0to7 = wwv_mul( wwv_rol( wwv_mul( w0to7, CCC2 ), 31 ), CCC1 ); h = wwv_add( wwv_mul( wwv_xor( h, w0to7 ), CCC1 ), CCC4 );
123+
x0to7 = wwv_mul( wwv_rol( wwv_mul( x0to7, CCC2 ), 31 ), CCC1 ); h = wwv_add( wwv_mul( wwv_xor( h, x0to7 ), CCC1 ), CCC4 );
124+
y0to7 = wwv_mul( wwv_rol( wwv_mul( y0to7, CCC2 ), 31 ), CCC1 ); h = wwv_add( wwv_mul( wwv_xor( h, y0to7 ), CCC1 ), CCC4 );
125+
z0to7 = wwv_mul( wwv_rol( wwv_mul( z0to7, CCC2 ), 31 ), CCC1 ); h = wwv_add( wwv_mul( wwv_xor( h, z0to7 ), CCC1 ), CCC4 );
126+
127+
h = wwv_add( h, wwv_ldu( sz_ ) );
128+
129+
/* Final avalanche */
130+
h = wwv_xor( h, wwv_shr( h, 33 ) );
131+
h = wwv_mul( h, CCC2 );
132+
h = wwv_xor( h, wwv_shr( h, 29 ) );
133+
h = wwv_mul( h, CCC3 );
134+
h = wwv_xor( h, wwv_shr( h, 32 ) );
135+
136+
wwv_stu( out, h );
137+
for( ulong i=0UL; i<8UL; i++ ) if( !sz_[i] ) out[i] = seed_[i];
138+
139+
#undef C1
140+
#undef C2
141+
#undef C3
142+
#undef C4
143+
#undef C5
144+
}

src/vinyl/bstream/fd_vinyl_bstream.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
#include "fd_vinyl_bstream.h"
22

3+
void
4+
fd_vinyl_bstream_pair_zero( fd_vinyl_bstream_block_t * hdr ) {
5+
6+
ulong ctl = hdr->ctl;
7+
ulong val_esz = fd_vinyl_bstream_ctl_sz( ctl );
8+
9+
ulong pair_sz = fd_vinyl_bstream_pair_sz( val_esz );
10+
11+
ulong off = sizeof(fd_vinyl_bstream_phdr_t) + val_esz;
12+
ulong zsz = pair_sz - off; /* covers zpad and footer so at least FD_VINYL_BSTREAM_FTR_SZ */
13+
14+
memset( (uchar *)hdr + off, 0, zsz );
15+
}
16+
317
void
418
fd_vinyl_bstream_pair_hash( ulong seed,
519
fd_vinyl_bstream_block_t * hdr ) {

src/vinyl/bstream/fd_vinyl_bstream.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,12 @@ fd_vinyl_bstream_block_test( ulong seed,
392392
return FD_VINYL_SUCCESS;
393393
}
394394

395+
/* fd_vinyl_bstream_pair_zero clears the footer region and any zero
396+
padding region. */
397+
398+
void
399+
fd_vinyl_bstream_pair_zero( fd_vinyl_bstream_block_t * phdr );
400+
395401
/* fd_vinyl_bstream_pair_hash clears the footer region and any zero
396402
padding region of pair and then populates the hash fields
397403
appropriately. seed is the bstream data integrity seed. Assumes
@@ -464,6 +470,20 @@ fd_vinyl_bstream_ctl_style_cstr( int style );
464470
int
465471
fd_cstr_to_vinyl_bstream_ctl_style( char const * cstr );
466472

473+
#if FD_HAS_AVX512 && defined(__AVX512DQ__)
474+
475+
/* fd_vinyl_bstream_hash_batch8 does 8 fd_vinyl_bstream_hash()
476+
computations in parallel. out, buf, sz point to arrays of 8 elements
477+
respectively. */
478+
479+
FD_FN_PURE void
480+
fd_vinyl_bstream_hash_batch8( ulong const * FD_RESTRICT seed,
481+
ulong * FD_RESTRICT out,
482+
void const * FD_RESTRICT * FD_RESTRICT buf,
483+
ulong const * FD_RESTRICT sz );
484+
485+
#endif
486+
467487
FD_PROTOTYPES_END
468488

469489
#endif /* HEADER_fd_src_vinyl_bstream_fd_vinyl_bstream_h */
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#include "../fd_vinyl.h"
2+
3+
uchar buffers[8][2048];
4+
5+
int
6+
main( int argc,
7+
char ** argv ) {
8+
fd_boot( &argc, &argv );
9+
10+
fd_rng_t rng[1]; fd_rng_join( fd_rng_new( rng, 0U, 0UL ) );
11+
12+
ulong const sz_distr[8] = {
13+
0UL,
14+
512UL, 512UL, 512UL, 512UL, 512UL,
15+
1024UL,
16+
2048UL };
17+
18+
long time = 0L;
19+
ulong useful_sz = 0UL;
20+
21+
void const * bufs[ 8 ] = { buffers[0], buffers[1], buffers[2], buffers[3], buffers[4], buffers[5], buffers[6], buffers[7] };
22+
23+
for( ulong rem=1000000UL; rem; rem-- ) {
24+
ulong szs[8];
25+
for( ulong i=0UL; i<8UL; i++ ) {
26+
ulong sz = sz_distr[ fd_rng_uint_roll( rng, 8UL ) ];
27+
useful_sz += sz;
28+
szs[ i ] = sz;
29+
for( ulong j=0UL; j<sz; j+=4UL ) {
30+
uint r = fd_rng_uint( rng );
31+
buffers[ i ][ j+0UL ] = (uchar)(r );
32+
buffers[ i ][ j+1UL ] = (uchar)(r>> 8);
33+
buffers[ i ][ j+2UL ] = (uchar)(r>>16);
34+
buffers[ i ][ j+3UL ] = (uchar)(r>>24);
35+
}
36+
}
37+
ulong seed = fd_rng_ulong( rng );
38+
ulong h_seed[8]; for( ulong i=0UL; i<8UL; i++ ) h_seed[ i ] = seed;
39+
ulong out[ 8 ];
40+
time -= fd_tickcount();
41+
fd_vinyl_bstream_hash_batch8( h_seed, out, bufs, szs );
42+
time += fd_tickcount();
43+
44+
for( ulong i=0UL; i<8UL; i++ ) {
45+
ulong expected = szs[i] ? fd_hash( seed, buffers[i], szs[i] ) : seed;
46+
if( FD_UNLIKELY( expected!=out[ i ] ) ) {
47+
FD_LOG_ERR(( "mismatch at %lu %lu. sz=%lu. %lx != %lx", rem, i, szs[ i ], out[ i ], expected ));
48+
}
49+
}
50+
}
51+
double ticks_per_ns = fd_tempo_tick_per_ns( NULL );
52+
FD_LOG_NOTICE(( "%f byte/ns", (double)useful_sz/((double)time/ticks_per_ns) ));
53+
54+
fd_rng_delete( fd_rng_leave( rng ) );
55+
56+
FD_LOG_NOTICE(( "pass" ));
57+
fd_halt();
58+
return 0;
59+
}

0 commit comments

Comments
 (0)