@@ -2164,28 +2164,95 @@ static void dirty_all_reference_blocks(struct vdo_slab *slab)
2164
2164
dirty_block (& slab -> reference_blocks [i ]);
2165
2165
}
2166
2166
2167
+ static inline bool journal_points_equal (struct journal_point first ,
2168
+ struct journal_point second )
2169
+ {
2170
+ return ((first .sequence_number == second .sequence_number ) &&
2171
+ (first .entry_count == second .entry_count ));
2172
+ }
2173
+
2167
2174
/**
2168
- * clear_provisional_references() - Clear the provisional reference counts from a reference block.
2169
- * @block: The block to clear.
2175
+ * match_bytes() - Check an 8-byte word for bytes matching the value specified
2176
+ * @input: A word to examine the bytes of
2177
+ * @match: The byte value sought
2178
+ *
2179
+ * Return: 1 in each byte when the corresponding input byte matched, 0 otherwise
2170
2180
*/
2171
- static void clear_provisional_references ( struct reference_block * block )
2181
+ static inline u64 match_bytes ( u64 input , u8 match )
2172
2182
{
2173
- vdo_refcount_t * counters = get_reference_counters_for_block (block );
2174
- block_count_t j ;
2183
+ u64 temp = input ^ (match * 0x0101010101010101ULL );
2184
+ /* top bit of each byte is set iff top bit of temp byte is clear; rest are 0 */
2185
+ u64 test_top_bits = ~temp & 0x8080808080808080ULL ;
2186
+ /* top bit of each byte is set iff low 7 bits of temp byte are clear; rest are useless */
2187
+ u64 test_low_bits = 0x8080808080808080ULL - (temp & 0x7f7f7f7f7f7f7f7fULL );
2188
+ /* return 1 when both tests indicate temp byte is 0 */
2189
+ return (test_top_bits & test_low_bits ) >> 7 ;
2190
+ }
2191
+
2192
+ /**
2193
+ * count_valid_references() - Process a newly loaded refcount array
2194
+ * @counters: the array of counters from a metadata block
2195
+ *
2196
+ * Scan a 8-byte-aligned array of counters, fixing up any "provisional" values that weren't
2197
+ * cleaned up at shutdown, changing them internally to "empty".
2198
+ *
2199
+ * Return: the number of blocks that are referenced (counters not "empty")
2200
+ */
2201
+ static unsigned int count_valid_references (vdo_refcount_t * counters )
2202
+ {
2203
+ u64 * words = (u64 * )counters ;
2204
+ /* It's easier to count occurrences of a specific byte than its absences. */
2205
+ unsigned int empty_count = 0 ;
2206
+ /* For speed, we process 8 bytes at once. */
2207
+ unsigned int words_left = COUNTS_PER_BLOCK / sizeof (u64 );
2208
+
2209
+ /*
2210
+ * Sanity check assumptions used for optimizing this code: Counters are bytes. The counter
2211
+ * array is a multiple of the word size.
2212
+ */
2213
+ BUILD_BUG_ON (sizeof (vdo_refcount_t ) != 1 );
2214
+ BUILD_BUG_ON ((COUNTS_PER_BLOCK % sizeof (u64 )) != 0 );
2175
2215
2176
- for (j = 0 ; j < COUNTS_PER_BLOCK ; j ++ ) {
2177
- if (counters [j ] == PROVISIONAL_REFERENCE_COUNT ) {
2178
- counters [j ] = EMPTY_REFERENCE_COUNT ;
2179
- block -> allocated_count -- ;
2216
+ while (words_left > 0 ) {
2217
+ /*
2218
+ * This is used effectively as 8 byte-size counters. Byte 0 counts how many words
2219
+ * had the target value found in byte 0, etc. We just have to avoid overflow.
2220
+ */
2221
+ u64 split_count = 0 ;
2222
+ /*
2223
+ * The counter "% 255" trick used below to fold split_count into empty_count
2224
+ * imposes a limit of 254 bytes examined each iteration of the outer loop. We
2225
+ * process a word at a time, so that limit gets rounded down to 31 u64 words.
2226
+ */
2227
+ const unsigned int max_words_per_iteration = 254 / sizeof (u64 );
2228
+ unsigned int iter_words_left = min_t (unsigned int , words_left ,
2229
+ max_words_per_iteration );
2230
+
2231
+ words_left -= iter_words_left ;
2232
+
2233
+ while (iter_words_left -- ) {
2234
+ u64 word = * words ;
2235
+ u64 temp ;
2236
+
2237
+ /* First, if we have any provisional refcount values, clear them. */
2238
+ temp = match_bytes (word , PROVISIONAL_REFERENCE_COUNT );
2239
+ if (temp ) {
2240
+ /*
2241
+ * 'temp' has 0x01 bytes where 'word' has PROVISIONAL; this xor
2242
+ * will alter just those bytes, changing PROVISIONAL to EMPTY.
2243
+ */
2244
+ word ^= temp * (PROVISIONAL_REFERENCE_COUNT ^ EMPTY_REFERENCE_COUNT );
2245
+ * words = word ;
2246
+ }
2247
+
2248
+ /* Now count the EMPTY_REFERENCE_COUNT bytes, updating the 8 counters. */
2249
+ split_count += match_bytes (word , EMPTY_REFERENCE_COUNT );
2250
+ words ++ ;
2180
2251
}
2252
+ empty_count += split_count % 255 ;
2181
2253
}
2182
- }
2183
2254
2184
- static inline bool journal_points_equal (struct journal_point first ,
2185
- struct journal_point second )
2186
- {
2187
- return ((first .sequence_number == second .sequence_number ) &&
2188
- (first .entry_count == second .entry_count ));
2255
+ return COUNTS_PER_BLOCK - empty_count ;
2189
2256
}
2190
2257
2191
2258
/**
@@ -2196,7 +2263,6 @@ static inline bool journal_points_equal(struct journal_point first,
2196
2263
static void unpack_reference_block (struct packed_reference_block * packed ,
2197
2264
struct reference_block * block )
2198
2265
{
2199
- block_count_t index ;
2200
2266
sector_count_t i ;
2201
2267
struct vdo_slab * slab = block -> slab ;
2202
2268
vdo_refcount_t * counters = get_reference_counters_for_block (block );
@@ -2222,11 +2288,7 @@ static void unpack_reference_block(struct packed_reference_block *packed,
2222
2288
}
2223
2289
}
2224
2290
2225
- block -> allocated_count = 0 ;
2226
- for (index = 0 ; index < COUNTS_PER_BLOCK ; index ++ ) {
2227
- if (counters [index ] != EMPTY_REFERENCE_COUNT )
2228
- block -> allocated_count ++ ;
2229
- }
2291
+ block -> allocated_count = count_valid_references (counters );
2230
2292
}
2231
2293
2232
2294
/**
@@ -2247,7 +2309,6 @@ static void finish_reference_block_load(struct vdo_completion *completion)
2247
2309
struct packed_reference_block * packed = (struct packed_reference_block * ) data ;
2248
2310
2249
2311
unpack_reference_block (packed , block );
2250
- clear_provisional_references (block );
2251
2312
slab -> free_blocks -= block -> allocated_count ;
2252
2313
}
2253
2314
return_vio_to_pool (pooled );
0 commit comments