Skip to content

Commit fa82cbb

Browse files
committed
refactor percent_encode_index to be more simd friendly
1 parent 6518b54 commit fa82cbb

File tree

1 file changed

+27
-4
lines changed

1 file changed

+27
-4
lines changed

include/ada/unicode-inl.h

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,33 @@
1818
namespace ada::unicode {
1919
/**
 * Finds the first byte of `input` that is flagged in `character_set`.
 *
 * A byte `c` is considered flagged when
 * `character_sets::bit_at(character_set, c)` evaluates to true.
 *
 * @param input         the bytes to scan.
 * @param character_set lookup table handed to `character_sets::bit_at`.
 * @return the index of the first flagged byte, or `input.size()` when no byte
 *         of `input` is flagged (including when `input` is empty).
 */
ada_really_inline size_t percent_encode_index(const std::string_view input,
                                              const uint8_t character_set[]) {
  const char* data = input.data();
  const size_t size = input.size();

  // Main loop: walk the input in blocks of 8 bytes. The inner loop has a
  // fixed trip count so the compiler is free to unroll it. Bytes are read
  // straight from memory in address order; the previous approach of packing
  // them into a uint64_t and peeling them off with `& 0xFF` / `>>= 8` only
  // matched memory order on little-endian hosts and would report the wrong
  // index on big-endian ones.
  size_t i = 0;
  for (; i + 8 <= size; i += 8) {
    for (size_t j = 0; j < 8; j++) {
      if (character_sets::bit_at(character_set, data[i + j])) {
        return i + j;
      }
    }
  }

  // Tail: fewer than 8 bytes remain.
  for (; i < size; i++) {
    if (character_sets::bit_at(character_set, data[i])) {
      return i;
    }
  }

  // No flagged byte found.
  return size;
}
2649
} // namespace ada::unicode
2750

0 commit comments

Comments
 (0)