Skip to content

Commit 63e2e88

Browse files
committed
cache splat
1 parent 15ea059 commit 63e2e88

File tree

2 files changed

+83
-82
lines changed

2 files changed

+83
-82
lines changed

crates/swc_ecma_fast_parser/src/lexer/mod.rs

Lines changed: 69 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,17 @@ static TOKEN_DISPATCH: [TokenType; 128] = {
191191
table
192192
};
193193

194+
// Thread-local SIMD vectors for whitespace processing
195+
thread_local! {
196+
static SPACE_VEC: u8x16 = u8x16::splat(b' ');
197+
static TAB_VEC: u8x16 = u8x16::splat(b'\t');
198+
static NEWLINE_VEC: u8x16 = u8x16::splat(b'\n');
199+
static CARRIAGE_RETURN_VEC: u8x16 = u8x16::splat(b'\r');
200+
static FORM_FEED_VEC: u8x16 = u8x16::splat(0x0c); // Form feed
201+
static VERT_TAB_VEC: u8x16 = u8x16::splat(0x0b); // Vertical tab
202+
static SLASH_VEC: u8x16 = u8x16::splat(b'/'); // For detecting comments
203+
}
204+
194205
impl<'a> Lexer<'a> {
195206
/// Create a new lexer from a string input
196207
#[inline(always)]
@@ -502,100 +513,80 @@ impl<'a> Lexer<'a> {
502513
#[inline]
503514
fn process_whitespace_simd(&mut self) -> bool {
504515
// Need at least 16 bytes to use SIMD
505-
let rest_len = self.cursor.rest().len();
506-
if rest_len < 16 || self.cursor.position() + 16 > rest_len as u32 {
516+
if self.cursor.position() + 16 > self.cursor.rest().len() as u32 {
507517
return false;
508518
}
509519

510-
// Get current 16 bytes and load them directly into SIMD vector
511-
let input = self.cursor.rest();
512-
let data = unsafe {
513-
// SAFETY: We've checked that we have at least 16 bytes
514-
let mut bytes = [0u8; 16];
515-
std::ptr::copy_nonoverlapping(input.as_ptr(), bytes.as_mut_ptr(), 16);
516-
u8x16::new(bytes)
517-
};
518-
519-
// Handle special characters separately for better branch prediction
520-
let first_byte = unsafe { *input.get_unchecked(0) };
521-
522-
// Check for special cases that need individual handling
523-
match first_byte {
524-
b'\n' => {
525-
self.cursor.advance();
526-
self.had_line_break = LineBreak::Present;
527-
return true;
528-
}
529-
b'\r' => {
530-
self.cursor.advance();
531-
if let Some(b'\n') = self.cursor.peek() {
532-
self.cursor.advance();
533-
}
534-
self.had_line_break = LineBreak::Present;
535-
return true;
536-
}
537-
b'/' => {
538-
// Check if this could be a comment start
539-
if let Some(b'/') | Some(b'*') = self.cursor.peek_at(1) {
540-
return false; // Let the caller handle comments
541-
}
542-
return false; // Not a whitespace
543-
}
544-
0xe2 => {
545-
// Check for line separator (U+2028) and paragraph separator (U+2029)
546-
let bytes = self.cursor.peek_n(3);
547-
if bytes.len() == 3
548-
&& bytes[0] == 0xe2
549-
&& bytes[1] == 0x80
550-
&& (bytes[2] == 0xa8 || bytes[2] == 0xa9)
551-
{
552-
self.cursor.advance_n(3);
553-
self.had_line_break = LineBreak::Present;
554-
return true;
555-
}
556-
return false;
557-
}
558-
_ => {}
559-
}
520+
// Use thread-local SIMD vectors for common whitespace characters
521+
let space_vec = SPACE_VEC.with(|v| *v);
522+
let tab_vec = TAB_VEC.with(|v| *v);
523+
let newline_vec = NEWLINE_VEC.with(|v| *v);
524+
let carriage_return_vec = CARRIAGE_RETURN_VEC.with(|v| *v);
525+
let form_feed_vec = FORM_FEED_VEC.with(|v| *v);
526+
let vert_tab_vec = VERT_TAB_VEC.with(|v| *v);
527+
let slash_vec = SLASH_VEC.with(|v| *v);
560528

561-
// Create SIMD vectors for common whitespace characters
562-
let space_vec = u8x16::splat(b' ');
563-
let tab_vec = u8x16::splat(b'\t');
564-
let form_feed_vec = u8x16::splat(0x0c); // Form feed
565-
let vert_tab_vec = u8x16::splat(0x0b); // Vertical tab
529+
// Get current 16 bytes
530+
let input = self.cursor.rest();
531+
let mut data = [0u8; 16];
532+
data.copy_from_slice(unsafe { input.get_unchecked(0..16) });
533+
let chunk = u8x16::new(data);
566534

567-
// Fast path for regular whitespace (space, tab, form feed, vertical tab)
568535
// Compare with our whitespace vectors
569-
let is_space = data.cmp_eq(space_vec);
570-
let is_tab = data.cmp_eq(tab_vec);
571-
let is_ff = data.cmp_eq(form_feed_vec);
572-
let is_vt = data.cmp_eq(vert_tab_vec);
536+
let is_space = chunk.cmp_eq(space_vec);
537+
let is_tab = chunk.cmp_eq(tab_vec);
538+
let is_newline = chunk.cmp_eq(newline_vec);
539+
let is_cr = chunk.cmp_eq(carriage_return_vec);
540+
let is_ff = chunk.cmp_eq(form_feed_vec);
541+
let is_vt = chunk.cmp_eq(vert_tab_vec);
542+
let is_slash = chunk.cmp_eq(slash_vec);
573543

574544
// Combine masks for regular whitespace
575545
let is_basic_ws = is_space | is_tab | is_ff | is_vt;
576546

577-
// Convert SIMD mask to array to process consecutive whitespace
578-
let ws_array = is_basic_ws.to_array();
547+
// Convert masks to arrays
548+
let is_basic_ws_arr = is_basic_ws.to_array();
549+
let is_newline_arr = is_newline.to_array();
550+
let is_cr_arr = is_cr.to_array();
551+
let is_slash_arr = is_slash.to_array();
579552

580-
// If the first byte is whitespace, process consecutive whitespace
581-
if ws_array[0] != 0 {
582-
// Count consecutive whitespace characters
583-
let mut count = 0;
584-
for ws_char in ws_array {
585-
if ws_char == 0 {
586-
break;
587-
}
588-
count += 1;
553+
// Check the first byte only - we'll process one character at a time
554+
// This is more efficient than trying to process the entire chunk at once
555+
// when we need to handle special cases like CR+LF and comments
556+
557+
if unsafe { *is_basic_ws_arr.get_unchecked(0) } != 0 {
558+
// Regular whitespace - just advance
559+
self.cursor.advance();
560+
return true;
561+
}
562+
563+
if unsafe { *is_newline_arr.get_unchecked(0) } != 0 {
564+
// Newline - need to set had_line_break
565+
self.cursor.advance();
566+
self.had_line_break = LineBreak::Present;
567+
return true;
568+
}
569+
570+
if unsafe { *is_cr_arr.get_unchecked(0) } != 0 {
571+
// Carriage return - need to check for CRLF sequence
572+
self.cursor.advance();
573+
if let Some(b'\n') = self.cursor.peek() {
574+
self.cursor.advance();
589575
}
576+
self.had_line_break = LineBreak::Present;
577+
return true;
578+
}
590579

591-
// Skip all consecutive basic whitespace characters at once
592-
if count > 0 {
593-
self.cursor.advance_n(count);
594-
return true;
580+
if unsafe { *is_slash_arr.get_unchecked(0) } != 0 {
581+
// Potential comment - need to check next character
582+
if let Some(b'/') | Some(b'*') = self.cursor.peek_at(1) {
583+
return false; // Let the caller handle comments
595584
}
585+
// Not a comment, just a slash
586+
return false;
596587
}
597588

598-
// No whitespace found
589+
// Not whitespace or a special character
599590
false
600591
}
601592

crates/swc_ecma_fast_parser/src/lexer/string.rs

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,13 @@ use crate::{
1212
token::{Token, TokenType, TokenValue},
1313
};
1414

15+
// Thread-local SIMD vectors for string processing
16+
thread_local! {
17+
static BACKSLASH_VEC: u8x16 = u8x16::splat(b'\\');
18+
static NEWLINE_VEC: u8x16 = u8x16::splat(b'\n');
19+
static CARRIAGE_VEC: u8x16 = u8x16::splat(b'\r');
20+
}
21+
1522
// Pre-computed lookup table for escape sequences
1623
static ESCAPE_LOOKUP: [u8; 128] = {
1724
let mut table = [0u8; 128];
@@ -228,11 +235,14 @@ impl Lexer<'_> {
228235
bytes.copy_from_slice(chunk_bytes);
229236
let chunk = u8x16::new(bytes);
230237

231-
// Create vectors for quick comparison
238+
// Create quote vector (needs to be created each time as it depends on
239+
// parameter)
232240
let quote_vec = u8x16::splat(quote);
233-
let backslash_vec = u8x16::splat(b'\\');
234-
let newline_vec = u8x16::splat(b'\n');
235-
let carriage_vec = u8x16::splat(b'\r');
241+
242+
// Use thread-local vectors for constants
243+
let backslash_vec = BACKSLASH_VEC.with(|v| *v);
244+
let newline_vec = NEWLINE_VEC.with(|v| *v);
245+
let carriage_vec = CARRIAGE_VEC.with(|v| *v);
236246

237247
// Check for presence of special characters
238248
let quote_mask = chunk.cmp_eq(quote_vec);

0 commit comments

Comments
 (0)