From d32e6201b6d227cc57b04ae329d23e787390cd85 Mon Sep 17 00:00:00 2001
From: Alexandre <gourdel.alexandre@gmail.com>
Date: Mon, 24 Nov 2025 22:34:27 +0100
Subject: [PATCH] Add Index save/load in a gzip-compressed binary format

---
 Cargo.toml                 |   1 +
 INDEX_BINARY_FORMAT.md     | 111 +++++
 src/error.rs               |   2 +
 src/index.rs               | 240 +++++++++++
 src/lib.rs                 |   1 +
 src/python_bindings/mod.rs |  10 +
 tools/index_player.html    | 833 +++++++++++++++++++++++++++++++++++++
 7 files changed, 1198 insertions(+)
 create mode 100644 INDEX_BINARY_FORMAT.md
 create mode 100644 tools/index_player.html

diff --git a/Cargo.toml b/Cargo.toml
index 33df5205..add9c4c8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ serde = {version = "1.0", features = ["derive"]}
 bincode = "2.0.1"
 rustc-hash = "2.1.0"
 regex-automata = "0.4.9"
+flate2 = "1.1.5"
 
 # Below are fragile dependencies, even minor updates of which often break the code
 [dependencies.hf-hub]
diff --git a/INDEX_BINARY_FORMAT.md b/INDEX_BINARY_FORMAT.md
new file mode 100644
index 00000000..cc2375ea
--- /dev/null
+++ b/INDEX_BINARY_FORMAT.md
@@ -0,0 +1,111 @@
+# Index Binary Format Specification
+
+This document describes the binary format used for serializing and deserializing the `Index` structure.
+
+## Overview
+
+The Index is saved as a gzip-compressed binary file. The uncompressed data follows a structured format with fixed-size fields for efficient storage and retrieval.
+
+## Binary Format Structure
+
+All multi-byte integers are stored in **little-endian** format.
+
+### Header Section
+
+| Offset (bytes) | Size (bits) | Field | Description |
+|--------|-------------|-------|-------------|
+| 0 | 32 | vocab_size | Size of the vocabulary used to build the index |
+| 4 | 32 | eos_token_id | Token ID reserved for the end-of-sequence token |
+| 8 | 32 | initial_state_id | ID of the initial state in the automaton |
+| 12 | 32 | num_final_states | Number of final (accepting) states |
+
+### Final States Section
+
+Starting at byte offset 16, this section contains the IDs of all final states.
+
+| Size (bits) | Field | Description |
+|-------------|-------|-------------|
+| 32 × num_final_states | final_state_ids | Array of final state IDs |
+
+### Index Type
+
+| Size (bits) | Field | Description |
+|-------------|-------|-------------|
+| 8 | index_type | Type identifier for the index format (currently only type 1 is supported) |
+
+### Transitions Section (Type 1)
+
+The format of this section depends on the index type. For type 1:
+
+#### States Header
+
+| Size (bits) | Field | Description |
+|-------------|-------|-------------|
+| 32 | num_states | Number of states with transitions |
+
+#### For Each State
+
+For each of the `num_states` states (written in no particular order):
+
+| Size (bits) | Field | Description |
+|-------------|-------|-------------|
+| 32 | state_id | ID of the current state |
+| 32 | num_transitions | Number of transitions from this state |
+
+#### For Each Transition
+
+For each of the `num_transitions` transitions in a state:
+
+| Size (bits) | Field | Description |
+|-------------|-------|-------------|
+| 32 | token_id | Token ID that triggers this transition |
+| 32 | next_state_id | Destination state ID for this transition |
+
+## Compression
+
+The entire binary structure described above is gzip-compressed (using the `flate2` crate) at the default compression level before being written to disk.
+
+## Example Layout
+
+```
+┌─────────────────────────────────────────────────────────┐
+│ Compressed File (gzip)                                  │
+│ ┌─────────────────────────────────────────────────────┐ │
+│ │ Uncompressed Binary Data                            │ │
+│ │ ┌───────────────────────────────────────────────┐   │ │
+│ │ │ Header (16 bytes)                             │   │ │
+│ │ │ - vocab_size (4 bytes)                        │   │ │
+│ │ │ - eos_token_id (4 bytes)                      │   │ │
+│ │ │ - initial_state_id (4 bytes)                  │   │ │
+│ │ │ - num_final_states (4 bytes)                  │   │ │
+│ │ └───────────────────────────────────────────────┘   │ │
+│ │ ┌───────────────────────────────────────────────┐   │ │
+│ │ │ Final States (4 bytes × num_final_states)     │   │ │
+│ │ └───────────────────────────────────────────────┘   │ │
+│ │ ┌───────────────────────────────────────────────┐   │ │
+│ │ │ Index Type (1 byte)                           │   │ │
+│ │ └───────────────────────────────────────────────┘   │ │
+│ │ ┌───────────────────────────────────────────────┐   │ │
+│ │ │ Transitions Section                           │   │ │
+│ │ │ - num_states (4 bytes)                        │   │ │
+│ │ │ - For each state:                             │   │ │
+│ │ │   - state_id (4 bytes)                        │   │ │
+│ │ │   - num_transitions (4 bytes)                 │   │ │
+│ │ │   - For each transition:                      │   │ │
+│ │ │     - token_id (4 bytes)                      │   │ │
+│ │ │     - next_state_id (4 bytes)                 │   │ │
+│ │ └───────────────────────────────────────────────┘   │ │
+│ └─────────────────────────────────────────────────────┘ │
+└─────────────────────────────────────────────────────────┘
+```
+
+## Version History
+
+- **Type 1**: Initial format supporting basic state transitions with token-to-state mappings.
+
+## Future Extensions
+
+The index type field allows for future extensions of the format. New index types can be added to support:
+- Optimized storage formats for sparse or dense transition tables
+- Compressed transition representations
+- Alternative state machine encodings
diff --git a/src/error.rs b/src/error.rs
index e0d0eb7b..991d91bc 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -75,6 +75,8 @@ pub enum Error {
         error_state: u32,
         missing_tokens: Vec<String>,
     },
+    #[error("IO error: {0}")]
+    IOError(String),
 }
 
 impl Error {
diff --git a/src/index.rs b/src/index.rs
index 04ca41ca..aea675d4 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -193,6 +193,142 @@ impl Index {
         })
     }
 
+    /// Serializes the index to `path` as a gzip-compressed binary file.
+    ///
+    /// The uncompressed payload is a sequence of little-endian fields: `vocab_size`,
+    /// `eos_token_id`, `initial_state`, the final-state count and IDs (all `u32`),
+    /// a one-byte index type (always `1`), then the state count and, per state, its
+    /// ID, its transition count and `(token_id, next_state_id)` pairs (all `u32`).
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::IOError` if a count does not fit in the format's 32-bit
+    /// fields, or if compressing or writing the file fails.
+    pub fn save<P: AsRef<std::path::Path>>(&self, path: P) -> Result<()> {
+        use std::io::Write;
+
+        // Sizes are stored as u32; fail instead of silently truncating them.
+        fn to_u32(value: usize, field: &str) -> Result<u32> {
+            use std::convert::TryFrom; // in the prelude only from edition 2021 on
+            u32::try_from(value)
+                .map_err(|_| Error::IOError(format!("{} does not fit in 32 bits", field)))
+        }
+
+        let mut buffer = Vec::new();
+
+        // Header: vocab_size, eos_token_id, initial_state_id, num_final_states.
+        buffer.extend_from_slice(&to_u32(self.vocab_size, "vocab_size")?.to_le_bytes());
+        buffer.extend_from_slice(&self.eos_token_id.to_le_bytes());
+        buffer.extend_from_slice(&self.initial_state.to_le_bytes());
+        buffer.extend_from_slice(&to_u32(self.final_states.len(), "num_final_states")?.to_le_bytes());
+        for &final_state in &self.final_states {
+            buffer.extend_from_slice(&final_state.to_le_bytes());
+        }
+
+        // Index type (8 bits) - Type 1 for now.
+        buffer.push(1u8);
+
+        // Transitions: num_states, then per state its ID, count and pairs.
+        buffer.extend_from_slice(&to_u32(self.transitions.len(), "num_states")?.to_le_bytes());
+        for (&state_id, transitions_map) in &self.transitions {
+            buffer.extend_from_slice(&state_id.to_le_bytes());
+            buffer.extend_from_slice(&to_u32(transitions_map.len(), "num_transitions")?.to_le_bytes());
+            for (&token_id, &next_state_id) in transitions_map {
+                buffer.extend_from_slice(&token_id.to_le_bytes());
+                buffer.extend_from_slice(&next_state_id.to_le_bytes());
+            }
+        }
+
+        // Compress in memory, then write the file in a single call.
+        let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
+        encoder.write_all(&buffer).map_err(|e| Error::IOError(e.to_string()))?;
+        let compressed_data = encoder.finish().map_err(|e| Error::IOError(e.to_string()))?;
+        std::fs::write(path, compressed_data).map_err(|e| Error::IOError(e.to_string()))
+    }
+
+    /// Reads an index back from a gzip-compressed file produced by `save`.
+    ///
+    /// The file is read and decompressed entirely into memory, then decoded
+    /// field by field; every multi-byte integer is a little-endian `u32`. If a
+    /// state or token ID is repeated, the later entry replaces the earlier one.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::IOError` when the file cannot be read or decompressed,
+    /// when the data ends before an expected field, or when the index type
+    /// byte is anything other than `1`.
+    pub fn load<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
+        use std::io::Read;
+
+        // Decodes the little-endian u32 at `*offset` and moves `*offset` past it.
+        fn take_u32(data: &[u8], offset: &mut usize) -> Result<u32> {
+            let end = *offset + 4;
+            match data.get(*offset..end) {
+                Some(&[b0, b1, b2, b3]) => {
+                    *offset = end;
+                    Ok(u32::from_le_bytes([b0, b1, b2, b3]))
+                }
+                _ => Err(Error::IOError("Unexpected end of buffer".to_string())),
+            }
+        }
+
+        // Pull the whole file into memory and inflate it in one go.
+        let raw = std::fs::read(path).map_err(|e| Error::IOError(e.to_string()))?;
+        let mut bytes = Vec::new();
+        flate2::read::GzDecoder::new(raw.as_slice())
+            .read_to_end(&mut bytes)
+            .map_err(|e| Error::IOError(e.to_string()))?;
+        let mut offset = 0;
+
+        // Header: vocab_size, eos_token_id, initial_state_id, num_final_states.
+        let vocab_size = take_u32(&bytes, &mut offset)? as usize;
+        let eos_token_id = take_u32(&bytes, &mut offset)?;
+        let initial_state = take_u32(&bytes, &mut offset)?;
+        let final_count = take_u32(&bytes, &mut offset)? as usize;
+
+        // Final state IDs, `final_count` of them.
+        let mut final_states = HashSet::default();
+        for _ in 0..final_count {
+            final_states.insert(take_u32(&bytes, &mut offset)?);
+        }
+
+        // Index type (8 bits); anything but type 1 is rejected.
+        let index_type = match bytes.get(offset) {
+            Some(&byte) => byte,
+            None => return Err(Error::IOError("Unexpected end of buffer".to_string())),
+        };
+        offset += 1;
+        if index_type != 1 {
+            return Err(Error::IOError(format!("Unsupported index type: {}", index_type)));
+        }
+
+        // Transitions: a state count, then per state its ID, its transition
+        // count and that many (token_id, next_state_id) pairs.
+        let state_count = take_u32(&bytes, &mut offset)? as usize;
+        let mut transitions: HashMap<StateId, HashMap<TokenId, StateId>> = HashMap::default();
+        for _ in 0..state_count {
+            let state_id = take_u32(&bytes, &mut offset)?;
+            let edge_count = take_u32(&bytes, &mut offset)? as usize;
+
+            let mut outgoing = HashMap::default();
+            for _ in 0..edge_count {
+                let token_id = take_u32(&bytes, &mut offset)?;
+                let next_state_id = take_u32(&bytes, &mut offset)?;
+                outgoing.insert(token_id, next_state_id);
+            }
+
+            transitions.insert(state_id, outgoing);
+        }
+
+        Ok(Self {
+            initial_state,
+            final_states,
+            transitions,
+            eos_token_id,
+            vocab_size,
+        })
+    }
+
     /// Returns the ID of the initial state in the automaton.
     pub fn initial_state(&self) -> StateId {
         self.initial_state
@@ -391,4 +527,108 @@ mod tests {
             panic!("Expected IncompatibleVocabulary error");
         }
     }
+
+    /// Round-trips an index through `save`/`load` and checks nothing was lost.
+    #[test]
+    fn test_save_and_load() {
+        let regex = "0|[1-9][0-9]*";
+        let eos_token_id = 4;
+        let mut vocabulary = Vocabulary::new(eos_token_id);
+        for (token, token_id) in [("blah", 0), ("1a", 1), ("2", 2), ("0", 3)] {
+            vocabulary
+                .try_insert(token, token_id as u32)
+                .expect("Insert failed");
+        }
+
+        let original_index = Index::new(regex, &vocabulary).expect("Index failed");
+
+        // The process ID keeps concurrent runs of the test suite that share a
+        // temp directory from overwriting each other's file.
+        let file_name = format!("test_index_{}.bin", std::process::id());
+        let temp_path = std::env::temp_dir().join(file_name);
+        original_index.save(&temp_path).expect("Save failed");
+        let loaded_index = Index::load(&temp_path).expect("Load failed");
+
+        // Clean up before asserting so a failed assertion leaves no file behind.
+        std::fs::remove_file(&temp_path).ok();
+
+        assert_eq!(original_index, loaded_index);
+        assert_eq!(original_index.initial_state(), loaded_index.initial_state());
+        assert_eq!(original_index.final_states(), loaded_index.final_states());
+        assert_eq!(original_index.transitions(), loaded_index.transitions());
+        assert_eq!(original_index.vocab_size(), loaded_index.vocab_size());
+    }
+
+    /// Round-trips an index whose regex and tokens contain multi-byte characters.
+    #[test]
+    fn test_save_and_load_multibyte() {
+        let regex = "😇| [😈-😍][😇-😎]*";
+        let mut vocabulary = Vocabulary::new(8);
+        for (token, token_id) in [(" 😍", 5), ("blah", 0), ("😇", 2), ("😈a", 1), ("😍", 3)] {
+            vocabulary
+                .try_insert(token, token_id as u32)
+                .expect("Insert failed");
+        }
+        for (token, token_id) in [
+            (vec![32, 240, 159, 152, 136], 7),
+            (vec![32, 240, 159, 152, 141], 6),
+            (vec![240, 159, 152, 141], 4),
+        ] {
+            vocabulary
+                .try_insert(token, token_id as u32)
+                .expect("Insert failed");
+        }
+
+        let original_index = Index::new(regex, &vocabulary).expect("Index failed");
+        let file_name = format!("test_index_multibyte_{}.bin", std::process::id()); // unique per process
+        let temp_path = std::env::temp_dir().join(file_name);
+        original_index.save(&temp_path).expect("Save failed");
+        let loaded_index = Index::load(&temp_path).expect("Load failed");
+        std::fs::remove_file(&temp_path).ok();
+        assert_eq!(original_index, loaded_index);
+    }
+
+    #[test]
+    fn test_load_nonexistent_file() {
+        let result = Index::load("/nonexistent/path/index.bin");
+        assert!(result.is_err());
+        assert!(matches!(result, Err(Error::IOError(_))));
+    }
+
+    #[test]
+    fn test_load_corrupted_file() {
+        // Not a gzip stream, so `load` must fail; the process ID keeps concurrent runs apart.
+        let file_name = format!("test_corrupted_{}.bin", std::process::id());
+        let temp_path = std::env::temp_dir().join(file_name);
+        std::fs::write(&temp_path, b"corrupted data").expect("Write failed");
+        let result = Index::load(&temp_path);
+        std::fs::remove_file(&temp_path).ok();
+        assert!(result.is_err());
+    }
+
+    /// Saving a small index yields a non-empty file of modest size.
+    #[test]
+    fn test_save_preserves_file_size() {
+        let regex = "0|[1-9][0-9]*";
+        let mut vocabulary = Vocabulary::new(4);
+        for (token, token_id) in [("blah", 0), ("1a", 1), ("2", 2), ("0", 3)] {
+            vocabulary
+                .try_insert(token, token_id as u32)
+                .expect("Insert failed");
+        }
+        let index = Index::new(regex, &vocabulary).expect("Index failed");
+
+        // The process ID keeps concurrent test runs from sharing this file.
+        let file_name = format!("test_size_{}.bin", std::process::id());
+        let temp_path = std::env::temp_dir().join(file_name);
+        index.save(&temp_path).expect("Save failed");
+        let metadata = std::fs::metadata(&temp_path);
+        // Remove the file before asserting so a failure leaves nothing behind.
+        std::fs::remove_file(&temp_path).ok();
+        let file_size = metadata.expect("Metadata failed").len();
+
+        // Sanity bounds only: non-empty, and far below 10 kB for this tiny index.
+        assert!(file_size > 0);
+        assert!(file_size < 10000);
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 4b331791..2bf95ea7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -87,6 +87,7 @@ pub mod prelude;
 pub mod primitives;
 pub mod vocabulary;
 
+
 pub use error::{Error, Result};
 
 #[cfg(feature = "python-bindings")]
diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs
index f83a454d..38af7737 100644
--- a/src/python_bindings/mod.rs
+++ b/src/python_bindings/mod.rs
@@ -279,6 +279,16 @@ impl PyIndex {
         self.0.initial_state()
     }
 
+    fn save(&self, path: String) -> PyResult<()> {
+        Index::save(&self.0, path).map_err(Into::into)
+    }
+    #[staticmethod]
+    fn load(path: String) -> PyResult<Self> {
+        Index::load(path)
+            .map(|index| Self(Arc::new(index)))
+            .map_err(Into::into)
+    }
+
     /// Gets the debug string representation of the index.
     fn __repr__(&self) -> String {
         format!("{:#?}", self.0)
diff --git a/tools/index_player.html b/tools/index_player.html
new file mode 100644
index 00000000..324eb7e7
--- /dev/null
+++ b/tools/index_player.html
@@ -0,0 +1,833 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Index Player - FSM Explorer</title>
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            min-height: 100vh;
+            padding: 20px;
+        }
+
+        .container {
+            max-width: 1200px;
+            margin: 0 auto;
+            background: white;
+            border-radius: 12px;
+            box-shadow: 0 10px 40px rgba(0, 0, 0, 0.2);
+            overflow: hidden;
+        }
+
+        .header {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 30px;
+            text-align: center;
+        }
+
+        .header h1 {
+            font-size: 2.5em;
+            margin-bottom: 10px;
+        }
+
+        .header p {
+            opacity: 0.9;
+            font-size: 1.1em;
+        }
+
+        .content {
+            padding: 30px;
+        }
+
+        .file-loader {
+            display: flex;
+            gap: 20px;
+            margin-bottom: 30px;
+            flex-wrap: wrap;
+        }
+
+        .file-input-group {
+            flex: 1;
+            min-width: 250px;
+        }
+
+        .file-input-group label {
+            display: block;
+            font-weight: 600;
+            margin-bottom: 8px;
+            color: #333;
+        }
+
+        .file-input-wrapper {
+            position: relative;
+            display: flex;
+            gap: 10px;
+        }
+
+        input[type="file"] {
+            display: none;
+        }
+
+        .file-button {
+            flex: 1;
+            padding: 12px 20px;
+            background: #667eea;
+            color: white;
+            border: none;
+            border-radius: 6px;
+            cursor: pointer;
+            font-size: 1em;
+            transition: background 0.3s;
+        }
+
+        .file-button:hover {
+            background: #5568d3;
+        }
+
+        .file-button.loaded {
+            background: #48bb78;
+        }
+
+        .file-status {
+            font-size: 0.9em;
+            color: #666;
+            margin-top: 5px;
+        }
+
+        .index-info {
+            background: #f7fafc;
+            border-radius: 8px;
+            padding: 15px;
+            margin-bottom: 20px;
+            display: none;
+        }
+
+        .index-info.visible {
+            display: block;
+        }
+
+        .index-info h2 {
+            color: #667eea;
+            margin-bottom: 10px;
+            font-size: 1.1em;
+        }
+
+        .info-grid {
+            display: flex;
+            gap: 10px;
+            flex-wrap: wrap;
+            align-items: center;
+        }
+
+        .info-item {
+            background: white;
+            padding: 8px 12px;
+            border-radius: 6px;
+            border-left: 3px solid #667eea;
+            display: flex;
+            gap: 8px;
+            align-items: baseline;
+        }
+
+        .info-item label {
+            font-size: 0.75em;
+            color: #666;
+            text-transform: uppercase;
+            letter-spacing: 0.3px;
+            white-space: nowrap;
+        }
+
+        .info-item .value {
+            font-size: 1em;
+            font-weight: 600;
+            color: #333;
+        }
+
+        .explorer {
+            display: none;
+        }
+
+        .explorer.visible {
+            display: block;
+        }
+
+        .current-state {
+            background: #edf2f7;
+            padding: 20px;
+            border-radius: 8px;
+            margin-bottom: 20px;
+        }
+
+        .current-state h3 {
+            color: #667eea;
+            margin-bottom: 10px;
+            font-size: 1.3em;
+        }
+
+        .state-info {
+            display: flex;
+            gap: 20px;
+            align-items: center;
+            flex-wrap: wrap;
+        }
+
+        .state-badge {
+            display: inline-block;
+            padding: 8px 16px;
+            background: #667eea;
+            color: white;
+            border-radius: 20px;
+            font-weight: 600;
+            font-size: 1.1em;
+        }
+
+        .state-badge.final {
+            background: #48bb78;
+        }
+
+        .path-display {
+            background: white;
+            padding: 15px;
+            border-radius: 6px;
+            margin-top: 15px;
+            border: 2px solid #e2e8f0;
+        }
+
+        .path-display h4 {
+            color: #666;
+            margin-bottom: 10px;
+            font-size: 0.9em;
+        }
+
+        .path-tokens {
+            display: flex;
+            gap: 8px;
+            flex-wrap: wrap;
+        }
+
+        .path-token {
+            background: #667eea;
+            color: white;
+            padding: 6px 12px;
+            border-radius: 4px;
+            font-size: 0.9em;
+        }
+
+        .concatenated-result {
+            margin-top: 12px;
+            padding: 12px;
+            background: #fff;
+            border: 2px solid #48bb78;
+            border-radius: 6px;
+        }
+
+        .concatenated-result h4 {
+            color: #48bb78;
+            margin-bottom: 8px;
+            font-size: 0.85em;
+            font-weight: 600;
+        }
+
+        .concatenated-text {
+            font-size: 1.1em;
+            color: #2d3748;
+            font-weight: 500;
+            word-break: break-all;
+            line-height: 1.5;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            background-clip: text;
+        }
+
+        .transitions-section {
+            margin-top: 20px;
+        }
+
+        .transitions-section h3 {
+            color: #333;
+            margin-bottom: 15px;
+            font-size: 1.2em;
+        }
+
+        .transition-input {
+            display: flex;
+            gap: 10px;
+            margin-bottom: 20px;
+        }
+
+        .transition-input input {
+            flex: 1;
+            padding: 12px;
+            border: 2px solid #e2e8f0;
+            border-radius: 6px;
+            font-size: 1em;
+        }
+
+        .transition-input input:focus {
+            outline: none;
+            border-color: #667eea;
+        }
+
+        .transition-input button {
+            padding: 12px 24px;
+            background: #667eea;
+            color: white;
+            border: none;
+            border-radius: 6px;
+            cursor: pointer;
+            font-size: 1em;
+            transition: background 0.3s;
+        }
+
+        .transition-input button:hover {
+            background: #5568d3;
+        }
+
+        .transition-input button:disabled {
+            background: #cbd5e0;
+            cursor: not-allowed;
+        }
+
+        .transitions-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
+            gap: 10px;
+            max-height: 400px;
+            overflow-y: auto;
+            padding: 10px;
+            background: #f7fafc;
+            border-radius: 6px;
+        }
+
+        .transition-card {
+            background: white;
+            padding: 12px;
+            border-radius: 6px;
+            border: 2px solid #e2e8f0;
+            cursor: pointer;
+            transition: all 0.3s;
+        }
+
+        .transition-card:hover {
+            border-color: #667eea;
+            transform: translateY(-2px);
+            box-shadow: 0 4px 12px rgba(102, 126, 234, 0.2);
+        }
+
+        .transition-card .token-id {
+            font-weight: 600;
+            color: #667eea;
+            margin-bottom: 4px;
+        }
+
+        .transition-card .token-value {
+            font-size: 0.85em;
+            color: #999;
+            word-break: break-all;
+        }
+
+        .transition-card .token-value-text {
+            font-size: 1.1em;
+            color: #667eea;
+            font-weight: 500;
+        }
+
+        .transition-card .next-state {
+            margin-top: 8px;
+            padding-top: 8px;
+            border-top: 1px solid #e2e8f0;
+            font-size: 0.85em;
+            color: #999;
+        }
+
+        .controls {
+            margin-top: 20px;
+            display: flex;
+            gap: 10px;
+        }
+
+        .controls button {
+            padding: 10px 20px;
+            background: #718096;
+            color: white;
+            border: none;
+            border-radius: 6px;
+            cursor: pointer;
+            font-size: 0.95em;
+            transition: background 0.3s;
+        }
+
+        .controls button:hover {
+            background: #4a5568;
+        }
+
+        .error-message {
+            background: #fed7d7;
+            color: #c53030;
+            padding: 12px;
+            border-radius: 6px;
+            margin-top: 10px;
+            display: none;
+        }
+
+        .error-message.visible {
+            display: block;
+        }
+
+        .empty-state {
+            text-align: center;
+            padding: 60px 20px;
+            color: #666;
+        }
+
+        .empty-state svg {
+            width: 100px;
+            height: 100px;
+            margin-bottom: 20px;
+            opacity: 0.3;
+        }
+
+        .empty-state h3 {
+            font-size: 1.5em;
+            margin-bottom: 10px;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="header">
+            <h1>🎮 Index Player</h1>
+            <p>Explore and debug your FSM Index files</p>
+        </div>
+
+        <div class="content">
+            <div class="file-loader">
+                <div class="file-input-group">
+                    <label>Index File (Binary)</label>
+                    <div class="file-input-wrapper">
+                        <input type="file" id="indexFile" accept=".bin,.gz">
+                        <button class="file-button" onclick="document.getElementById('indexFile').click()">
+                            Load Index
+                        </button>
+                    </div>
+                    <div class="file-status" id="indexStatus">No file loaded</div>
+                </div>
+
+                <div class="file-input-group">
+                    <label>Vocabulary (JSON) - Optional</label>
+                    <div class="file-input-wrapper">
+                        <input type="file" id="vocabFile" accept=".json">
+                        <button class="file-button" onclick="document.getElementById('vocabFile').click()">
+                            Load Vocabulary
+                        </button>
+                    </div>
+                    <div class="file-status" id="vocabStatus">No vocabulary loaded</div>
+                </div>
+            </div>
+
+            <div class="index-info" id="indexInfo">
+                <h2>📊 Index Information</h2>
+                <div class="info-grid">
+                    <div class="info-item">
+                        <label>Vocabulary Size</label>
+                        <div class="value" id="vocabSize">-</div>
+                    </div>
+                    <div class="info-item">
+                        <label>EOS Token ID</label>
+                        <div class="value" id="eosTokenId">-</div>
+                    </div>
+                    <div class="info-item">
+                        <label>Initial State</label>
+                        <div class="value" id="initialStateId">-</div>
+                    </div>
+                    <div class="info-item">
+                        <label>Final States Count</label>
+                        <div class="value" id="finalStatesCount">-</div>
+                    </div>
+                    <div class="info-item">
+                        <label>Total States</label>
+                        <div class="value" id="totalStates">-</div>
+                    </div>
+                    <div class="info-item">
+                        <label>Index Type</label>
+                        <div class="value" id="indexType">-</div>
+                    </div>
+                </div>
+            </div>
+
+            <div class="explorer" id="explorer">
+                <div class="current-state">
+                    <h3>Current State</h3>
+                    <div class="state-info">
+                        <span class="state-badge" id="currentStateBadge">State: -</span>
+                    </div>
+                    
+                    <div class="path-display">
+                        <h4>Path Taken (Token IDs)</h4>
+                        <div class="path-tokens" id="pathTokens">
+                            <span style="color: #999;">Start from initial state</span>
+                        </div>
+                        <div class="concatenated-result" id="concatenatedResult" style="display: none;">
+                            <h4>✨ Generated Text</h4>
+                            <div class="concatenated-text" id="concatenatedText"></div>
+                        </div>
+                    </div>
+                </div>
+
+                <div class="transitions-section">
+                    <h3>Available Transitions</h3>
+                    
+                    <div class="transition-input">
+                        <input type="text" id="tokenInput" placeholder="Enter token ID or select below...">
+                        <button onclick="applyTransition()">Apply Transition</button>
+                    </div>
+
+                    <div class="error-message" id="errorMessage"></div>
+
+                    <div class="transitions-grid" id="transitionsGrid">
+                    </div>
+                </div>
+
+                <div class="controls">
+                    <button onclick="resetToInitial()">Reset to Initial State</button>
+                    <button onclick="goBackOneStep()">← Go Back One Step</button>
+                </div>
+            </div>
+
+            <div class="empty-state" id="emptyState">
+                <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 21h10a2 2 0 002-2V9.414a1 1 0 00-.293-.707l-5.414-5.414A1 1 0 0012.586 3H7a2 2 0 00-2 2v14a2 2 0 002 2z"></path>
+                </svg>
+                <h3>No Index Loaded</h3>
+                <p>Load an index file to start exploring the FSM</p>
+            </div>
+        </div>
+    </div>
+
+    <script>
+        let indexData = null; // result of parseIndexFile(); assigned by the index-file change handler
+        let vocabulary = null; // optional; an object with a `reverse` map once a vocabulary file is loaded
+        let currentState = null; // presumably the state shown in the explorer; confirm against resetToInitial()
+        let pathHistory = []; // presumably the steps taken so far; confirm against applyTransition()
+
+        // Load Index File
+        document.getElementById('indexFile').addEventListener('change', async (e) => {
+            const file = e.target.files[0];
+            if (!file) return;
+
+            try {
+                const arrayBuffer = await file.arrayBuffer();
+                indexData = await parseIndexFile(arrayBuffer);
+                
+                document.getElementById('indexStatus').textContent = `✓ ${file.name}`;
+                document.querySelector('#indexFile + .file-button').classList.add('loaded');
+                
+                displayIndexInfo();
+                resetToInitial();
+                
+                document.getElementById('emptyState').style.display = 'none';
+                document.getElementById('indexInfo').classList.add('visible');
+                document.getElementById('explorer').classList.add('visible');
+            } catch (error) {
+                alert('Error loading index file: ' + error.message);
+                console.error(error);
+            }
+        });
+
+        // Load Vocabulary File
+        document.getElementById('vocabFile').addEventListener('change', async (e) => {
+            const file = e.target.files[0];
+            if (!file) return;
+
+            try {
+                const text = await file.text();
+                const rawVocab = JSON.parse(text);
+                
+                // Create vocabulary object with reverse mapping
+                vocabulary = { reverse: {} };
+                
+                for (const [token, id] of Object.entries(rawVocab)) {
+                    // Decode token properly - HuggingFace vocab may have escape sequences
+                    let decodedToken = token;
+                    try {
+                        // Try to interpret as UTF-8 byte sequence if it looks encoded
+                        // HuggingFace tokenizers often use Ġ prefix and special encoding
+                        decodedToken = token
+                            .replace(/Ġ/g, ' ')  // Ġ represents space in some tokenizers
+                            .replace(/Ċ/g, '\n') // Ċ represents newline
+                            .replace(/ĉ/g, '\t'); // ĉ represents tab
+                    } catch (e) {
+                        decodedToken = token;
+                    }
+                    vocabulary.reverse[id] = decodedToken;
+                }
+                
+                document.getElementById('vocabStatus').textContent = `✓ ${file.name}`;
+                document.querySelector('#vocabFile + .file-button').classList.add('loaded');
+                
+                // Refresh display if index is loaded
+                if (indexData) {
+                    updateCurrentStateDisplay();
+                    displayTransitions();
+                }
+            } catch (error) {
+                alert('Error loading vocabulary file: ' + error.message);
+                console.error(error);
+            }
+        });
+
+        async function parseIndexFile(arrayBuffer) {
+            // Decompress gzip
+            const decompressed = await decompressGzip(arrayBuffer);
+            const view = new DataView(decompressed);
+            let offset = 0;
+
+            const readU32 = () => {
+                const value = view.getUint32(offset, true); // little-endian
+                offset += 4;
+                return value;
+            };
+
+            const readU8 = () => {
+                const value = view.getUint8(offset);
+                offset += 1;
+                return value;
+            };
+
+            // Read header
+            const vocabSize = readU32();
+            const eosTokenId = readU32();
+            const initialStateId = readU32();
+            const numFinalStates = readU32();
+
+            // Read final states
+            const finalStates = [];
+            for (let i = 0; i < numFinalStates; i++) {
+                finalStates.push(readU32());
+            }
+
+            // Read index type
+            const indexType = readU8();
+
+            if (indexType !== 1) {
+                throw new Error(`Unsupported index type: ${indexType}`);
+            }
+
+            // Read transitions
+            const numStates = readU32();
+            const transitions = {};
+
+            for (let i = 0; i < numStates; i++) {
+                const stateId = readU32();
+                const numTransitions = readU32();
+                
+                transitions[stateId] = {};
+                
+                for (let j = 0; j < numTransitions; j++) {
+                    const tokenId = readU32();
+                    const nextStateId = readU32();
+                    transitions[stateId][tokenId] = nextStateId;
+                }
+            }
+
+            return {
+                vocabSize,
+                eosTokenId,
+                initialStateId,
+                finalStates,
+                indexType,
+                transitions,
+                numStates
+            };
+        }
+
+        async function decompressGzip(arrayBuffer) {
+            const ds = new DecompressionStream('gzip');
+            const writer = ds.writable.getWriter();
+            writer.write(new Uint8Array(arrayBuffer));
+            writer.close();
+
+            const output = [];
+            const reader = ds.readable.getReader();
+            
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done) break;
+                output.push(value);
+            }
+
+            const totalLength = output.reduce((acc, arr) => acc + arr.length, 0);
+            const result = new Uint8Array(totalLength);
+            let offset = 0;
+            for (const arr of output) {
+                result.set(arr, offset);
+                offset += arr.length;
+            }
+
+            return result.buffer;
+        }
+
+        function displayIndexInfo() {
+            document.getElementById('vocabSize').textContent = indexData.vocabSize;
+            document.getElementById('eosTokenId').textContent = indexData.eosTokenId;
+            document.getElementById('initialStateId').textContent = indexData.initialStateId;
+            document.getElementById('finalStatesCount').textContent = indexData.finalStates.length;
+            document.getElementById('totalStates').textContent = indexData.numStates;
+            document.getElementById('indexType').textContent = indexData.indexType;
+        }
+
+        function resetToInitial() {
+            if (!indexData) return;
+            currentState = indexData.initialStateId;
+            pathHistory = [];
+            updateCurrentStateDisplay();
+            displayTransitions();
+        }
+
+        function updateCurrentStateDisplay() {
+            const badge = document.getElementById('currentStateBadge');
+            badge.textContent = `State: ${currentState}`;
+            
+            if (indexData.finalStates.includes(currentState)) {
+                badge.classList.add('final');
+                badge.textContent += ' (FINAL)';
+            } else {
+                badge.classList.remove('final');
+            }
+
+            // Update path display
+            const pathContainer = document.getElementById('pathTokens');
+            if (pathHistory.length === 0) {
+                pathContainer.innerHTML = '<span style="color: #999;">Start from initial state</span>';
+            } else {
+                pathContainer.innerHTML = pathHistory.map(tokenId => {
+                    const tokenValue = vocabulary?.reverse[tokenId];
+                    return `<span class="path-token">${tokenId}${tokenValue ? ': "' + escapeHtml(tokenValue) + '"' : ''}</span>`;
+                }).join('');
+            }
+
+            // Update concatenated result
+            const resultContainer = document.getElementById('concatenatedResult');
+            if (pathHistory.length === 0 || !vocabulary) {
+                resultContainer.style.display = 'none';
+            } else {
+                resultContainer.style.display = 'block';
+                const concatenated = pathHistory.map(tokenId => vocabulary.reverse[tokenId] || '').join('');
+                document.getElementById('concatenatedText').textContent = concatenated;
+            }
+        }
+
+        function displayTransitions() {
+            const grid = document.getElementById('transitionsGrid');
+            const stateTransitions = indexData.transitions[currentState];
+
+            if (!stateTransitions || Object.keys(stateTransitions).length === 0) {
+                grid.innerHTML = '<div style="grid-column: 1/-1; text-align: center; color: #999; padding: 20px;">No transitions available from this state</div>';
+                return;
+            }
+
+            grid.innerHTML = '';
+            
+            for (const [tokenId, nextStateId] of Object.entries(stateTransitions)) {
+                const card = document.createElement('div');
+                card.className = 'transition-card';
+                card.onclick = () => selectTransition(tokenId);
+
+                const tokenValue = vocabulary?.reverse[tokenId];
+                
+                card.innerHTML = `
+                    <div class="token-id">Token: ${tokenId}</div>
+                    ${tokenValue ? `<div class="token-value">"<span class="token-value-text">${escapeHtml(tokenValue)}</span>"</div>` : ''}
+                    <div class="next-state">→ State: ${nextStateId}${indexData.finalStates.includes(parseInt(nextStateId)) ? ' (FINAL)' : ''}</div>
+                `;
+
+                grid.appendChild(card);
+            }
+        }
+
+        function selectTransition(tokenId) {
+            document.getElementById('tokenInput').value = tokenId;
+            applyTransition();
+        }
+
+        function applyTransition() {
+            const tokenInput = document.getElementById('tokenInput');
+            const tokenId = parseInt(tokenInput.value);
+            const errorMsg = document.getElementById('errorMessage');
+
+            if (isNaN(tokenId)) {
+                showError('Please enter a valid token ID');
+                return;
+            }
+
+            const stateTransitions = indexData.transitions[currentState];
+            
+            if (!stateTransitions || !stateTransitions[tokenId]) {
+                showError(`No transition found for token ${tokenId} from state ${currentState}`);
+                return;
+            }
+
+            // Apply transition
+            pathHistory.push(tokenId);
+            currentState = stateTransitions[tokenId];
+            
+            // Clear input and error
+            tokenInput.value = '';
+            errorMsg.classList.remove('visible');
+
+            // Update display
+            updateCurrentStateDisplay();
+            displayTransitions();
+        }
+
+        function goBackOneStep() {
+            if (pathHistory.length === 0) {
+                showError('Already at initial state');
+                return;
+            }
+
+            pathHistory.pop();
+            
+            // Reconstruct state by replaying path
+            currentState = indexData.initialStateId;
+            for (const tokenId of pathHistory) {
+                currentState = indexData.transitions[currentState][tokenId];
+            }
+
+            document.getElementById('errorMessage').classList.remove('visible');
+            updateCurrentStateDisplay();
+            displayTransitions();
+        }
+
+        function showError(message) {
+            const errorMsg = document.getElementById('errorMessage');
+            errorMsg.textContent = message;
+            errorMsg.classList.add('visible');
+            setTimeout(() => errorMsg.classList.remove('visible'), 3000);
+        }
+
+        function escapeHtml(text) {
+            const div = document.createElement('div');
+            div.textContent = text;
+            return div.innerHTML;
+        }
+
+        // Allow Enter key to apply transition
+        document.getElementById('tokenInput').addEventListener('keypress', (e) => {
+            if (e.key === 'Enter') {
+                applyTransition();
+            }
+        });
+    </script>
+</body>
+</html>