Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions core/src/tokenization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,23 @@ fn encode_input(
strategy: TruncationStrategy::LongestFirst,
stride: 0,
});
if inputs.is_encoded() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be merged with the match below? Since `is_encoded` is basically a match on `EncodingInput::Vector`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

let seq_len = inputs.len();
if seq_len > max_input_length {
return Err(TextEmbeddingsError::Validation(format!(
"`inputs` must have less than {max_input_length} tokens. Given: {seq_len}"
)));
}
return inputs.try_into_encoding(position_offset);
}

let inputs: EncodeInput = match inputs {
EncodingInput::Single(s) => s.into(),
EncodingInput::Dual(s1, s2) => (s1, s2).into(),
_ => Err(TextEmbeddingsError::Validation(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now, this branch cannot be reached. Can we merge the logic above here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll give it a try.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merged the logic above per your recommendation, which made this branch irrelevant.

"`inputs` must be a string or a tuple of strings".to_string(),
))?,
};

let encoding = tokenizer
.with_truncation(truncate_params)?
.encode(inputs, true)?;
Expand All @@ -143,7 +154,6 @@ fn encode_input(
}

metrics::histogram!("te_request_input_length", seq_len as f64);

Ok(Encoding {
input_ids: encoding.get_ids().to_vec(),
token_type_ids: encoding.get_type_ids().to_vec(),
Expand All @@ -163,13 +173,45 @@ pub struct Encoding {
/// The forms of input the tokenization pipeline accepts.
pub enum EncodingInput {
    /// A single text sequence to be tokenized.
    Single(String),
    /// A pair of text sequences to be tokenized together.
    Dual(String, String),
    /// Pre-tokenized input: raw token ids that bypass the tokenizer.
    Vector(Vec<u32>),
}

impl EncodingInput {
fn is_empty(&self) -> bool {
match self {
EncodingInput::Single(s) => s.is_empty(),
EncodingInput::Dual(s1, s2) => s1.is_empty() && s2.is_empty(),
EncodingInput::Vector(v) => v.is_empty(),
}
}

/// Returns `true` when the input is already tokenized (a vector of
/// token ids) and therefore does not need to go through the tokenizer.
fn is_encoded(&self) -> bool {
    // Idiomatic replacement for an exhaustive three-arm boolean match:
    // only the `Vector` variant holds pre-encoded ids.
    matches!(self, EncodingInput::Vector(_))
}

/// Length of the input, compared against `max_input_length` by callers.
///
/// NOTE(review): units are mixed here — `String::len` counts *bytes*
/// (not chars or tokens), while the `Vector` arm counts token ids.
/// Confirm that comparing byte length against a token limit is the
/// intended semantics for the text variants.
fn len(&self) -> usize {
    match self {
        EncodingInput::Single(s) => s.len(),
        EncodingInput::Dual(s1, s2) => s1.len() + s2.len(),
        EncodingInput::Vector(v) => v.len(),
    }
}

fn try_into_encoding(&self, position_offset: usize) -> Result<Encoding, TextEmbeddingsError> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this needs to be a separate function. You can just take the logic here and add it to the match directly.

match self {
EncodingInput::Vector(v) => Ok(Encoding {
input_ids: v.clone(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There needs to be validation on whether v contains invalid ids, e.g. values that are outside of the vocab.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

token_type_ids: vec![0; v.len()],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit brittle. In the future this could be false.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

position_ids: (position_offset as u32..(v.len() + position_offset) as u32)
.collect::<Vec<_>>(),
}),
_ => Err(TextEmbeddingsError::Validation(
"`inputs` must be a vector of input_ids".to_string(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a logic error on our part and should not be a concern to the client.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this.

)),
}
}
}
Expand Down
15 changes: 8 additions & 7 deletions router/src/http/server.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
/// HTTP Server logic
use crate::http::types::{
EmbedRequest, EmbedResponse, Input, OpenAICompatEmbedding, OpenAICompatErrorResponse,
OpenAICompatRequest, OpenAICompatResponse, OpenAICompatUsage, PredictInput, PredictRequest,
PredictResponse, Prediction, Rank, RerankRequest, RerankResponse, Sequence,
EmbedRequest, EmbedResponse, Input, OpenAICompatEmbedding,
OpenAICompatErrorResponse, OpenAICompatRequest, OpenAICompatResponse, OpenAICompatUsage,
PredictInput, PredictRequest, PredictResponse, Prediction, Rank, RerankRequest, RerankResponse,
Sequence,
};
use crate::{
shutdown, ClassifierModel, EmbeddingModel, ErrorResponse, ErrorType, Info, ModelType,
Expand Down Expand Up @@ -455,7 +456,7 @@ async fn embed(
Input::Single(input) => {
metrics::increment_counter!("te_request_count", "method" => "single");

let compute_chars = input.chars().count();
let compute_chars = input.count_chars();

let permit = infer.try_acquire_permit().map_err(ErrorResponse::from)?;
let response = infer
Expand Down Expand Up @@ -499,7 +500,7 @@ async fn embed(
let mut compute_chars = 0;

for input in inputs {
compute_chars += input.chars().count();
compute_chars += input.count_chars();

let local_infer = infer.clone();
futures.push(async move {
Expand Down Expand Up @@ -591,7 +592,7 @@ async fn openai_embed(
Input::Single(input) => {
metrics::increment_counter!("te_request_count", "method" => "single");

let compute_chars = input.chars().count();
let compute_chars = input.count_chars();

let permit = infer.try_acquire_permit().map_err(ErrorResponse::from)?;
let response = infer
Expand Down Expand Up @@ -639,7 +640,7 @@ async fn openai_embed(
let mut compute_chars = 0;

for input in inputs {
compute_chars += input.chars().count();
compute_chars += input.count_chars();

let local_infer = infer.clone();
futures.push(async move {
Expand Down
29 changes: 27 additions & 2 deletions router/src/http/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,36 @@ pub(crate) struct Rank {
#[derive(Serialize, ToSchema)]
pub(crate) struct RerankResponse(pub Vec<Rank>);

#[derive(Deserialize, ToSchema, Debug)]
#[serde(untagged)]
/// A single request input: either raw text or already-tokenized ids.
/// With `serde(untagged)`, deserialization tries the variants in
/// declaration order and picks the first that matches.
pub(crate) enum InputType {
    /// Plain text to be tokenized server-side.
    SingleString(String),
    /// A single token id.
    SingleInt(u32),
    /// A sequence of token ids (pre-tokenized input).
    VectorInt(Vec<u32>),
}
impl InputType {
pub(crate) fn count_chars(&self) -> usize {
match self {
InputType::SingleString(s) => s.chars().count(),
InputType::SingleInt(_) => 1,
InputType::VectorInt(v) => v.len(),
Comment on lines +264 to +265
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this how OpenAI counts when ids are given to the API? Or do they still count the chars by decoding the ids?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll look into it and modify this per my findings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@OlivierDehaene I looked through the source of openai-python v1.7.1 and couldn't find a reference to character counting in the embeddings API.
Should count_chars return 0 for InputType::SingleInt and InputType::VectorInt for correctness?
Let me know how you want to proceed.

}
}
}
impl From<InputType> for EncodingInput {
fn from(value: InputType) -> Self {
match value {
InputType::SingleString(s) => Self::Single(s),
InputType::SingleInt(i) => Self::Vector(vec![i]),
InputType::VectorInt(v) => Self::Vector(v),
}
}
}
#[derive(Deserialize, ToSchema)]
#[serde(untagged)]
/// Top-level `inputs` field of a request: one input or a batch.
/// NOTE(review): with `serde(untagged)`, variants are tried in order,
/// so a JSON array of integers will match `Single(VectorInt)` rather
/// than `Batch` — confirm this precedence is intended.
pub(crate) enum Input {
    Single(InputType),
    Batch(Vec<InputType>),
}

#[derive(Deserialize, ToSchema)]
Expand Down
27 changes: 25 additions & 2 deletions router/tests/test_http_embed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ async fn test_embeddings() -> Result<()> {
let request = json!({
"inputs": "test"
});

let client = reqwest::Client::new();
let res = client
.post("http://0.0.0.0:8090/embed")
Expand All @@ -31,6 +30,18 @@ async fn test_embeddings() -> Result<()> {
let matcher = YamlMatcher::<Vec<Vec<Score>>>::new();
insta::assert_yaml_snapshot!("embeddings_single", embeddings_single, &matcher);

let test_tokens = vec![[101, 3231, 102]]; // tokenized "test"
let request = json!({"inputs": &test_tokens});
let res = client
.post("http://0.0.0.0:8090/embed")
.json(&request)
.send()
.await?;

let embeddings_single = res.json::<Vec<Vec<Score>>>().await?;
let matcher = YamlMatcher::<Vec<Vec<Score>>>::new();
insta::assert_yaml_snapshot!("embeddings_single", embeddings_single, &matcher);

let request = json!({
"inputs": vec!["test", "test", "test", "test", "test"],
});
Expand All @@ -41,10 +52,22 @@ async fn test_embeddings() -> Result<()> {
.json(&request)
.send()
.await?;

let embeddings_batch = res.json::<Vec<Vec<Score>>>().await?;
insta::assert_yaml_snapshot!("embeddings_batch", embeddings_batch, &matcher);
for embeddings in &embeddings_batch {
assert_eq!(embeddings, &embeddings_single[0]);
}

let request =
json!({"inputs": &test_tokens.repeat(request["inputs"].as_array().unwrap().len())});
let res = client
.post("http://0.0.0.0:8090/embed")
.json(&request)
.send()
.await?;

let embeddings_batch = res.json::<Vec<Vec<Score>>>().await?;
insta::assert_yaml_snapshot!("embeddings_batch", embeddings_batch, &matcher);
for embeddings in &embeddings_batch {
assert_eq!(embeddings, &embeddings_single[0]);
}
Expand Down