jd-opensource
diff --git a/‎xllm/api_service/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎xllm/api_service/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎xllm/api_service/api_service.cpp‎
100755100644
Lines changed: 29 additions & 7 deletions b/‎xllm/api_service/api_service.cpp‎
100755100644
Lines changed: 29 additions & 7 deletions
diff --git a/‎xllm/api_service/api_service.h‎
Lines changed: 1 addition & 0 deletions b/‎xllm/api_service/api_service.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎xllm/api_service/chat_service_impl.cpp‎
Lines changed: 4 additions & 37 deletions b/‎xllm/api_service/chat_service_impl.cpp‎
Lines changed: 4 additions & 37 deletions
diff --git a/‎xllm/api_service/embedding_service_impl.cpp‎
Lines changed: 53 additions & 1 deletion b/‎xllm/api_service/embedding_service_impl.cpp‎
Lines changed: 53 additions & 1 deletion
diff --git a/‎xllm/api_service/embedding_service_impl.h‎
Lines changed: 15 additions & 0 deletions b/‎xllm/api_service/embedding_service_impl.h‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎xllm/api_service/mm_service_utils.h‎
Lines changed: 80 additions & 0 deletions b/‎xllm/api_service/mm_service_utils.h‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎xllm/api_service/non_stream_call.h‎
Lines changed: 2 additions & 0 deletions b/‎xllm/api_service/non_stream_call.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎xllm/core/framework/model/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎xllm/core/framework/model/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ cc_library(
     stream_call.h
     models_service_impl.h
     stream_output_parser.h
+    mm_service_utils.h
   SRCS
     api_service.cpp
     call.cpp
 
@@ -64,6 +64,8 @@ APIService::APIService(Master* master,
     auto vlm_master = dynamic_cast<VLMMaster*>(master);
     mm_chat_service_impl_ =
         std::make_unique<MMChatServiceImpl>(vlm_master, model_names);
+    mm_embedding_service_impl_ =
+        std::make_unique<MMEmbeddingServiceImpl>(vlm_master, model_names);
   } else if (FLAGS_backend == "dit") {
     image_generation_service_impl_ =
         std::make_unique<ImageGenerationServiceImpl>(
@@ -190,10 +192,13 @@ void APIService::Embeddings(::google::protobuf::RpcController* controller,
   // TODO with xllm-service
 }
 
-void APIService::EmbeddingsHttp(::google::protobuf::RpcController* controller,
-                                const proto::HttpRequest* request,
-                                proto::HttpResponse* response,
-                                ::google::protobuf::Closure* done) {
+namespace {
+template <typename EmbeddingCall, typename Service>
+void handle_embedding_request(std::unique_ptr<Service>& embedding_service_impl_,
+                              ::google::protobuf::RpcController* controller,
+                              const proto::HttpRequest* request,
+                              proto::HttpResponse* response,
+                              ::google::protobuf::Closure* done) {
   xllm::ClosureGuard done_guard(
       done,
       std::bind(request_in_metric, nullptr),
@@ -202,12 +207,13 @@ void APIService::EmbeddingsHttp(::google::protobuf::RpcController* controller,
     LOG(ERROR) << "brpc request | respose | controller is null";
     return;
   }
-
   auto arena = response->GetArena();
   auto req_pb =
-      google::protobuf::Arena::CreateMessage<proto::EmbeddingRequest>(arena);
+      google::protobuf::Arena::CreateMessage<typename EmbeddingCall::ReqType>(
+          arena);
   auto resp_pb =
-      google::protobuf::Arena::CreateMessage<proto::EmbeddingResponse>(arena);
+      google::protobuf::Arena::CreateMessage<typename EmbeddingCall::ResType>(
+          arena);
 
   auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
   std::string error;
@@ -230,6 +236,22 @@ void APIService::EmbeddingsHttp(::google::protobuf::RpcController* controller,
       ctrl, done_guard.release(), req_pb, resp_pb);
   embedding_service_impl_->process_async(call);
 }
+}  // namespace
+
+// HTTP entry point for embedding requests. Dispatches on FLAGS_backend:
+// "llm" is served by the text embedding service, "vlm" by the multimodal
+// embedding service. Any other backend has no embedding service, so the
+// request is failed explicitly instead of being silently dropped.
+void APIService::EmbeddingsHttp(::google::protobuf::RpcController* controller,
+                                const proto::HttpRequest* request,
+                                proto::HttpResponse* response,
+                                ::google::protobuf::Closure* done) {
+  if (FLAGS_backend == "llm") {
+    CHECK(embedding_service_impl_) << " embedding service is invalid.";
+    handle_embedding_request<EmbeddingCall, EmbeddingServiceImpl>(
+        embedding_service_impl_, controller, request, response, done);
+  } else if (FLAGS_backend == "vlm") {
+    CHECK(mm_embedding_service_impl_) << " mm embedding service is invalid.";
+    handle_embedding_request<MMEmbeddingCall, MMEmbeddingServiceImpl>(
+        mm_embedding_service_impl_, controller, request, response, done);
+  } else {
+    // The closure must be run on every path; otherwise the RPC is never
+    // completed and the client waits until it times out.
+    LOG(ERROR) << "embeddings are not supported by backend: " << FLAGS_backend;
+    if (controller != nullptr) {
+      controller->SetFailed("embeddings are not supported by this backend.");
+    }
+    if (done != nullptr) {
+      done->Run();
+    }
+  }
+}
 
 void APIService::ImageGeneration(::google::protobuf::RpcController* controller,
                                  const proto::ImageGenerationRequest* request,
 
@@ -120,6 +120,7 @@ class APIService : public proto::XllmAPIService {
   std::unique_ptr<ChatServiceImpl> chat_service_impl_;
   std::unique_ptr<MMChatServiceImpl> mm_chat_service_impl_;
   std::unique_ptr<EmbeddingServiceImpl> embedding_service_impl_;
+  std::unique_ptr<MMEmbeddingServiceImpl> mm_embedding_service_impl_;
   std::unique_ptr<ModelsServiceImpl> models_service_impl_;
   std::unique_ptr<ImageGenerationServiceImpl> image_generation_service_impl_;
   std::unique_ptr<RerankServiceImpl> rerank_service_impl_;
 
@@ -36,6 +36,7 @@ limitations under the License.
 #include "core/runtime/vlm_master.h"
 #include "core/util/utils.h"
 #include "core/util/uuid.h"
+#include "mm_service_utils.h"
 
 namespace xllm {
 namespace {
@@ -737,43 +738,9 @@ void MMChatServiceImpl::process_async_impl(std::shared_ptr<MMChatCall> call) {
       rpc_request, call->get_x_request_id(), call->get_x_request_time());
 
   std::vector<Message> messages;
-  messages.reserve(rpc_request.messages_size());
-
-  for (const auto& req_message : req_messages) {
-    MMContentVec contents;
-    for (const auto& input : req_message.content()) {
-      auto& item = const_cast<::xllm::proto::MMInputData&>(input);
-      if (item.type() == "text") {
-        contents.emplace_back(item.type(), *item.release_text());
-      } else if (item.type() == "image_url") {
-        ImageURL image_url;
-        image_url.url = std::move(*item.mutable_image_url()->release_url());
-        contents.emplace_back(item.type(), image_url);
-      } else if (item.type() == "video_url") {
-        VideoURL video_url;
-        video_url.url = std::move(*item.mutable_video_url()->release_url());
-        contents.emplace_back(item.type(), video_url);
-      } else if (item.type() == "audio_url") {
-        AudioURL audio_url;
-        audio_url.url = std::move(*item.mutable_audio_url()->release_url());
-        contents.emplace_back(item.type(), audio_url);
-      } else {
-        call->finish_with_error(StatusCode::INVALID_ARGUMENT,
-                                "message content type is invalid.");
-        return;
-      }
-    }
-    messages.emplace_back(req_message.role(), std::move(contents));
-  }
-
-  //  check if the request image number exceeds the allowed image limit.
-  for (auto& msg : messages) {
-    if (msg.calc_count("image_url") > master_->get_image_limit()) {
-      call->finish_with_error(StatusCode::INVALID_ARGUMENT,
-                              "Number of images in a single message exceeds "
-                              "the allowed image limit.");
-      return;
-    }
+  if (!build_messages<MMChatCall>(
+          req_messages, messages, call, master_->get_image_limit())) {
+    return;
   }
 
   bool include_usage = false;
 
@@ -21,13 +21,15 @@ limitations under the License.
 
 #include "common/instance_name.h"
 #include "framework/request/request_params.h"
+#include "mm_service_utils.h"
 #include "runtime/llm_master.h"
 #include "util/utils.h"
 #include "util/uuid.h"
 
 namespace xllm {
 namespace {
 
+template <typename EmbeddingCall>
 bool send_result_to_client_brpc(std::shared_ptr<EmbeddingCall> call,
                                 const std::string& request_id,
                                 int64_t created_time,
@@ -113,9 +115,59 @@ void EmbeddingServiceImpl::process_async_impl(
           }
         }
 
-        return send_result_to_client_brpc(
+        return send_result_to_client_brpc<EmbeddingCall>(
             call, request_id, created_time, model, req_output);
       });
 }
 
+// Builds the multimodal embedding service for the given model names.
+// `master` is stored as a raw pointer and is not owned here; it must be
+// non-null (enforced by the CHECK below).
+MMEmbeddingServiceImpl::MMEmbeddingServiceImpl(
+    VLMMaster* master,
+    const std::vector<std::string>& models)
+    : APIServiceImpl(models), master_(master) {
+  CHECK(master_ != nullptr);
+}
+
+// Handles one multimodal embedding request: validates the requested model,
+// converts the protobuf messages into `Message` objects and hands the request
+// to the VLM master. The result (or error) is delivered through `call` from
+// the output callback.
+void MMEmbeddingServiceImpl::process_async_impl(
+    std::shared_ptr<MMEmbeddingCall> call) {
+  const auto& rpc_request = call->request();
+
+  // reject requests for a model this service does not serve
+  const auto& model = rpc_request.model();
+  if (!models_.contains(model)) {
+    call->finish_with_error(StatusCode::UNKNOWN, "Model not supported");
+    return;
+  }
+
+  // NOTE(review): the original note says is_embeddings and max_tokens = 1 are
+  // set so the engine steps once; presumably done by this RequestParams
+  // constructor - confirm against its definition.
+  RequestParams request_params(
+      rpc_request, call->get_x_request_id(), call->get_x_request_time());
+
+  std::vector<Message> messages;
+  const bool messages_ok = build_messages<MMEmbeddingCall>(
+      rpc_request.messages(), messages, call, master_->get_image_limit());
+  if (!messages_ok) {
+    // build_messages has already finished the call with an error
+    return;
+  }
+
+  // copy the id before request_params is moved into the scheduler
+  auto request_id = request_params.request_id;
+
+  // schedule the request
+  master_->handle_request(
+      std::move(messages),
+      std::move(request_params),
+      [call,
+       model,
+       request_id,
+       created_time = absl::ToUnixSeconds(absl::Now())](
+          const RequestOutput& req_output) -> bool {
+        const auto& status = req_output.status;
+        if (status.has_value() && !status.value().ok()) {
+          return call->finish_with_error(status.value().code(),
+                                         status.value().message());
+        }
+
+        return send_result_to_client_brpc<MMEmbeddingCall>(
+            call, request_id, created_time, model, req_output);
+      });
+}
 }  // namespace xllm
@@ -19,6 +19,7 @@ limitations under the License.
 #include "api_service/api_service_impl.h"
 #include "api_service/call.h"
 #include "api_service/non_stream_call.h"
+#include "core/runtime/vlm_master.h"
 #include "embedding.pb.h"
 
 namespace xllm {
@@ -40,4 +41,18 @@ class EmbeddingServiceImpl final : public APIServiceImpl<EmbeddingCall> {
   LLMMaster* master_ = nullptr;
 };
 
+// Non-streaming call carrying a multimodal embedding request and the
+// embedding response.
+using MMEmbeddingCall =
+    NonStreamCall<proto::MMEmbeddingRequest, proto::EmbeddingResponse>;
+
+// Embedding service for multimodal requests, backed by a VLMMaster.
+// Declared final to match EmbeddingServiceImpl; it is not meant to be
+// derived from.
+class MMEmbeddingServiceImpl final : public APIServiceImpl<MMEmbeddingCall> {
+ public:
+  // `master` must be non-null; it is not owned by this service.
+  MMEmbeddingServiceImpl(VLMMaster* master,
+                         const std::vector<std::string>& models);
+  // brpc call_data needs to use shared_ptr
+  void process_async_impl(std::shared_ptr<MMEmbeddingCall> call);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MMEmbeddingServiceImpl);
+  VLMMaster* master_ = nullptr;
+};
+
 }  // namespace xllm
@@ -0,0 +1,80 @@
+/* Copyright 2025 The xLLM Authors. All Rights Reserved.
+Copyright 2024 The ScaleLLM Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://github.com/jd-opensource/xllm/blob/main/LICENSE
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#pragma once
+
+#include "core/common/message.h"
+#include "core/common/types.h"
+#include "multimodal.pb.h"
+
+namespace xllm {
+
+// Converts the protobuf messages of a multimodal request into `Message`
+// objects.
+//
+// req_messages: messages of the incoming request. Payload strings (text and
+//               urls) are moved out of them, so they must not be read again
+//               afterwards.
+// out_messages: cleared first, then filled with one entry per request message.
+// call:         used only to report an error to the client.
+// image_limit:  maximum number of "image_url" items allowed per message.
+//
+// Returns true on success. On an unknown content type, or when a message
+// exceeds `image_limit`, finishes `call` with INVALID_ARGUMENT and returns
+// false; the caller must then stop processing the request.
+template <typename Call>
+bool build_messages(const google::protobuf::RepeatedPtrField<
+                        xllm::proto::MMChatMessage>& req_messages,
+                    std::vector<Message>& out_messages,
+                    std::shared_ptr<Call> call,
+                    int image_limit) {
+  out_messages.clear();
+  out_messages.reserve(req_messages.size());
+
+  for (const auto& req_message : req_messages) {
+    MMContentVec contents;
+
+    for (const auto& input : req_message.content()) {
+      // const is cast away so the payload can be moved out instead of copied.
+      auto& item = const_cast<::xllm::proto::MMInputData&>(input);
+      const auto& type = item.type();
+
+      // Payloads are moved out through mutable_*() rather than release_*():
+      // release_*() transfers ownership of a heap string that would have to be
+      // deleted by the caller, and may return nullptr for an unset field.
+      // mutable_*() is never null and leaves ownership with the message.
+      if (type == "text") {
+        std::string text = std::move(*item.mutable_text());
+        contents.emplace_back(type, text);
+
+      } else if (type == "image_url") {
+        ImageURL image_url;
+        image_url.url = std::move(*item.mutable_image_url()->mutable_url());
+        contents.emplace_back(type, image_url);
+
+      } else if (type == "video_url") {
+        VideoURL video_url;
+        video_url.url = std::move(*item.mutable_video_url()->mutable_url());
+        contents.emplace_back(type, video_url);
+
+      } else if (type == "audio_url") {
+        AudioURL audio_url;
+        audio_url.url = std::move(*item.mutable_audio_url()->mutable_url());
+        contents.emplace_back(type, audio_url);
+
+      } else {
+        call->finish_with_error(StatusCode::INVALID_ARGUMENT,
+                                "message content type is invalid.");
+        return false;
+      }
+    }
+
+    out_messages.emplace_back(req_message.role(), std::move(contents));
+  }
+
+  // check if the request image number exceeds the allowed image limit.
+  for (auto& msg : out_messages) {
+    if (msg.calc_count("image_url") > image_limit) {
+      call->finish_with_error(StatusCode::INVALID_ARGUMENT,
+                              "Number of images in a single message exceeds "
+                              "the allowed image limit.");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+}  // namespace xllm
@@ -33,6 +33,8 @@ namespace xllm {
 template <typename Request, typename Response>
 class NonStreamCall : public Call {
  public:
+  using ReqType = Request;
+  using ResType = Response;
   NonStreamCall(brpc::Controller* controller,
                 ::google::protobuf::Closure* done,
                 Request* request,
 
@@ -28,6 +28,7 @@ cc_library(
     causal_vlm.h
     dit_model.h
     embedding_lm.h
+    embedding_vlm.h
     model_args.h
     npu_dp_ep_padding.h
     model_input_params.h