From 4b357ffdfac52ba1b8e38134e87e4e229edd7e83 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 19 Sep 2025 11:19:30 +0200
Subject: [PATCH 01/43] tests are green

---
 .../text_generation/parsed_output_sample.cpp  |  67 +++++++
 src/cpp/include/openvino/genai/parsers.hpp    | 102 +++++++++++
 src/cpp/src/parsers.cpp                       | 167 ++++++++++++++++++
 src/cpp/src/parsers.hpp                       |  48 +++++
 tests/cpp/CMakeLists.txt                      |   2 +-
 tests/cpp/parser.cpp                          | 111 ++++++++++++
 6 files changed, 496 insertions(+), 1 deletion(-)
 create mode 100644 samples/cpp/text_generation/parsed_output_sample.cpp
 create mode 100644 src/cpp/include/openvino/genai/parsers.hpp
 create mode 100644 src/cpp/src/parsers.cpp
 create mode 100644 src/cpp/src/parsers.hpp
 create mode 100644 tests/cpp/parser.cpp

diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp
new file mode 100644
index 0000000000..ada4af6751
--- /dev/null
+++ b/samples/cpp/text_generation/parsed_output_sample.cpp
@@ -0,0 +1,67 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/genai/llm_pipeline.hpp"
+#include "openvino/genai/parsers.hpp"
+#include "openvino/genai/text_streamer.hpp"
+
+using ov::genai::ParsingState;
+
+class CurrentStreamer : public ov::genai::TextParserStreamer {
+private:
+    ParsingState m_previous_state = ParsingState::UNDEFINED;
+public:
+    CurrentStreamer(const ov::genai::Tokenizer& tokenizer)
+        : ov::genai::TextParserStreamer(tokenizer) {}
+    ov::genai::StreamingStatus write(const ov::genai::ParsedMessage& message) {
+
+        // if (m_previous_state == ParsingState::UNDEFINED && message["state"] == ParsingState::REASONING) {
+        //     std::cout << "Reasoning: " << std::endl;
+        //     std::cout << message["reasoning_content"].value();
+        // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::CONTENT) {
+        //     std::cout << std::endl << "Content: " << std::endl;
+        //     std::cout << message["content"].value();
+        // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::REASONING) {
+        //     std::cout << message["reasoning_content"].value();
+        // } else if (m_previous_state == ParsingState::CONTENT && message["state"] == ParsingState::CONTENT) {
+        //     std::cout << message["content"].value();
+        // }
+        std::cout << message.at("content");
+
+        return ov::genai::StreamingStatus::RUNNING;
+    }
+};
+
+
+int main(int argc, char* argv[]) try {
+    if (argc < 2 || argc > 3) {
+        throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> [DEVICE]");
+    }
+    // std::string prompt = "<|begin▁of▁sentence|><|User|>Please think of a difficult task to solve x**2 + y**2 = 1<|Assistant|>";
+    std::string prompt = "<|begin▁of▁sentence|><|User|>Why is the Sky blue?<|Assistant|>";
+    std::string models_path = argv[1];
+
+    // Default device is CPU; can be overridden by the second argument
+    std::string device = (argc == 3) ? argv[2] : "CPU";  // GPU, NPU can be used as well
+    ov::genai::LLMPipeline pipe(models_path, device);
+
+    ov::genai::GenerationConfig config;
+    config.max_new_tokens = 1000;
+
+    auto tok = pipe.get_tokenizer();
+    std::shared_ptr<CurrentStreamer> streamer = std::make_shared<CurrentStreamer>(tok);
+
+    pipe.generate(prompt, config, streamer);
+
+
+} catch (const std::exception& error) {
+    try {
+        std::cerr << error.what() << '\n';
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+} catch (...) {
+    try {
+        std::cerr << "Non-exception object thrown\n";
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+}
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
new file mode 100644
index 0000000000..043141a73f
--- /dev/null
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -0,0 +1,102 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "openvino/genai/text_streamer.hpp"
+
+namespace ov {
+namespace genai {
+
+enum class ParsingState {
+    CONTENT,
+    REASONING,
+    TOOL_CALLING,
+    UNDEFINED
+};
+
+
+using ParsedMessage = std::map<std::string, std::string>;
+
+class ParsedJSONMessage {
+public:
+    std::map<std::string, std::string> content;
+};
+
+
+// struct DeltaMessage {
+//     std::map<std::string, std::string> content;
+//     std::optional<std::string> content;
+//     std::optional<std::string> reasoning_content;
+//     ParsingState state = ParsingState::UNDEFINED;
+
+//     // std::vector<DeltaToolCall> tool_calls;
+
+//     DeltaMessage() = default;
+// };
+
+
+class IncrementalParserBase {
+public:
+    IncrementalParserBase() = default;
+
+    virtual ParsedMessage parse(
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
+    ) = 0;
+};
+
+class ParserBase {
+public:
+    ParserBase() = default;
+
+    virtual ParsedMessage parse(ParsedMessage& text) = 0;
+};
+
+
+
+class TextParserStreamer : public ov::genai::TextStreamer {
+public:
+    TextParserStreamer(const Tokenizer& tokenizer);
+
+    virtual StreamingStatus write(ParsedMessage& message) = 0;
+
+    ov::genai::CallbackTypeVariant write(std::string message);
+private:
+    std::string m_text_buffer;
+    std::shared_ptr<IncrementalParserBase> m_reasoning_parser;
+    std::shared_ptr<IncrementalParserBase> m_tool_calling_parser;
+};
+
+class Llama32PythonicParser : public ParserBase {
+// Does not modify original content, only extracts and adds tool calls
+public:
+    Llama32PythonicParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+
+    ParsedMessage parse(ParsedMessage& input) override;
+
+private:
+    bool m_keep_original_content = true;
+};
+
+class BaseReasoningParser : public ParserBase {
+public:
+    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>") :
+        m_expect_open_tag(expect_open_tag),
+        m_keep_original_content(keep_original_content),
+        m_open_tag(open_tag),
+        m_close_tag(close_tag) {}
+
+    ParsedMessage parse(ParsedMessage& input) override;
+
+private:
+    bool m_expect_open_tag = true;
+    bool m_keep_original_content = true;
+    std::string m_open_tag = "<think>";
+    std::string m_close_tag = "</think>";
+};
+
+
+} // namespace genai
+} // namespace ov
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
new file mode 100644
index 0000000000..8a932bbba7
--- /dev/null
+++ b/src/cpp/src/parsers.cpp
@@ -0,0 +1,167 @@
+#include "openvino/genai/parsers.hpp"
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <optional>
+#include <regex>
+#include <string>
+#include <nlohmann/json.hpp>
+
+using namespace std;
+using json = nlohmann::json;
+
+namespace ov::genai {
+
+std::string state_to_string(const ParsingState state) {
+    switch (state) {
+        case ParsingState::CONTENT:
+            return "CONTENT";
+        case ParsingState::REASONING:
+            return "REASONING";
+        case ParsingState::TOOL_CALLING:
+            return "TOOL_CALLING";
+        case ParsingState::UNDEFINED:
+            return "UNDEFINED";
+        default:
+            return "UNKNOWN";
+    }
+}
+
+class DeepSeekR1Parser : public IncrementalParserBase {
+private:
+    bool m_starts_with_thinking = true;
+    ParsingState m_parsing_state = ParsingState::REASONING;
+public:
+    DeepSeekR1Parser() = default;
+    std::map<std::string, std::string> accumulated_parsed;
+
+    ParsedMessage parse(
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
+        ParsedMessage msg;
+
+        if (!m_starts_with_thinking) {
+            m_parsing_state = ParsingState::UNDEFINED;
+        } else {
+            m_parsing_state = ParsingState::REASONING;
+        }
+
+        if (m_parsing_state == ParsingState::UNDEFINED && delta_text.find("<think>") != std::string::npos) {
+            m_parsing_state = ParsingState::REASONING;
+            auto think_idx = delta_text.find("<think>");
+            msg["reasoning_content"] = delta_text.substr(think_idx + std::string("<think>").size(), delta_text.size() - (think_idx + std::string("<think>").size()));
+        } else if (delta_text.find("</think>") != std::string::npos && m_parsing_state == ParsingState::REASONING) {
+            auto think_idx = delta_text.find("</think>");
+
+            msg["reasoning_content"] = delta_text.substr(0, think_idx);
+            msg["content"] = delta_text.substr(think_idx + std::string("</think>").size(), delta_text.size() - (think_idx + std::string("</think>").size()));
+
+            m_parsing_state = ParsingState::CONTENT;
+        } else if (m_parsing_state == ParsingState::REASONING) {
+            msg["reasoning_content"] = delta_text;
+        } else if (m_parsing_state == ParsingState::CONTENT) {
+            msg["content"] = delta_text;
+        } else {
+            throw std::runtime_error("Unexpected state in DeepSeekR1Parser");
+        }
+        msg["state"] = state_to_string(m_parsing_state);
+
+        // TODO: consider accumulating all fields and returning accumulated fields instead of parsing once more at the end.
+
+        // std::string accumulated_reasoning += msg["reasoning_content"];
+        accumulated_parsed["content"] += msg["content"];
+
+        // accumulated_parsed["reasoning_content"] = accumulated_reasoning;
+        // TODO: if thinking is closed, disable parsing and give content without cutting thinking.
+        return msg;
+    }
+};
+
+
+ParsedMessage Llama32PythonicParser::parse(ParsedMessage& input) {
+    // Input example
+    // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
+
+    // Regex to capture the [...] part
+    smatch m;
+    const std::string& text = input.at("content");
+    regex r(R"(\[.*?\])");
+    if (regex_search(text, m, r)) {
+        // Strip outer [ ]
+        string call = m.str().substr(1, m.str().size() - 2);
+
+        // Split function name and arguments
+        size_t pos = call.find('(');
+        string name = call.substr(0, pos);
+        string args = call.substr(pos + 1, call.size() - pos - 2);  // inside (...)
+
+        // Parse arguments of the form key='value'
+        map<string, string> kv;
+        regex arg_re(R"((\w+)\s*=\s*'([^']*)')");
+        auto it = sregex_iterator(args.begin(), args.end(), arg_re);
+        for (; it != sregex_iterator(); ++it) {
+            kv[(*it)[1]] = (*it)[2];
+        }
+        json j = json::array({{
+            {"name", name},
+            {"arguments", kv}
+        }});
+        if (!m_keep_original_content) {
+            input["content"] = regex_replace(text, r, "");
+        }
+        input["tool_calls"] = j.dump();
+        return input;
+    }
+    return ParsedMessage{};
+}
+
+ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) {
+    ParsedMessage res;
+    std::string reasoning_content;
+    const std::string& content = input.at("content");
+    res["content"] = content;
+
+    size_t start = content.find(m_open_tag);
+    size_t end = content.find(m_close_tag);
+
+    if (start != std::string::npos && end != std::string::npos && end > start) {
+        reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
+        if (!m_keep_original_content) {
+            // Remove <think>...</think> from content
+            res["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
+        }
+    } else {
+        reasoning_content = "";
+    }
+
+    res["reasoning_content"] = reasoning_content;
+    return res;
+}
+
+
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer)
+    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
+        return this->write(s);
+    }) {
+        m_reasoning_parser = std::make_shared<DeepSeekR1Parser>();
+    }
+
+StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
+    return StreamingStatus::RUNNING;
+}
+
+ov::genai::CallbackTypeVariant TextParserStreamer::write(std::string message) {
+    // for (auto& parser: m_parsers) {
+    //     if (parser.is_active()) {
+    //         msg = parser.parse(m_text_buffer, message, msg);
+    //     }
+    // }
+
+    // m_text_buffer += message;
+    // return write(msg);
+    return StreamingStatus::RUNNING;
+}
+
+} // namespace ov::genai
diff --git a/src/cpp/src/parsers.hpp b/src/cpp/src/parsers.hpp
new file mode 100644
index 0000000000..21f474e089
--- /dev/null
+++ b/src/cpp/src/parsers.hpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "openvino/genai/text_streamer.hpp"
+
+namespace ov {
+namespace genai {
+
+struct DeltaToolCall;  // Forward declaration, define as needed
+
+struct DeltaMessage {
+    std::optional<std::string> role;
+    std::optional<std::string> content;
+    std::optional<std::string> reasoning_content;
+    // std::vector<DeltaToolCall> tool_calls;
+
+    DeltaMessage()
+        : role(std::nullopt),
+          content(std::nullopt),
+          reasoning_content(std::nullopt) {}
+};
+
+class TextParserStreamer : public ov::genai::TextStreamer {
+public:
+    TextParserStreamer(const Tokenizer& tokenizer);
+
+    StreamingStatus write(const DeltaMessage& message);
+
+    ov::genai::CallbackTypeVariant write(std::string message);
+};
+
+class ReasoningParserBase {
+public:
+    ReasoningParserBase() = default;
+
+    void parse(const std::string& text);
+};
+
+class ToolCallingParserBase {
+public:
+    ToolCallingParserBase() = default;
+
+    void parse(const std::string& text);
+};
+
+} // namespace genai
+} // namespace ov
diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt
index bdf959eb5d..f708e00e55 100644
--- a/tests/cpp/CMakeLists.txt
+++ b/tests/cpp/CMakeLists.txt
@@ -26,7 +26,7 @@ set(TEST_TARGET_NAME "tests_continuous_batching")
 add_executable(${TEST_TARGET_NAME} ${tests_src}
     $)
 
-target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main)
+target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main nlohmann_json::nlohmann_json)
 
 target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src"
     $)
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
new file mode 100644
index 0000000000..35b49fde88
--- /dev/null
+++ b/tests/cpp/parser.cpp
@@ -0,0 +1,111 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+#include "openvino/genai/generation_config.hpp"
+#include "openvino/genai/parsers.hpp"
+#include "nlohmann/json.hpp"
+#include "openvino/genai/llm_pipeline.hpp"
+
+using namespace ov::genai;
+
+nlohmann::json convert_to_json(const ParsedMessage& msg) {
+    nlohmann::json j;
+    for (const auto& [key, value] : msg) {
+        if (key == "tool_calls") {
+            j[key] = nlohmann::json::parse(value);
+            continue;
+        }
+        j[key] = value;
+    }
+    return j;
+}
+
+nlohmann::json run_parser_test(std::shared_ptr<ParserBase> parser, const std::string& prompt, const nlohmann::json& expected) {
+    ParsedMessage input;
+    input["content"] = prompt;
+    return convert_to_json(parser->parse(input));
+}
+
+
+TEST(ParserTest, test_llama32_parser_1) {
+    std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)";
+    nlohmann::json expected;
+
+    // By default content should keep original values.
+    expected["content"] = prompt;
+
+    expected["tool_calls"] = nlohmann::json::array({
+        {
+            {"name", "get_weather"},
+            {"arguments", {
+                {"location", "New York, NY"},
+                {"unit", "celsius"}
+            }}
+        }
+    });
+    std::shared_ptr<ParserBase> parser = std::make_shared<Llama32PythonicParser>();
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
+
+TEST(ParserTest, test_llama32_parser_2) {
+    std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)";
+    nlohmann::json expected;
+
+    // In this test tool calling part will be cut from the content after parsing.
+    expected["content"] = std::string(R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n<|eom_id|>)");
+
+    expected["tool_calls"] = nlohmann::json::array({
+        {
+            {"name", "get_weather"},
+            {"arguments", {
+                {"location", "New York, NY"},
+                {"unit", "celsius"}
+            }}
+        }
+    });
+    auto parser = std::make_shared<Llama32PythonicParser>(/*keep_original_content*/ false);
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
+
+TEST(ParserTest, test_reasoning_parser_1) {
+    std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|><think>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n</think>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
+    nlohmann::json expected;
+
+    // In this test reasoning part will be cut from the content after parsing.
+    expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )");
+
+    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    auto parser = std::make_shared<BaseReasoningParser>(
+        /*expect_open_tag*/ true,
+        /*keep_original_content*/ false
+    );
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
+
+TEST(ParserTest, test_reasoning_parser_2) {
+    std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|><think>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n</think>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
+    nlohmann::json expected;
+
+    // In this test content should keep original values.
+    expected["content"] = prompt;
+
+    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    auto parser = std::make_shared<BaseReasoningParser>(
+        /*expect_open_tag*/ true,
+        /*keep_original_content*/ true
+    );
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
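+
+// A minimal usage sketch, not part of the test suite: how the two ParserBase
+// implementations above compose when chained by hand. The input string and
+// the chaining order are illustrative assumptions only.
+//
+//   ParsedMessage msg;
+//   msg["content"] = "<think>plan</think>[get_weather(location='NY')]";
+//   msg = BaseReasoningParser(true, false).parse(msg);  // moves "plan" into msg["reasoning_content"]
+//   msg = Llama32PythonicParser(false).parse(msg);      // fills msg["tool_calls"] with the call as JSON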

From 525a4d8167e56edf92f7a52f0c9e6f38348d3ce2 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Tue, 23 Sep 2025 22:12:09 +0200
Subject: [PATCH 02/43] python api added

---
 src/cpp/include/openvino/genai/parsers.hpp    |  58 ++++---
 src/cpp/src/parsers.cpp                       | 150 +++++++++---------
 src/python/openvino_genai/__init__.py         |   8 +-
 .../openvino_genai/py_openvino_genai.pyi      |  31 +++-
 src/python/py_openvino_genai.cpp              |   2 +
 src/python/py_parsers.cpp                     |  91 +++++++++++
 src/python/py_streamers.cpp                   |  35 +++-
 tests/cpp/parser.cpp                          |  29 ++++
 tests/python_tests/test_parsers.py            |  49 ++++++
 9 files changed, 351 insertions(+), 102 deletions(-)
 create mode 100644 src/python/py_parsers.cpp
 create mode 100644 tests/python_tests/test_parsers.py

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 043141a73f..291df915a6 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -3,17 +3,14 @@
 
 #pragma once
 #include "openvino/genai/text_streamer.hpp"
+#include <map>
+#include <string>
+#include <variant>
+#include <vector>
 
 namespace ov {
 namespace genai {
 
-enum class ParsingState {
-    CONTENT,
-    REASONING,
-    TOOL_CALLING,
-    UNDEFINED
-};
-
 
 using ParsedMessage = std::map<std::string, std::string>;
 
 class ParsedJSONMessage {
 public:
     std::map<std::string, std::string> content;
 };
 
-
-// struct DeltaMessage {
-//     std::map<std::string, std::string> content;
-//     std::optional<std::string> content;
-//     std::optional<std::string> reasoning_content;
-//     ParsingState state = ParsingState::UNDEFINED;
-
-//     // std::vector<DeltaToolCall> tool_calls;
-
-//     DeltaMessage() = default;
-// };
-
-
 class IncrementalParserBase {
 public:
     IncrementalParserBase() = default;
 
     virtual ParsedMessage parse(
+        ParsedMessage& msg,
         const std::string& previous_text,
         const std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
+
+    virtual bool is_active() const = 0;
+};
+
+class DeepSeekR1ReasoningParser : public IncrementalParserBase {
+private:
+    bool m_starts_with_thinking = true;
+    bool m_think_tag_opened = false;
+    bool m_deactivated = false;
+    std::string m_open_tag = "<think>";
+    std::string m_close_tag = "</think>";
+public:
+    DeepSeekR1ReasoningParser() = default;
+    std::map<std::string, std::string> accumulated_parsed;
+
+    ParsedMessage parse(
+        ParsedMessage& msg,
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
+    ) override;
+    static std::string name() { return "DeepSeekR1ReasoningParser"; }
+    bool is_active() const override;
 };
 
 class ParserBase {
@@ -54,19 +63,20 @@ class ParserBase {
     ParserBase() = default;
 
     virtual ParsedMessage parse(ParsedMessage& text) = 0;
 };
 
+using ParserVariant = std::variant<std::shared_ptr<IncrementalParserBase>, std::string>;
 
 class TextParserStreamer : public ov::genai::TextStreamer {
 public:
-    TextParserStreamer(const Tokenizer& tokenizer);
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
 
     virtual StreamingStatus write(ParsedMessage& message) = 0;
 
     ov::genai::CallbackTypeVariant write(std::string message);
+    ParsedMessage m_parsed_message;
 private:
     std::string m_text_buffer;
-    std::shared_ptr<IncrementalParserBase> m_reasoning_parser;
-    std::shared_ptr<IncrementalParserBase> m_tool_calling_parser;
+    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
 };
 
 class Llama32PythonicParser : public ParserBase {
@@ -75,7 +85,7 @@ class Llama32PythonicParser : public ParserBase {
     Llama32PythonicParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     ParsedMessage parse(ParsedMessage& input) override;
-
+    static std::string name() { return "Llama32PythonicParser"; }
 private:
     bool m_keep_original_content = true;
 };
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 8a932bbba7..76a6eb6c15 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -13,72 +13,54 @@ using json = nlohmann::json;
 
 namespace ov::genai {
 
-std::string state_to_string(const ParsingState state) {
-    switch (state) {
-        case ParsingState::CONTENT:
-            return "CONTENT";
-        case ParsingState::REASONING:
-            return "REASONING";
-        case ParsingState::TOOL_CALLING:
-            return "TOOL_CALLING";
-        case ParsingState::UNDEFINED:
-            return "UNDEFINED";
-        default:
-            return "UNKNOWN";
-    }
-}
+static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_incremental_parsers;
+static std::map<std::string, std::shared_ptr<ParserBase>> registered_base_parsers;
 
-class DeepSeekR1Parser : public IncrementalParserBase {
-private:
-    bool m_starts_with_thinking = true;
-    ParsingState m_parsing_state = ParsingState::REASONING;
-public:
-    DeepSeekR1Parser() = default;
-    std::map<std::string, std::string> accumulated_parsed;
-
-    ParsedMessage parse(
-        const std::string& previous_text,
-        const std::string& delta_text,
-        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
-        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
-        ParsedMessage msg;
-
-        if (!m_starts_with_thinking) {
-            m_parsing_state = ParsingState::UNDEFINED;
-        } else {
-            m_parsing_state = ParsingState::REASONING;
-        }
+bool DeepSeekR1ReasoningParser::is_active() const {
+    return !m_deactivated;
+}
 
-        if (m_parsing_state == ParsingState::UNDEFINED && delta_text.find("<think>") != std::string::npos) {
-            m_parsing_state = ParsingState::REASONING;
-            auto think_idx = delta_text.find("<think>");
-            msg["reasoning_content"] = delta_text.substr(think_idx + std::string("<think>").size(), delta_text.size() - (think_idx + std::string("<think>").size()));
-        } else if (delta_text.find("</think>") != std::string::npos && m_parsing_state == ParsingState::REASONING) {
-            auto think_idx = delta_text.find("</think>");
-
-            msg["reasoning_content"] = delta_text.substr(0, think_idx);
-            msg["content"] = delta_text.substr(think_idx + std::string("</think>").size(), delta_text.size() - (think_idx + std::string("</think>").size()));
-
-            m_parsing_state = ParsingState::CONTENT;
-        } else if (m_parsing_state == ParsingState::REASONING) {
-            msg["reasoning_content"] = delta_text;
-        } else if (m_parsing_state == ParsingState::CONTENT) {
-            msg["content"] = delta_text;
-        } else {
-            throw std::runtime_error("Unexpected state in DeepSeekR1Parser");
-        }
-        msg["state"] = state_to_string(m_parsing_state);
-
-        // TODO: consider accumulating all fields and returning accumulated fields instead of parsing once more at the end.
-
-        // std::string accumulated_reasoning += msg["reasoning_content"];
-        accumulated_parsed["content"] += msg["content"];
-
-        // accumulated_parsed["reasoning_content"] = accumulated_reasoning;
-        // TODO: if thinking is closed, disable parsing and give content without cutting thinking.
-        return msg;
-    }
-};
+ParsedMessage DeepSeekR1ReasoningParser::parse(
+    ParsedMessage& msg,
+    const std::string& previous_text,
+    const std::string& delta_text,
+    const std::optional<std::vector<int64_t>>& previous_tokens,
+    const std::optional<std::vector<int64_t>>& delta_tokens
+) {
+    if (msg.find("reasoning_content") == msg.end()) {
+        msg["reasoning_content"] = "";
+    }
+    if (msg.find("content") == msg.end()) {
+        msg["content"] = "";
+    }
+
+    if (m_deactivated) {
+        msg["content"] += delta_text;
+        return msg;
+    }
+    if (m_starts_with_thinking) {
+        m_think_tag_opened = true;
+    }
+
+    bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos;
+
+    if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos) {
+        // Thinking has started
+        auto think_idx = delta_text.find(m_open_tag);
+        msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size()));
+        m_think_tag_opened = true;
+    } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) {
+        auto think_idx = delta_text.find(m_close_tag);
+        msg["reasoning_content"] += delta_text.substr(0, think_idx);
+        msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size()));
+        m_think_tag_opened = false;
+        m_deactivated = true;
+    } else if (m_think_tag_opened) {
+        msg["reasoning_content"] += delta_text;
+    }
+
+    return msg;
+}
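+
+// Illustrative walk-through (not from the original change): with the default
+// m_starts_with_thinking == true, the delta sequence
+//   "I think", " a lot</think>", "Hi"
+// accumulates msg["reasoning_content"] == "I think a lot", routes whatever
+// follows "</think>" in that delta to msg["content"], and then deactivates,
+// so the final delta appends "Hi" straight to msg["content"].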
 
 
 ParsedMessage Llama32PythonicParser::parse(ParsedMessage& input) {
@@ -142,26 +124,50 @@ ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) {
     return res;
 }
 
-
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer)
-    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
-        return this->write(s);
-    }) {
-        m_reasoning_parser = std::make_shared<DeepSeekR1Parser>();
-    }
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
+    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
+        return this->write(s);
+    }) {
+    for (auto& parser : parsers) {
+        if (std::holds_alternative<std::shared_ptr<IncrementalParserBase>>(parser)) {
+            m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
+        } else {
+            auto parser_name = std::get<std::string>(parser);
+            if (registered_incremental_parsers.find(parser_name) != registered_incremental_parsers.end()) {
+                m_parsers.push_back(registered_incremental_parsers[parser_name]);
+            }
+        }
+    }
+}
 
 StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
+    if (message.find("content") != message.end()) {
+        std::cout << message.at("content") << std::endl;
+    }
     return StreamingStatus::RUNNING;
 }
 
 ov::genai::CallbackTypeVariant TextParserStreamer::write(std::string message) {
-    // for (auto& parser: m_parsers) {
-    //     if (parser.is_active()) {
-    //         msg = parser.parse(m_text_buffer, message, msg);
-    //     }
-    // }
-
-    // m_text_buffer += message;
-    // return write(msg);
-    return StreamingStatus::RUNNING;
+    for (auto& parser: m_parsers) {
+        if (parser->is_active()) {
+            m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
+        }
+    }
+
+    m_text_buffer = message;
+    return write(m_parsed_message);
 }
+
+// static initializer to register available built-in parsers
+static bool register_backends() {
+    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
+
+    registered_base_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
+    return true;
+}
+
+// Ensure the backends are registered before main
+static bool are_backends_registered = register_backends();
 
 } // namespace ov::genai
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index 218f782e29..19e4ebe97a 100644
--- 
a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -18,7 +18,13 @@ StreamerBase, get_version, StreamingStatus, - TextStreamer + TextStreamer, + TextParserStreamer +) + +from .py_openvino_genai import ( + ParserBase, + IncrementalParserBase ) __version__ = get_version() diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index b300311721..ada0fa5ca6 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 
'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -1374,6 +1374,17 @@ class ImageGenerationPerfMetrics: @property def raw_metrics(self) -> RawImageGenerationPerfMetrics: ... +class IncrementalParserBase: + def __init__(self) -> None: + ... + def is_active(self) -> bool: + """ + Indicates whether the parser is active and should be used during parsing. + """ + def parse(self, msg: collections.abc.Mapping[str, str], previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> dict[str, str]: + """ + Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content. + """ class InpaintingPipeline: """ This class is used for generation with inpainting models. @@ -1741,6 +1752,13 @@ class MeanStdPair: @property def std(self) -> float: ... +class ParserBase: + def __init__(self) -> None: + ... + def parse(self, text: collections.abc.Mapping[str, str]) -> dict[str, str]: + """ + Parse is called with the full text. Returns a ParsedMessage with parsed content. + """ class PerfMetrics: """ @@ -3133,6 +3151,15 @@ class TextEmbeddingPipeline: """ Waits computed embeddings for a query """ +class TextParserStreamer: + def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[...] = []) -> None: + """ + TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. + """ + def write(self, message: collections.abc.Mapping[str, str]) -> StreamingStatus: + """ + Write is called with a ParsedMessage. Returns StreamingStatus. + """ class TextRerankPipeline: """ Text rerank pipeline @@ -3196,8 +3223,6 @@ class TextStreamer(StreamerBase): """ def __init__(self, tokenizer: Tokenizer, callback: collections.abc.Callable[[str], bool | openvino_genai.py_openvino_genai.StreamingStatus], detokenization_params: collections.abc.Mapping[str, typing.Any] = {}) -> None: ... - def end(self) -> None: - ... def write(self, token: typing.SupportsInt | collections.abc.Sequence[typing.SupportsInt]) -> StreamingStatus: ... 
 class TokenizedInputs:
diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp
index 8cec4de360..ed4aa8d3af 100644
--- a/src/python/py_openvino_genai.cpp
+++ b/src/python/py_openvino_genai.cpp
@@ -33,6 +33,7 @@ void init_lora_adapter(py::module_& m);
 void init_perf_metrics(py::module_& m);
 void init_tokenizer(py::module_& m);
 void init_streamers(py::module_& m);
+void init_parsers(py::module_& m);
 
 void init_generation_config(py::module_& m);
 void init_continuous_batching_pipeline(py::module_& m);
@@ -117,6 +118,7 @@ PYBIND11_MODULE(py_openvino_genai, m) {
     init_generation_config(m);
     init_tokenizer(m);
     init_streamers(m);
+    init_parsers(m);
 
     init_llm_pipeline(m);
     init_continuous_batching_pipeline(m);
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
new file mode 100644
index 0000000000..1d3f066334
--- /dev/null
+++ b/src/python/py_parsers.cpp
@@ -0,0 +1,91 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <pybind11/pybind11.h>
+#include <pybind11/functional.h>
+#include <pybind11/stl.h>
+#include <pybind11/stl_bind.h>
+#include <pybind11/typing.h>
+
+#include "openvino/genai/parsers.hpp"
+#include "py_utils.hpp"
+
+namespace py = pybind11;
+
+using ov::genai::ParsedMessage;
+using ov::genai::IncrementalParserBase;
+using ov::genai::ParserVariant;
+using ov::genai::ParserBase;
+using ov::genai::Tokenizer;
+using ov::genai::StreamingStatus;
+
+namespace pyutils = ov::genai::pybind::utils;
+
+namespace {
+
+
+class ConstructableIncrementalParserBase: public IncrementalParserBase {
+public:
+    ParsedMessage parse(
+        ParsedMessage& msg,
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
+    ) override {
+        PYBIND11_OVERRIDE_PURE(
+            ParsedMessage,          // Return type
+            IncrementalParserBase,  // Parent class
+            parse,                  // Name of function in C++ (must match Python name)
+            msg,
+            previous_text,
+            delta_text,
+            previous_tokens,
+            delta_tokens
+        );
+    }
+
+    bool is_active() const override {
+        PYBIND11_OVERRIDE_PURE(
+            bool,                   // Return type
+            IncrementalParserBase,  // Parent class
+            is_active,              // Name of function in C++ (must match Python name)
+        );
+    }
+};
+
+class ConstructableParserBase: public ParserBase {
+public:
+    ParsedMessage parse(ParsedMessage& text) override {
+        PYBIND11_OVERRIDE_PURE(
+            ParsedMessage,  // Return type
+            ParserBase,     // Parent class
+            parse,          // Name of function in C++ (must match Python name)
+            text            // Argument(s)
+        );
+    }
+};
+
+} // namespace
+
+void init_parsers(py::module_& m) {
+    py::class_<IncrementalParserBase, ConstructableIncrementalParserBase, std::shared_ptr<IncrementalParserBase>>(m, "IncrementalParserBase")
+        .def(py::init<>())
+        .def("parse",
+            &IncrementalParserBase::parse,
+            "Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content.",
+            py::arg("msg"),
+            py::arg("previous_text"),
+            py::arg("delta_text"),
+            py::arg("previous_tokens") = std::nullopt,
+            py::arg("delta_tokens") = std::nullopt)
+        .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing.");
+
+    py::class_<ParserBase, ConstructableParserBase, std::shared_ptr<ParserBase>>(m, "ParserBase")
+        .def(py::init<>())
+        .def("parse",
+            &ParserBase::parse,
+            "Parse is called with the full text. Returns a ParsedMessage with parsed content.",
+            py::arg("text"));
+
+}
\ No newline at end of file
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 704be71959..dbeeb1a20e 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -9,6 +9,7 @@
 
 #include "openvino/genai/streamer_base.hpp"
 #include "openvino/genai/text_streamer.hpp"
+#include "openvino/genai/parsers.hpp"
 #include "py_utils.hpp"
 
 namespace py = pybind11;
@@ -16,6 +17,9 @@ namespace py = pybind11;
 using ov::genai::CallbackTypeVariant;
 using ov::genai::StreamingStatus;
 using ov::genai::TextStreamer;
+using ov::genai::TextParserStreamer;
+using ov::genai::IncrementalParserBase;
+using ov::genai::ParsedMessage;
 using ov::genai::Tokenizer;
 
 namespace pyutils = ov::genai::pybind::utils;
@@ -66,6 +70,20 @@ class ConstructableStreamer: public StreamerBase {
     }
 };
 
+class ConstructableTextParserStreamer: public TextParserStreamer {
+public:
+    using TextParserStreamer::TextParserStreamer; // inherit base constructors
+
+    StreamingStatus write(ParsedMessage& message) override {
+        PYBIND11_OVERRIDE_PURE(
+            StreamingStatus,     // Return type
+            TextParserStreamer,  // Parent class
+            write,               // Name of function in C++ (must match Python name)
+            message              // Argument(s)
+        );
+    }
+};
+
 } // namespace
 
 void init_streamers(py::module_& m) {
@@ -109,6 +127,19 @@ void init_streamers(py::module_& m) {
             return self.write(tokens);
         }
     },
-    py::arg("token"))
-    .def("end", &TextStreamer::end);
+    py::arg("token"));
+
+    py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>>(m, "TextParserStreamer")
+        .def(py::init([](const Tokenizer& tokenizer,
+                         std::vector<std::shared_ptr<IncrementalParserBase>> parsers) {
+            std::vector<ParserVariant> variants(parsers.begin(), parsers.end());
+            return std::make_shared<ConstructableTextParserStreamer>(tokenizer, variants);
+        }),
+        py::arg("tokenizer"),
+        py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParserBase>>({}),
+        "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
+        .def("write",
+            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
+            py::arg("message"),
+            "Write is called with a ParsedMessage. Returns StreamingStatus.");
 }
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index 35b49fde88..0018dbd461 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -109,3 +109,32 @@ TEST(ParserTest, test_reasoning_parser_2) {
 
     ASSERT_EQ(res, expected);
 }
+
+class DeepSeekR1ReasoningParserTest : public ::testing::Test {
+protected:
+    ov::genai::DeepSeekR1ReasoningParser parser;
+    ParsedMessage msg;
+};
+
+TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) {
+    std::vector<std::string> input_stream = {
+        "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking",
+        " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition",
+        " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting",
+        " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals",
+        " ", "3", ".\n", "</think>", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum",
+        " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **",
+        "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to",
+        " it", ".", "**\n", "   \n", "  ", " \\", "[\n", "   "
+    };
+
+    std::string ref_res = "First, I recognize that the question is asking for the sum of 2 and 1.\n\nI know that addition involves combining two numbers to find their total.\n\nStarting with 2, I add 1 to it.\n\n2 plus 1 equals 3.\n";
+
+    for (size_t i = 1; i < input_stream.size(); i++) {
+        std::string previous_text = input_stream[i - 1];
+        std::string delta_text = input_stream[i];
+        msg = parser.parse(msg, previous_text, delta_text);
+    }
+    ASSERT_EQ(msg["reasoning_content"], ref_res);
+}
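+
+// A possible extension of this test (sketch only, not asserted above): once
+// "</think>" has been consumed, the non-reasoning tail should land in
+// "content", e.g.
+//   ASSERT_EQ(msg["content"].rfind("\n\n**Solution", 0), 0);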
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
new file mode 100644
index 0000000000..724384bcad
--- /dev/null
+++ b/tests/python_tests/test_parsers.py
@@ -0,0 +1,49 @@
+# Copyright (C) 2023-2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+import dataclasses
+import json
+from typing import Optional
+
+import numpy as np
+import openvino
+import pytest
+from openvino_genai import Tokenizer
+from transformers import AutoTokenizer
+from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
+
+
+@pytest.fixture(scope="module")
+def hf_ov_genai_models(request, tmp_path_factory):
+    model_id, args = request.param
+    tok_load_properties = {"add_second_input": args.pop("add_second_input")} if "add_second_input" in args else {}
+
+    hf_args = args.copy()  # to overcome mutable default argument side effects
+    if "padding_side" in hf_args and hf_args["padding_side"] is None:
+        # HF does not accept None.
+        # Remove padding_side and let HF choose the default value.
+        hf_args.pop("padding_side")
+    elif "padding_side" in hf_args:
+        hf_args["truncation_side"] = hf_args["padding_side"]
+    model_dir = tmp_path_factory.getbasetemp() / model_id.replace("/", "_")
+    model_dir.mkdir(exist_ok=True, parents=True)
+
+    hf_tokenizer = AutoTokenizer.from_pretrained(model_id, **hf_args)
+    convert_args = {"number_of_inputs": hf_args.pop("number_of_inputs")} if "number_of_inputs" in hf_args else {}
+    convert_and_save_tokenizer(hf_tokenizer, model_dir, **convert_args)
+
+    genai_tokenizer = Tokenizer(model_dir, tok_load_properties)
+    return hf_tokenizer, genai_tokenizer
+
+
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],
+    indirect=True
+)
+def test_non_string_chat_template(hf_ov_genai_models):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+

From 3ab9757709ca368c15f84a6fca92c238b524a00e Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Wed, 24 Sep 2025 11:01:43 +0200
Subject: [PATCH 03/43] refactored

---
 src/cpp/include/openvino/genai/parsers.hpp    | 21 ++-------
 .../include/openvino/genai/text_streamer.hpp  | 14 ++++++
 src/cpp/src/parsers.cpp                       | 46 ++-----------------
 src/cpp/src/text_streamer.cpp                 | 38 ++++++++++++++-
 src/python/py_streamers.cpp                   | 15 +++---
 tests/python_tests/test_parsers.py            | 31 +++++++++++--
 6 files changed, 97 insertions(+), 68 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 291df915a6..41336e4f95 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -2,8 +2,9 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #pragma once
-#include "openvino/genai/text_streamer.hpp"
 #include <map>
+#include <memory>
+#include <optional>
 #include <string>
 #include <variant>
 #include <vector>
 
 namespace ov {
 namespace genai {
 
@@ -32,6 +33,7 @@ class IncrementalParserBase {
     ) = 0;
 
     virtual bool is_active() const = 0;
+    static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_parsers;
 };
 
 class DeepSeekR1ReasoningParser : public IncrementalParserBase {
@@ -61,24 +63,11 @@ class ParserBase {
     ParserBase() = default;
 
     virtual ParsedMessage parse(ParsedMessage& text) = 0;
+    static std::map<std::string, std::shared_ptr<ParserBase>> registered_parsers;
 };
 
 using ParserVariant = std::variant<std::shared_ptr<IncrementalParserBase>, std::string>;
 
-
-class TextParserStreamer : public ov::genai::TextStreamer {
-public:
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
-
-    virtual StreamingStatus write(ParsedMessage& message) = 0;
-
-    ov::genai::CallbackTypeVariant write(std::string message);
-    ParsedMessage m_parsed_message;
-private:
-    std::string m_text_buffer;
-    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
-};
-
 class Llama32PythonicParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
@@ -104,7 +93,7 @@ class BaseReasoningParser : public ParserBase{
     bool m_expect_open_tag = true;
     bool m_keep_original_content = true;
     std::string m_open_tag = "<think>";
-    std::string m_close_tag = "</think>";
+    std::string m_close_tag = "</think>";
 };
 
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index f7c64e3586..658e4c2e38 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -5,6 +5,7 @@
 
 #include "openvino/genai/streamer_base.hpp"
 #include "openvino/genai/tokenizer.hpp"
+#include "openvino/genai/parsers.hpp"
 
 namespace ov {
 namespace genai {
@@ -46,5 +47,18 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
     void compute_decoded_length_for_position(size_t cache_position);
 };
 
+class TextParserStreamer : public TextStreamer {
+public:
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
+
+    virtual StreamingStatus write(ParsedMessage& message);
+
+    CallbackTypeVariant write(std::string message);
+    ParsedMessage m_parsed_message;
+private:
+    std::string m_text_buffer;
+    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
+};
+
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 76a6eb6c15..df30670145 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -13,9 +13,6 @@ using json = nlohmann::json;
 
 namespace ov::genai {
 
-static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_incremental_parsers;
-static std::map<std::string, std::shared_ptr<ParserBase>> registered_base_parsers;
-
 bool DeepSeekR1ReasoningParser::is_active() const {
     return !m_deactivated;
 }
@@ -123,47 +120,14 @@ ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) {
     return res;
 }
 
-
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
-    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
-        return this->write(s);
-    }) {
-    for (auto& parser : parsers) {
-        if (std::holds_alternative<std::shared_ptr<IncrementalParserBase>>(parser)) {
-            m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
-        } else {
-            auto parser_name = std::get<std::string>(parser);
-            if (registered_incremental_parsers.find(parser_name) != registered_incremental_parsers.end()) {
-                m_parsers.push_back(registered_incremental_parsers[parser_name]);
-            }
-        }
-    }
-}
-
-StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
-    if (message.find("content") != message.end()) {
-        std::cout << message.at("content") << std::endl;
-    }
-    return StreamingStatus::RUNNING;
-}
-
-ov::genai::CallbackTypeVariant TextParserStreamer::write(std::string message) {
-    for (auto& parser: m_parsers) {
-        if (parser->is_active()) {
-            m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
-        }
-    }
-
-    m_text_buffer = message;
-    return write(m_parsed_message);
-}
-
+std::map<std::string, std::shared_ptr<IncrementalParserBase>> IncrementalParserBase::registered_parsers;
+std::map<std::string, std::shared_ptr<ParserBase>> ParserBase::registered_parsers;
 
 // static initializer to register available built-in parsers
 static bool register_backends() {
-    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
-
-    registered_base_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
+    IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
+
+    ParserBase::registered_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
     return true;
 }
 
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index 9a0b4e125a..0654abf7dc 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -122,7 +122,43 @@ void TextStreamer::end() {
     return;
 }
 
-ov::genai::StreamerBase::~StreamerBase() = default;
+StreamerBase::~StreamerBase() = default;
+
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
+    : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant {
+        return this->write(s);
+    }) {
+    for (auto& parser : parsers) {
+        if (std::holds_alternative<std::string>(parser)) {
+            auto parser_name = std::get<std::string>(parser);
+            if (IncrementalParserBase::registered_parsers.find(parser_name) != IncrementalParserBase::registered_parsers.end()) {
+                m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
+            } else {
+                OPENVINO_THROW("Parser with name " + parser_name + " is not registered");
+            }
+        } else {
+            m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
+        }
+    }
+}
+
+StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
+    if (message.find("content") != message.end()) {
+        std::cout << message.at("content") << std::endl;
+    }
+    return StreamingStatus::RUNNING;
+}
+
+CallbackTypeVariant TextParserStreamer::write(std::string message) {
+    for (auto& parser: m_parsers) {
+        // if (parser->is_active()) {
+        m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
+        // }
+    }
+
+    m_text_buffer = message;
+    return write(m_parsed_message);
+}
 
 } // namespace genai
 } // namespace ov
\ No newline at end of file
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index dbeeb1a20e..a2f8d16420 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -75,7 +75,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     using TextParserStreamer::TextParserStreamer; // inherit base constructors
 
     StreamingStatus write(ParsedMessage& message) override {
-        PYBIND11_OVERRIDE_PURE(
+        PYBIND11_OVERRIDE(
             StreamingStatus,     // Return type
             TextParserStreamer,  // Parent class
             write,               // Name of function in C++ (must match Python name)
             message              // Argument(s)
         );
     }
@@ -131,15 +131,18 @@ void init_streamers(py::module_& m) {
 
     py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>>(m, "TextParserStreamer")
         .def(py::init([](const Tokenizer& tokenizer,
-                         std::vector<std::shared_ptr<IncrementalParserBase>> parsers) {
-            std::vector<ParserVariant> variants(parsers.begin(), parsers.end());
-            return std::make_shared<ConstructableTextParserStreamer>(tokenizer, variants);
+                         std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers) {
+            return std::make_shared<TextParserStreamer>(tokenizer, parsers);
         }),
         py::arg("tokenizer"),
-        py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParserBase>>({}),
+        py::arg("parsers") = std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>>({}),
        "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
         .def("write",
            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
            py::arg("message"),
-            "Write is called with a ParsedMessage. Returns StreamingStatus.");
+            "Write is called with a ParsedMessage. Returns StreamingStatus.")
+        .def("write",
+            py::overload_cast<std::string>(&TextParserStreamer::write),
+            py::arg("message"),
+            "Write is called with a string message. Returns CallbackTypeVariant.");
 }
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 724384bcad..fd762381f0 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -7,9 +7,8 @@ import dataclasses
 
 import numpy as np
 import openvino
 import pytest
-from openvino_genai import Tokenizer
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer
 from transformers import AutoTokenizer
 from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 
@@ -39,11 +38,35 @@ def hf_ov_genai_models(request, tmp_path_factory):
 @pytest.mark.precommit
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
-    ["katuni4ka/tiny-random-phi3"],
+    [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})],
     indirect=True
 )
 def test_non_string_chat_template(hf_ov_genai_models):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            if "content" in message:
+                print(message["content"])
+            return True
+    streamer = CustomStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"])
+
+    msg = {}
+    stream_string = [
+        "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking",
+        " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition",
+        " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting",
+        " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals",
+        " ", "3", ".\n", "</think>", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum",
+        " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **",
+        "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to",
+        " it", ".", "**\n", "   \n", "  ", " \\", "[\n", "   "
+    ]
+
+    for subword in stream_string:
+        msg = streamer.write(subword)
+
+    # for (prev_subword, subword) in zip(stream_string[:-1], stream_string[1:]):
+    #     msg = streamer.write(msg, prev_subword, subword)
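+
+    # Sketch of follow-up assertions (assumptions, not part of this patch):
+    # if write() is made to return the accumulated ParsedMessage, the stream
+    # above should yield roughly
+    #   assert msg["reasoning_content"].startswith("First, I recognize")
+    #   assert msg["content"].lstrip().startswith("**Solution:**")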

From 5624fc2117844f9958fa5a3e0a1b7efde000c2ad Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 25 Sep 2025 15:04:55 +0200
Subject: [PATCH 04/43] add parsing at the end of generate()

---
 .../openvino/genai/generation_config.hpp      |  4 ++
 .../include/openvino/genai/llm_pipeline.hpp   |  1 +
 src/cpp/include/openvino/genai/parsers.hpp    |  7 +--
 .../include/openvino/genai/text_streamer.hpp  |  5 +-
 src/cpp/src/generation_config.cpp             |  1 +
 src/cpp/src/llm/pipeline.cpp                  | 59 +++++++++++++++++-
 src/cpp/src/parsers.cpp                       | 22 +++++--
 src/cpp/src/text_streamer.cpp                 |  8 +--
 src/python/py_generation_config.cpp           |  1 +
 src/python/py_streamers.cpp                   | 16 +++--
 tests/python_tests/test_parsers.py            | 62 ++++++++++++++++---
 11 files changed, 154 insertions(+), 32 deletions(-)

diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 3020be34bc..1d832c3c8e 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -13,6 +13,7 @@
 #include "openvino/genai/tokenizer.hpp"
 #include "openvino/genai/scheduler_config.hpp"
 #include "openvino/genai/lora_adapter.hpp"
+#include "openvino/genai/parsers.hpp"
 
 namespace ov {
 namespace genai {
@@ -348,6 +349,9 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool is_prompt_lookup() const;
     bool is_structured_output_generation() const;
 
+    // parsers
+    std::vector<std::variant<std::string, std::shared_ptr<ParserBase>>> parsers;
+
     OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
     bool is_speculative_decoding() const;
 
diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp
index eea94591c3..4f2c8405f1 100644
--- a/src/cpp/include/openvino/genai/llm_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -68,6 +68,7 @@ class DecodedResults {
     std::vector scores;
     PerfMetrics perf_metrics;
     std::shared_ptr extended_perf_metrics;
+    std::vector<ParsedMessage> parsed;
 
     // @brief Convert DecodedResults to a string.
     operator std::string() const {
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 41336e4f95..7030acbf9a 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -15,11 +15,6 @@ namespace genai {
 
 using ParsedMessage = std::map<std::string, std::string>;
 
-class ParsedJSONMessage {
-public:
-    std::map<std::string, std::string> content;
-};
-
 class IncrementalParserBase {
 public:
     IncrementalParserBase() = default;
@@ -44,7 +39,7 @@ class DeepSeekR1ReasoningParser : public IncrementalParserBase {
     std::string m_open_tag = "<think>";
     std::string m_close_tag = "</think>";
 public:
-    DeepSeekR1ReasoningParser() = default;
+    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : m_starts_with_thinking(starts_with_thinking) {}
     std::map<std::string, std::string> accumulated_parsed;
 
     ParsedMessage parse(
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 658e4c2e38..872e79dbf9 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -54,8 +54,11 @@ class TextParserStreamer : public TextStreamer {
     virtual StreamingStatus write(ParsedMessage& message);
 
     CallbackTypeVariant write(std::string message);
-    ParsedMessage m_parsed_message;
+
+    ParsedMessage get_parsed_message() const { return m_parsed_message; }
+    std::vector<std::shared_ptr<IncrementalParserBase>> get_parsers() const { return m_parsers; }
 private:
+    ParsedMessage m_parsed_message;
     std::string m_text_buffer;
     std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
 };
diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp
index db7d621ed5..f370042b25 100644
--- a/src/cpp/src/generation_config.cpp
+++ b/src/cpp/src/generation_config.cpp
@@ -153,6 +153,7 @@ void GenerationConfig::update_generation_config(const ov::AnyMap& properties) {
 
     // Structured output
     read_anymap_param(properties, "structured_output_config", structured_output_config);
+    read_anymap_param(properties, "parsers", parsers);
 }
 
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index 76d1fe24dc..38f00e4e45 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -205,7 +205,64 @@ DecodedResults LLMPipeline::generate(
     StringInputs inputs,
     OptionalGenerationConfig generation_config,
     StreamerVariant streamer) {
-    return m_pimpl->generate(inputs, generation_config, streamer);
+    auto res = m_pimpl->generate(inputs, generation_config, streamer);
+
+    std::vector<std::shared_ptr<IncrementalParserBase>> incremental_parsers;
+    // If the streamer is a TextParserStreamer, collect its incremental parsers
+    if (auto streamer_obj = std::get_if<std::shared_ptr<StreamerBase>>(&streamer)) {
+        if (auto parser_streamer = std::dynamic_pointer_cast<TextParserStreamer>(*streamer_obj)) {
+            incremental_parsers = parser_streamer->get_parsers();
+        }
+    }
+
+
+    if (incremental_parsers.empty() && (!generation_config.has_value() || (*generation_config).parsers.empty())) {
(*generation_config).parsers.empty())) {
+        return res;
+    }
+
+    std::vector<std::shared_ptr<ParserBase>> parsers;
+    if (generation_config.has_value() && !(*generation_config).parsers.empty()) {
+        for (auto& parser_variant : (*generation_config).parsers) {
+            if (std::holds_alternative<std::string>(parser_variant)) {
+                auto parser_name = std::get<std::string>(parser_variant);
+                if (ParserBase::registered_parsers.find(parser_name) == ParserBase::registered_parsers.end()) {
+                    OPENVINO_THROW("Parser with name ", parser_name, " is not registered");
+                }
+                parsers.push_back(ParserBase::registered_parsers[parser_name]);
+            } else if (std::holds_alternative<std::shared_ptr<ParserBase>>(parser_variant)) {
+                auto parser = std::get<std::shared_ptr<ParserBase>>(parser_variant);
+                parsers.push_back(parser);
+            }
+        }
+    }
+
+    res.parsed.resize(res.texts.size());
+
+    // BaseParsers have priority over IncrementalParsers.
+    if (!parsers.empty()) {
+        for (size_t i = 0; i < res.texts.size(); ++i) {
+            auto& message = res.texts[i];
+            ParsedMessage msg;
+            for (auto& parser : parsers) {
+                msg = parser->parse(msg);
+            }
+            res.parsed[i] = msg;
+        }
+        return res;
+    }
+
+    // At this point only IncrementalParsers are left.
+    for (size_t i = 0; i < res.texts.size(); ++i) {
+        auto& message = res.texts[i];
+        ParsedMessage msg;
+        for (auto& parser : incremental_parsers) {
+            // Previous text is an empty string because we populate the message with the full generated text.
+            msg = parser->parse(msg, "", message);
+        }
+        res.parsed[i] = msg;
+    }
+
+    return res;
 }
 
 DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config_map) {
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index df30670145..ed4fcca3a5 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -35,18 +35,14 @@ ParsedMessage DeepSeekR1ReasoningParser::parse(
         msg["content"] += delta_text;
         return msg;
     }
-    if (m_starts_with_thinking) {
-        m_think_tag_opened = true;
-    }
-
     bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos;
-    if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos) {
+    if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) {
         // Thinking has started
         auto think_idx = delta_text.find(m_open_tag);
         msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size()));
         m_think_tag_opened = true;
-    } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) {
+    } else if ((m_think_tag_opened || m_starts_with_thinking) && delta_text.find(m_close_tag) != std::string::npos) {
         auto think_idx = delta_text.find(m_close_tag);
         msg["reasoning_content"] += delta_text.substr(0, think_idx);
         msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size()));
@@ -126,6 +122,7 @@ std::map<std::string, std::shared_ptr<ParserBase>> ParserBase::registered_parser
 // static initializer to register available built-in parsers
 static bool register_backends() {
     IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
+    IncrementalParserBase::registered_parsers["Phi-4-reasoning"] = std::make_shared<DeepSeekR1ReasoningParser>(/*starts_with_thinking*/ false);
 
     ParserBase::registered_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
     return true;
@@ -134,4 +132,16 @@ static bool register_backends() {
 
 // Ensure the backends are registered before main
 static bool are_backends_registered = 
register_backends();
 
+static std::vector<std::string> get_parsers_names() {
+    if (!are_backends_registered) {
+        register_backends();
+    }
+
+    std::vector<std::string> names;
+    for (const auto& [name, _] : IncrementalParserBase::registered_parsers) {
+        names.push_back(name);
+    }
+    return names;
+}
+
 } // namespace ov::genai
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index 0654abf7dc..a5238b4a35 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -131,11 +131,10 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
     for (auto& parser : parsers) {
         if (std::holds_alternative<std::string>(parser)) {
             auto parser_name = std::get<std::string>(parser);
-            if (IncrementalParserBase::registered_parsers.find(parser_name) != IncrementalParserBase::registered_parsers.end()) {
-                m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
-            } else {
+            if (IncrementalParserBase::registered_parsers.find(parser_name) == IncrementalParserBase::registered_parsers.end()) {
                 OPENVINO_THROW("Parser with name " + parser_name + " is not registered");
             }
+            m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
         } else {
             m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
         }
@@ -154,6 +153,7 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) {
         // if (parser->is_active()) {
             m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
         // }
+        // m_parsed_message["content"] += message;
     }
 
     m_text_buffer = message;
@@ -161,4 +161,4 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) {
 }
 
 } // namespace genai
-} // namespace ov
\ No newline at end of file
+} // namespace ov
diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp
index 9459b8dfc3..c446966772 100644
--- a/src/python/py_generation_config.cpp
+++ b/src/python/py_generation_config.cpp
@@ -280,6 +280,7 @@ void init_generation_config(py::module_& m) {
         .def_readwrite("include_stop_str_in_output", &GenerationConfig::include_stop_str_in_output)
         .def_readwrite("stop_token_ids", &GenerationConfig::stop_token_ids)
         .def_readwrite("structured_output_config", &GenerationConfig::structured_output_config)
+        .def_readwrite("parsers", &GenerationConfig::parsers)
         .def_readwrite("adapters", &GenerationConfig::adapters)
         .def_readwrite("apply_chat_template", &GenerationConfig::apply_chat_template)
         .def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index a2f8d16420..e643666b7e 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -129,7 +129,7 @@ void init_streamers(py::module_& m) {
         }
     }, py::arg("token"));
 
-    py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>>(m, "TextParserStreamer")
+    py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer")
         .def(py::init([](const Tokenizer& tokenizer,
                          std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers) {
             return std::make_shared<ConstructableTextParserStreamer>(tokenizer, parsers);
         }),
         py::arg("tokenizer"),
         py::arg("parsers") = std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>>({}),
         "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
-        .def("write",
-            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
-            py::arg("message"),
-            "Write is called with a ParsedMessage. Returns StreamingStatus.")
+        // .def("write",
+        //     py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
+        //     py::arg("message"),
+        //     "Write is called with a ParsedMessage. Returns StreamingStatus.")
         .def("write",
             py::overload_cast<std::string>(&TextParserStreamer::write),
             py::arg("message"),
            "Write is called with a string message. 
Returns CallbackTypeVariant.") + + .def("get_parsed_message", &TextParserStreamer::get_parsed_message, "Get the current parsed message") + + .def("get_parsers", &TextParserStreamer::get_parsers, "Get the list of parsers"); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index fd762381f0..d8a323f3bc 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -41,7 +41,7 @@ def hf_ov_genai_models(request, tmp_path_factory): [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], indirect=True ) -def test_non_string_chat_template(hf_ov_genai_models): +def test_parsers_1(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): def write(self, message): @@ -49,8 +49,7 @@ def write(self, message): print(message["content"]) return True - breakpoint() - streamer = CustomStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) + streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) msg = {} stream_string = [ @@ -64,9 +63,56 @@ def write(self, message): " it", ".", "**\n", " \n", " ", " \\", "[\n", " " ] - for subword in stream_string: - msg = streamer.write(subword) + full_str = ''.join(stream_string) + think_content = full_str.split("")[0] + content = full_str.split("")[1] + + parsers = streamer.get_parsers() + + extended = stream_string[:] + extended.append("") + + for parser in parsers: + for (prev_subword, subword) in zip(extended, stream_string): + msg = parser.parse(msg, prev_subword, subword) + + assert msg['reasoning_content'] == think_content + assert msg['content'] == content + +def test_parsers_2(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + class CustomStreamer(TextParserStreamer): + def write(self, message): + if "content" in message: + print(message["content"]) + return True + + streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) + + msg = {} + stream_string = [ + "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", + " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition", + " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting", + " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals", + " ", "3", ".\n", "", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum", + " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **", + "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to", + " it", ".", "**\n", " \n", " ", " \\", "[\n", " " + ] + + full_str = ''.join(stream_string) + think_content = full_str.split("")[0] + content = full_str.split("")[1] + + parsers = streamer.get_parsers() + + extended = stream_string[:] + extended.append("") + + for parser in parsers: + for (prev_subword, subword) in zip(extended, stream_string): + msg = parser.parse(msg, prev_subword, subword) - # for (prev_subword, subword) in zip(stream_string[:-1], stream_string[1:]): - # msg = streamer.write(msg, prev_subword, subword) - breakpoint() + assert msg['reasoning_content'] == think_content + assert msg['content'] == content From 3407d8fbf2865b062da1ae8d4f99b956584a8d29 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 30 Sep 2025 01:18:34 +0200 Subject: [PATCH 05/43] hide 
map with predefined initialized parsers; add TODOs
---
 .github/workflows/linux.yml                   |  2 +-
 .github/workflows/mac.yml                     |  2 +-
 .github/workflows/windows.yml                 |  2 +-
 samples/cpp/text_generation/CMakeLists.txt    |  1 +
 .../text_generation/parsed_output_sample.cpp  | 17 +----
 src/cpp/include/openvino/genai/parsers.hpp    | 42 +++++++++----
 .../include/openvino/genai/text_streamer.hpp  |  2 +-
 src/cpp/src/continuous_batching/pipeline.cpp  |  4 +-
 src/cpp/src/llm/pipeline.cpp                  | 30 +++------
 src/cpp/src/parsers.cpp                       | 63 +++++++++++++------
 src/cpp/src/parsers.hpp                       | 48 --------------
 src/cpp/src/text_streamer.cpp                 | 22 +++----
 src/python/py_parsers.cpp                     | 10 +--
 src/python/py_streamers.cpp                   | 11 ++--
 tests/cpp/parser.cpp                          | 12 ++--
 tests/python_tests/test_parsers.py            |  2 +
 tests/python_tests/test_text_streamer.py      | 14 +++++
 17 files changed, 137 insertions(+), 147 deletions(-)
 delete mode 100644 src/cpp/src/parsers.hpp

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 7ff8c29af3..9500add7c3 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -782,7 +782,7 @@ jobs:
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
           chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching
-          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*"
+          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="DeepSeekR1ReasoningParserTest.*:ParserTest.*:-AddSecondInputTest.*"

      - name: Test Continuous Batching Tools
        if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }}
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 5620804095..d1be431238 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -695,7 +695,7 @@ jobs:
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
           chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching
-          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*"
+          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="DeepSeekR1ReasoningParserTest.*:ParserTest.*:-AddSecondInputTest.*"

      - name: Test C++ Tools
        run: |
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index bb544bc0cf..fbd082bede 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -866,7 +866,7 @@
      - name: gtests unit tests
        run: |
          . 
"${{ env.INSTALL_DIR }}/setupvars.ps1" - & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" + & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.* - name: Test C++ Tools run: | diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index ebaf32c7f4..7493362e81 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -29,6 +29,7 @@ set (SAMPLE_LIST lora_greedy_causal_lm multinomial_causal_lm prompt_lookup_decoding_lm + parsed_output_sample speculative_decoding_lm) foreach(sample IN LISTS SAMPLE_LIST) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp index ada4af6751..1b481a2a4a 100644 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ b/samples/cpp/text_generation/parsed_output_sample.cpp @@ -5,29 +5,14 @@ #include "openvino/genai/parsers.hpp" #include "openvino/genai/text_streamer.hpp" -using ov::genai::ParsingState; class CurrentStreamer : public ov::genai::TextParserStreamer { private: - ParsingState m_previous_state = ParsingState::UNDEFINED; public: CurrentStreamer(const ov::genai::Tokenizer& tokenizer) : ov::genai::TextParserStreamer(tokenizer) {} - ov::genai::StreamingStatus write(const ov::genai::ParsedMessage& message) { - - // if (m_previous_state == ParsingState::UNDEFINED && message["state"] == ParsingState::REASONING) { - // std::cout << "Reasoning: " << std::endl; - // std::cout << message["reasoning_content"].value(); - // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::CONTENT) { - // std::cout << std::endl << "Content: " << std::endl; - // std::cout << message["content"].value(); - // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::REASONING) { - // std::cout << message["reasoning_content"].value(); - // } else if (m_previous_state == ParsingState::CONTENT && message["state"] == ParsingState::CONTENT) { - // std::cout << message["content"].value(); - // } + ov::genai::StreamingStatus write(ov::genai::ParsedMessage& message) { std::cout << message.at("content"); - return ov::genai::StreamingStatus::RUNNING; } }; diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 7030acbf9a..020176d117 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -19,57 +20,74 @@ class IncrementalParserBase { public: IncrementalParserBase() = default; - virtual ParsedMessage parse( + // We return string which with filtered text to be added to content. 
+    virtual std::string parse(
         ParsedMessage& msg,
         const std::string& previous_text,
-        const std::string& delta_text,
+        std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
 
     virtual bool is_active() const = 0;
 
-    static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_parsers;
+    static std::shared_ptr<IncrementalParserBase> get_parser(std::string name);
 };
 
-class DeepSeekR1ReasoningParser : public IncrementalParserBase {
+class ReasoningParser : public IncrementalParserBase {
 private:
     bool m_starts_with_thinking = true;
+    bool m_keep_original_content = true;
     bool m_think_tag_opened = false;
     bool m_deactivated = false;
     std::string m_open_tag = "<think>";
     std::string m_close_tag = "</think>";
 public:
-    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : m_starts_with_thinking(starts_with_thinking) {}
+    ReasoningParser(bool starts_with_thinking = true,
+                    bool keep_original_content = true)
+        : m_starts_with_thinking(starts_with_thinking),
+          m_keep_original_content(keep_original_content) {}
     std::map<std::string, std::string> accumulated_parsed;
 
-    ParsedMessage parse(
+    std::string parse(
         ParsedMessage& msg,
         const std::string& previous_text,
-        const std::string& delta_text,
+        std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) override;
-    static std::string name() { return "DeepSeekR1ReasoningParser"; }
 
     bool is_active() const override;
 };
 
+class DeepSeekR1ReasoningParser : public ReasoningParser {
+public:
+    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : ReasoningParser(starts_with_thinking) {}
+    static std::string name() { return "DeepSeekR1ReasoningParser"; }
+};
+
+class Phi4ReasoningParser : public ReasoningParser {
+public:
+    Phi4ReasoningParser(bool starts_with_thinking = false) : ReasoningParser(starts_with_thinking) {}
+    static std::string name() { return "Phi4ReasoningParser"; }
+};
+
 class ParserBase {
 public:
     ParserBase() = default;
 
     virtual ParsedMessage parse(ParsedMessage& text) = 0;
-    static std::map<std::string, std::shared_ptr<ParserBase>> registered_parsers;
+    static std::shared_ptr<ParserBase> get_parser(std::string name);
 };
 
 using ParserVariant = std::variant<std::shared_ptr<IncrementalParserBase>, std::string>;
 
-class Llama32PythonicParser : public ParserBase {
+class Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
-    Llama32PythonicParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+    // TODO: Check that vLLM has the same default. 
+ Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} ParsedMessage parse(ParsedMessage& input) override; - static std::string name() { return "Llama32PythonicParser"; } + static std::string name() { return "Llama32PythonicToolParser"; } private: bool m_keep_original_content = true; }; diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 872e79dbf9..055adbfbe2 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -51,7 +51,7 @@ class TextParserStreamer : public TextStreamer { public: TextParserStreamer(const Tokenizer& tokenizer, std::vector parsers = {}); - virtual StreamingStatus write(ParsedMessage& message); + virtual StreamingStatus write(ParsedMessage& message) = 0; CallbackTypeVariant write(std::string message); diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 404ee620e1..25256c8d70 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -58,7 +58,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties); auto generation_config = utils::from_config_json_if_exists(models_path); @@ -98,7 +98,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto model = utils::read_model(models_path, properties_without_draft_model); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto generation_config = utils::from_config_json_if_exists(models_path); diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 38f00e4e45..43f90980a6 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -207,16 +207,15 @@ DecodedResults LLMPipeline::generate( StreamerVariant streamer) { auto res = m_pimpl->generate(inputs, generation_config, streamer); - std::vector> incremental_parsers; // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message if (auto streamer_obj = std::get_if>(&streamer)) { if (auto parser_streamer = std::dynamic_pointer_cast(*streamer_obj)) { - incremental_parsers = parser_streamer->get_parsers(); + res.parsed.resize(res.texts.size()); + res.parsed[0] = parser_streamer->get_parsed_message(); } } - - if (incremental_parsers.empty() && (!generation_config.has_value() || (*generation_config).parsers.empty())) { + if (!generation_config.has_value() || (*generation_config).parsers.empty()) { return res; } @@ -225,10 +224,11 @@ DecodedResults LLMPipeline::generate( for (auto& parser_variant : (*generation_config).parsers) { if (std::holds_alternative(parser_variant)) { auto parser_name = std::get(parser_variant); - if (ParserBase::registered_parsers.find(parser_name) == 
ParserBase::registered_parsers.end()) { + auto parser = ParserBase::get_parser(parser_name); + if (!parser) { OPENVINO_THROW("Parser with name ", parser_name, " is not registered"); } - parsers.push_back(ParserBase::registered_parsers[parser_name]); + parsers.push_back(parser); } else if (std::holds_alternative>(parser_variant)) { auto parser = std::get>(parser_variant); parsers.push_back(parser); @@ -238,28 +238,18 @@ DecodedResults LLMPipeline::generate( res.parsed.resize(res.texts.size()); - // BaseParsers have priority over IncrementalParsers + // Apply Base parsers sequentially even if IncrementalParser has run. if (!parsers.empty()) { for (size_t i = 0; i < res.texts.size(); ++i) { auto& message = res.texts[i]; - ParsedMessage msg; + ParsedMessage& msg = res.parsed[i]; for (auto& parser: parsers) { + // TODO: check if is_active() is needed here + // TODO: Check the state of incremental parser and reset if necessary msg = parser->parse(msg); } res.parsed[i] = msg; } - return res; - } - - // At this place we have only IncrementalParsers - for (size_t i = 0; i < res.texts.size(); ++i) { - auto& message = res.texts[i]; - ParsedMessage msg; - for (auto& parser: incremental_parsers) { - // Previous is and empty message because we populate message with the full generated text. - msg = parser->parse(msg, "", message); - } - res.parsed[i] = msg; } return res; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index ed4fcca3a5..2a89716370 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -13,14 +13,14 @@ using json = nlohmann::json; namespace ov::genai { -bool DeepSeekR1ReasoningParser::is_active() const { +bool ReasoningParser::is_active() const { return !m_deactivated; } -ParsedMessage DeepSeekR1ReasoningParser::parse( +std::string ReasoningParser::parse( ParsedMessage& msg, const std::string& previous_text, - const std::string& delta_text, + std::string& delta_text, const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { @@ -31,10 +31,6 @@ ParsedMessage DeepSeekR1ReasoningParser::parse( msg["content"] = ""; } - if (m_deactivated) { - msg["content"] += delta_text; - return msg; - } bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { @@ -42,21 +38,30 @@ ParsedMessage DeepSeekR1ReasoningParser::parse( auto think_idx = delta_text.find(m_open_tag); msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); m_think_tag_opened = true; + if (!m_keep_original_content) { + delta_text = ""; + } } else if ((m_think_tag_opened || m_starts_with_thinking) && delta_text.find(m_close_tag) != std::string::npos) { auto think_idx = delta_text.find(m_close_tag); msg["reasoning_content"] += delta_text.substr(0, think_idx); msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); m_think_tag_opened = false; m_deactivated = true; + if (!m_keep_original_content) { + delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + } } else if (m_think_tag_opened) { msg["reasoning_content"] += delta_text; - } + if (!m_keep_original_content) { + delta_text = ""; + } + } // TODO: add case when and are in the same delta_text - return msg; + return delta_text; } 
-ParsedMessage Llama32PythonicParser::parse(ParsedMessage& input) { +ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -116,29 +121,51 @@ ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { return res; } -std::map> IncrementalParserBase::registered_parsers; -std::map> ParserBase::registered_parsers; +std::map()>> registered_incremental_parsers; +std::map()>> registered_base_parsers; // static initializer to register available buildin parsers static bool register_backends() { - IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared(); - IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared(); - IncrementalParserBase::registered_parsers["Phi-4-reasoning"] = std::make_shared(/*starts_with_thinking*/ false); + registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared(/*starts_with_thinking*/ true); }; + registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared(/*starts_with_thinking*/ false); }; + + registered_base_parsers[Llama32PythonicToolParser::name()] = []() { return std::make_shared(); }; - ParserBase::registered_parsers[Llama32PythonicParser::name()] = std::make_shared(); + // TODO: Add more parsers and register them here. return true; } // Ensure the backends are registered before main static bool are_backends_registered = register_backends(); -static std::vector get_parsers_names() { +std::shared_ptr IncrementalParserBase::get_parser(std::string name) { + if (!are_backends_registered) { + register_backends(); + } + + if (registered_incremental_parsers.find(name) != registered_incremental_parsers.end()) { + return registered_incremental_parsers[name](); + } + return nullptr; +} + +std::shared_ptr ParserBase::get_parser(std::string name) { if (!are_backends_registered) { register_backends(); } + if (registered_base_parsers.find(name) != registered_base_parsers.end()) { + return registered_base_parsers[name](); + } + return nullptr; +} + +static std::vector get_parsers_names() { std::vector names; - for (const auto& [name, _] : IncrementalParserBase::registered_parsers) { + for (const auto& [name, _] : registered_incremental_parsers) { + names.push_back(name); + } + for (const auto& [name, _] : registered_base_parsers) { names.push_back(name); } return names; diff --git a/src/cpp/src/parsers.hpp b/src/cpp/src/parsers.hpp deleted file mode 100644 index 21f474e089..0000000000 --- a/src/cpp/src/parsers.hpp +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once -#include "openvino/genai/text_streamer.hpp" - -namespace ov { -namespace genai { - -struct DeltaToolCall; // Forward declaration, define as needed - -struct DeltaMessage { - std::optional role; - std::optional content; - std::optional reasoning_content; - // std::vector tool_calls; - - DeltaMessage() - : role(std::nullopt), - content(std::nullopt), - reasoning_content(std::nullopt) {} -}; - -class TextParserStreamer : public ov::genai::TextStreamer { -public: - TextParserStreamer(const Tokenizer& tokenizer); - - StreamingStatus write(const DeltaMessage& message); - - ov::genai::CallbackTypeVariant write(std::string message); -}; - -class ReasoningParserBase { -public: - ReasoningParserBase() = default; - - void parse(const std::string& text); -}; - -class 
ToolCallingParserBase {
-public:
-    ToolCallingParserBase() = default;
-
-    void parse(const std::string& text);
-};
-
-} // namespace genai
-} // namespace ov
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index a5238b4a35..3b1519e42d 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -131,29 +131,25 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
     for (auto& parser : parsers) {
         if (std::holds_alternative<std::string>(parser)) {
             auto parser_name = std::get<std::string>(parser);
-            if (IncrementalParserBase::registered_parsers.find(parser_name) == IncrementalParserBase::registered_parsers.end()) {
+            auto parser_impl = IncrementalParserBase::get_parser(parser_name);
+            if (!parser_impl) {
                 OPENVINO_THROW("Parser with name " + parser_name + " is not registered");
             }
-            m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
+            m_parsers.push_back(parser_impl);
         } else {
             m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
         }
     }
 }
 
-StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
-    if (message.find("content") != message.end()) {
-        std::cout << message.at("content") << std::endl;
-    }
-    return StreamingStatus::RUNNING;
-}
-
 CallbackTypeVariant TextParserStreamer::write(std::string message) {
     for (auto& parser : m_parsers) {
-        // if (parser->is_active()) {
-            m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
-        // }
-        // m_parsed_message["content"] += message;
+        if (parser->is_active()) {
+            message = parser->parse(m_parsed_message, m_text_buffer, message);
+        }
+        // The message can be modified inside the parser, e.g. when the parser extracts tool calls
+        // from the message content and filters them out of the returned text.
+        m_parsed_message["content"] += message;
     }
 
     m_text_buffer = message;
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index 1d3f066334..d2ff74b47d 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -26,15 +26,15 @@ namespace {
 class ConstructableIncrementalParserBase: public IncrementalParserBase {
 public:
-    ParsedMessage parse(
+    std::string parse(
         ParsedMessage& msg,
         const std::string& previous_text,
-        const std::string& delta_text,
+        std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) override {
         PYBIND11_OVERRIDE_PURE(
-            ParsedMessage,          // Return type
+            std::string,            // Return type
             IncrementalParserBase,  // Parent class
             parse,                  // Name of function in C++ (must match Python name)
             msg,
@@ -68,6 +68,7 @@ class ConstructableParserBase: public ParserBase {
 } // namespace
 
+// TODO: double check/add more relevant docstrings for parsers.
 void init_parsers(py::module_& m) {
     py::class_<IncrementalParserBase, ConstructableIncrementalParserBase, std::shared_ptr<IncrementalParserBase>>(m, "IncrementalParserBase")
         .def(py::init<>())
@@ -87,5 +88,4 @@ void init_parsers(py::module_& m) {
         &ParserBase::parse,
         "Parse is called with the full text. Returns a ParsedMessage with parsed content.",
         py::arg("text"));
-
-}
\ No newline at end of file
+}
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index e643666b7e..84b531b9f5 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -75,7 +75,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     using TextParserStreamer::TextParserStreamer; // inherit base constructors
     StreamingStatus write(ParsedMessage& message) override {
-        PYBIND11_OVERRIDE(
+        PYBIND11_OVERRIDE_PURE(
             StreamingStatus,    // Return type
             TextParserStreamer, // Parent class
             write,              // Name of function in C++ (must match Python name)
@@ -129,7 +129,8 @@ void init_streamers(py::module_& m) {
         }
     }, py::arg("token"));
-
+
+    // TODO: double check/add more relevant docstrings for TextParserStreamer.
     py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer")
         .def(py::init([](const Tokenizer& tokenizer,
                          std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers) {
             return std::make_shared<ConstructableTextParserStreamer>(tokenizer, parsers);
         }),
         py::arg("tokenizer"),
         py::arg("parsers") = std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>>({}),
         "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
-        // .def("write",
-        //     py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
-        //     py::arg("message"),
-        //     "Write is called with a ParsedMessage. Returns StreamingStatus.")
+        .def("write",
+            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
+            py::arg("message"),
+            "Write is called with a ParsedMessage. 
Returns StreamingStatus.") .def("write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 0018dbd461..f4d7922b7c 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -44,7 +44,7 @@ TEST(ParserTest, test_llama32_parser_1) { }} } }); - std::shared_ptr parser = std::make_shared(); + std::shared_ptr parser = std::make_shared(); nlohmann::json res = run_parser_test(parser, prompt, expected); @@ -67,7 +67,7 @@ TEST(ParserTest, test_llama32_parser_2) { }} } }); - auto parser = std::make_shared(/*keep_original_content*/ false); + auto parser = std::make_shared(/*keep_original_content*/ false); nlohmann::json res = run_parser_test(parser, prompt, expected); @@ -112,7 +112,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: - ov::genai::DeepSeekR1ReasoningParser parser; + ov::genai::ReasoningParser parser; ParsedMessage msg; }; @@ -131,10 +131,14 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { std::string ref_res = "First, I recognize that the question is asking for the sum of 2 and 1.\n\nI know that addition involves combining two numbers to find their total.\n\nStarting with 2, I add 1 to it.\n\n2 plus 1 equals 3.\n"; ParsedMessage msg; + + for (int i = 1; i < input_stream.size(); i++) { std::string previous_text = input_stream[i - 1]; std::string delta_text = input_stream[i]; - msg = parser.parse(msg, previous_text, delta_text); + delta_text = parser.parse(msg, previous_text, delta_text); } ASSERT_EQ(msg["reasoning_content"], ref_res); } + +// TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers. diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index d8a323f3bc..90f7ad93fd 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -116,3 +116,5 @@ def write(self, message): assert msg['reasoning_content'] == think_content assert msg['content'] == content + +# TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers. 
diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index 75804256b1..9b834bf3b7 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -72,6 +72,20 @@ def test_text_prompts(tmp_path, prompt, model_id): streamer.write(token) streamer.end() + class CurrentStremaer(BaseStreamer): + def write(self, token_chunk): + pass + + class CurrentParsingStreamer(TextParserStreamer): + def write(self, word: str): + msg: ParsedMessage = get_current_message() + + + streamer = lambda x: print(x) + + streamer = TextStreamer(ov_tokenizer, lambda x: print(x)) + + assert ''.join(accumulated) == ov_tokenizer.decode(tokens) for chunk_size in [1,2,3,4,5]: From 219827a415fe2f4cd3d4ed91cf82f6150be1aa66 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 14 Oct 2025 17:23:40 +0200 Subject: [PATCH 06/43] use JsonContainer --- .../text_generation/parsed_output_sample.cpp | 2 +- samples/python/text_generation/chat_sample.py | 33 +++++ .../include/openvino/genai/llm_pipeline.hpp | 4 +- src/cpp/include/openvino/genai/parsers.hpp | 21 ++-- src/cpp/src/llm/pipeline.cpp | 5 +- src/cpp/src/parsers.cpp | 116 ++++++++++++------ src/cpp/src/text_streamer.cpp | 4 +- src/python/py_openvino_genai.cpp | 3 +- src/python/py_parsers.cpp | 34 +++-- src/python/py_streamers.cpp | 6 +- tests/cpp/parser.cpp | 24 +--- tests/python_tests/test_parsers.py | 92 +++++++++----- tests/python_tests/test_text_streamer.py | 2 +- 13 files changed, 229 insertions(+), 117 deletions(-) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp index 1b481a2a4a..6efa64ee5e 100644 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ b/samples/cpp/text_generation/parsed_output_sample.cpp @@ -12,7 +12,7 @@ class CurrentStreamer : public ov::genai::TextParserStreamer { CurrentStreamer(const ov::genai::Tokenizer& tokenizer) : ov::genai::TextParserStreamer(tokenizer) {} ov::genai::StreamingStatus write(ov::genai::ParsedMessage& message) { - std::cout << message.at("content"); + std::cout << message["content"].get_string() << std::flush; return ov::genai::StreamingStatus::RUNNING; } }; diff --git a/samples/python/text_generation/chat_sample.py b/samples/python/text_generation/chat_sample.py index e4067c49f3..b852141d3c 100755 --- a/samples/python/text_generation/chat_sample.py +++ b/samples/python/text_generation/chat_sample.py @@ -36,3 +36,36 @@ def main(): if '__main__' == __name__: main() + + pipe = openvino_genai.LLMPipeline(args.model_dir, device) + + prompt = "What is the weather in New York today?" + res = pipe.generate(prompt, max_new_tokens=100, streamer=streamer) + print(res.texts[0]) + + res.parsed['tool_caling'] + + class LlamaToolCallParser(ParserBase): + def parse(self, parsed_data: ParsedData) -> ParsedData: + # parsed_data + # process parsed_data + # e.g. extract tool calls, or other fields from content + return new_parsed_output + + llama_parser = LlamaToolCallParser() + res = pipe.generate(prompt, parsers=[llama_parser | "LLama3.2Pythonic"], max_new_tokens=100) + +# At the beginning msg['original_content'] is filled with full text +msg = res.texts[i] +for parser in m_parsers: + msg = parser.parse(msg) + +# At the end msg is filled with all parsed fields +parsed_data = { + 'original_content': '<|system|>You are a helpful assistant... 
I will call the `get_weather` function with the location… \n\nfunctools[{"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}]<|end|>', + 'content': 'blah blah', + 'reasoning_content': '', + 'tool_calls': "[{\"name\":\"get_weather\",\"arguments\":{\"location\":\"New York, NY\",\"unit\":\"celsius\"}}]", +} + +res.parsed: ParsedData \ No newline at end of file diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index 4f2c8405f1..6385ced995 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -15,6 +15,7 @@ #include "openvino/genai/perf_metrics.hpp" #include "openvino/genai/scheduler_config.hpp" #include "openvino/genai/common_types.hpp" +#include "openvino/genai/json_container.hpp" namespace ov { namespace genai { @@ -68,7 +69,8 @@ class DecodedResults { std::vector scores; PerfMetrics perf_metrics; std::shared_ptr extended_perf_metrics; - std::vector parsed; + // std::vector parsed; + std::vector parsed; // @brief Convert DecodedResults to a string. operator std::string() const { diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 020176d117..479dd2d4de 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -9,12 +9,14 @@ #include #include #include +#include "openvino/genai/json_container.hpp" namespace ov { namespace genai { - -using ParsedMessage = std::map; +// TODO: will be converted to JSONLike object +// using ParsedMessage = std::map; +using ParsedMessage = JsonContainer; class IncrementalParserBase { public: @@ -33,20 +35,15 @@ class IncrementalParserBase { static std::shared_ptr get_parser(std::string name); }; +// Forward declaration +class ReasoningParserImpl; + class ReasoningParser : public IncrementalParserBase { private: - bool m_starts_with_thinking = true; - bool m_keep_original_content = true; - bool m_think_tag_opened = false; - bool m_deactivated = false; - std::string m_open_tag = ""; - std::string m_close_tag = ""; + std::shared_ptr m_impl; public: ReasoningParser(bool starts_with_thinking = true, - bool keep_original_content = true) - : m_starts_with_thinking(starts_with_thinking), - m_keep_original_content(keep_original_content) {} - std::map accumulated_parsed; + bool keep_original_content = true); std::string parse( ParsedMessage& msg, diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 43f90980a6..41b678d388 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -241,14 +241,13 @@ DecodedResults LLMPipeline::generate( // Apply Base parsers sequentially even if IncrementalParser has run. 
if (!parsers.empty()) { for (size_t i = 0; i < res.texts.size(); ++i) { - auto& message = res.texts[i]; - ParsedMessage& msg = res.parsed[i]; + ParsedMessage msg; + msg["content"] = res.texts[i]; for (auto& parser: parsers) { // TODO: check if is_active() is needed here // TODO: Check the state of incremental parser and reset if necessary msg = parser->parse(msg); } - res.parsed[i] = msg; } } diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 2a89716370..01560941cb 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -13,8 +13,72 @@ using json = nlohmann::json; namespace ov::genai { -bool ReasoningParser::is_active() const { - return !m_deactivated; +class ReasoningParserImpl { +private: + bool m_starts_with_thinking = true; + bool m_keep_original_content = true; + bool m_think_tag_opened = false; + std::string m_open_tag = ""; + std::string m_close_tag = ""; + std::map accumulated_parsed; +public: + bool m_deactivated = false; + ReasoningParserImpl() = default; + ReasoningParserImpl(bool starts_with_thinking = true, + bool keep_original_content = true) + : m_starts_with_thinking(starts_with_thinking), + m_keep_original_content(keep_original_content) {} + + std::string parse( + ParsedMessage& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens, + const std::optional>& delta_tokens + ) { + if (msg["reasoning_content"].is_null()) { + msg["reasoning_content"] = ""; + } + if (msg["content"].is_null()) { + msg["content"] = ""; + } + + bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; + if (m_starts_with_thinking) { + m_think_tag_opened = true; + } + + if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { + // Thinking has started + auto think_idx = delta_text.find(m_open_tag); + auto lvalue = msg["reasoning_content"].get_string(); + msg["reasoning_content"] = lvalue + delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); + m_think_tag_opened = true; + if (!m_keep_original_content) { + delta_text = ""; + } + } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) { + auto think_idx = delta_text.find(m_close_tag); + msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text.substr(0, think_idx); + msg["content"] = msg["content"].get_string() + delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + m_think_tag_opened = false; + m_deactivated = true; + if (!m_keep_original_content) { + delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + } + } else if (m_think_tag_opened) { + msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text; + if (!m_keep_original_content) { + delta_text = ""; + } + } // TODO: add case when and are in the same delta_text + + return delta_text; + } +}; + +ReasoningParser::ReasoningParser(bool starts_with_thinking, bool keep_original_content) { + m_impl = std::make_shared(starts_with_thinking, keep_original_content); } std::string ReasoningParser::parse( @@ -24,42 +88,12 @@ std::string ReasoningParser::parse( const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { - if (msg.find("reasoning_content") == msg.end()) { - msg["reasoning_content"] = ""; - } - if (msg.find("content") == 
msg.end()) { - msg["content"] = ""; - } - - bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; - - if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { - // Thinking has started - auto think_idx = delta_text.find(m_open_tag); - msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); - m_think_tag_opened = true; - if (!m_keep_original_content) { - delta_text = ""; - } - } else if ((m_think_tag_opened || m_starts_with_thinking) && delta_text.find(m_close_tag) != std::string::npos) { - auto think_idx = delta_text.find(m_close_tag); - msg["reasoning_content"] += delta_text.substr(0, think_idx); - msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); - m_think_tag_opened = false; - m_deactivated = true; - if (!m_keep_original_content) { - delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); - } - } else if (m_think_tag_opened) { - msg["reasoning_content"] += delta_text; - if (!m_keep_original_content) { - delta_text = ""; - } - } // TODO: add case when and are in the same delta_text - - return delta_text; + return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } +bool ReasoningParser::is_active() const { + return !m_impl->m_deactivated; +} ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // Input example @@ -67,7 +101,7 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // Regex to capture the [...] part smatch m; - const std::string& text = input.at("content"); + const std::string& text = input["content"].get_string(); regex r(R"(\[.*?\])"); if (regex_search(text, m, r)) { // Strip outer [ ] @@ -92,7 +126,8 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - input["tool_calls"] = j.dump(); + std::cout << j.dump() << std::endl; + input["tool_calls"] = ParsedMessage::from_json_string(j.dump()); return input; } return ParsedMessage{}; @@ -101,7 +136,8 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { ParsedMessage res; std::string reasoning_content; - const std::string& content = input.at("content"); + // auto content = input["content"]; + std::string content = input["content"].get_string(); res["content"] = content; size_t start = content.find(m_open_tag); @@ -171,4 +207,6 @@ static std::vector get_parsers_names() { return names; } + + } // namespace ov::genai diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 3b1519e42d..5ffc34689f 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -148,8 +148,8 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) { message = parser->parse(m_parsed_message, m_text_buffer, message); } // Message can be modified inside parser, if parser for example extracted tool calling from message content - // but parser - m_parsed_message["content"] += message; + // but parser + m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } m_text_buffer = message; diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index 
ed4aa8d3af..aa2854480c 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -93,6 +93,7 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) .def_readonly("scores", &DecodedResults::scores) + .def_readonly("parsed", &DecodedResults::parsed) .def_readonly("perf_metrics", &DecodedResults::perf_metrics) .def_readonly("extended_perf_metrics", &DecodedResults::extended_perf_metrics) .def("__str__", [](const DecodedResults &dr) -> py::str { @@ -115,10 +116,10 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def_readonly("extended_perf_metrics", &EncodedResults::extended_perf_metrics); init_lora_adapter(m); + init_parsers(m); init_generation_config(m); init_tokenizer(m); init_streamers(m); - init_parsers(m); init_llm_pipeline(m); init_continuous_batching_pipeline(m); diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d2ff74b47d..2cf26e4b19 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -9,6 +9,7 @@ #include "openvino/genai/parsers.hpp" #include "py_utils.hpp" +#include "openvino/genai/json_container.hpp" namespace py = pybind11; @@ -18,6 +19,7 @@ using ov::genai::ParserVariant; using ov::genai::ParserBase; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; +using ov::genai::JsonContainer; namespace pyutils = ov::genai::pybind::utils; @@ -72,15 +74,31 @@ class ConstructableParserBase: public ParserBase { void init_parsers(py::module_& m) { py::class_>(m, "IncrementalParserBase") .def(py::init<>()) - .def("parse", - &IncrementalParserBase::parse, - "Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content.", - py::arg("msg"), - py::arg("previous_text"), - py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, - py::arg("delta_tokens") = std::nullopt) + .def("parse", [](IncrementalParserBase& self, + py::dict& msg, + std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt) { + // TODO: optimize conversion between py::dict and ParsedMessage + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); + auto msg_cpp = JsonContainer(msg_anymap); + + + auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + msg.clear(); + + auto json_obj = msg_cpp.to_json(); + for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { + msg[py::cast(it.key())] = py::cast(it.value()); + } + + return res; + }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, + "Parse is called every time new text delta is decoded. 
Returns a string with any additional text to append to the current output.") .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing."); + py::class_>(m, "ParserBase") .def(py::init<>()) diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 84b531b9f5..2ea1f72880 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -75,7 +75,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer { using TextParserStreamer::TextParserStreamer; // inherit base constructors StreamingStatus write(ParsedMessage& message) override { - PYBIND11_OVERRIDE( + PYBIND11_OVERRIDE_PURE( StreamingStatus, // Return type TextParserStreamer, // Parent class write, // Name of function in C++ (must match Python name) @@ -142,10 +142,10 @@ void init_streamers(py::module_& m) { py::overload_cast(&TextParserStreamer::write), py::arg("message"), "Write is called with a ParsedMessage. Returns StreamingStatus.") - .def("write", + .def("_write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), - "Write is called with a string message. Returns CallbackTypeVariant.") + "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.") .def("get_parsed_message", &TextParserStreamer::get_parsed_message, "Get the current parsed message") diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index f4d7922b7c..dc15ac1482 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -9,22 +9,10 @@ using namespace ov::genai; -nlohmann::json convert_to_json(const ParsedMessage& msg) { - nlohmann::json j; - for (const auto& [key, value] : msg) { - if (key == "tool_calls") { - j[key] = nlohmann::json::parse(value); - continue; - } - j[key] = value; - } - return j; -} - -nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt, const nlohmann::json& expected) { +nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { ParsedMessage input; input["content"] = prompt; - return convert_to_json(parser->parse(input)); + return (parser->parse(input)).to_json(); } @@ -46,7 +34,7 @@ TEST(ParserTest, test_llama32_parser_1) { }); std::shared_ptr parser = std::make_shared(); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } @@ -69,7 +57,7 @@ TEST(ParserTest, test_llama32_parser_2) { }); auto parser = std::make_shared(/*keep_original_content*/ false); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } @@ -87,7 +75,7 @@ TEST(ParserTest, test_reasoning_parser_1) { /*keep_original_content*/ false ); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } @@ -105,7 +93,7 @@ TEST(ParserTest, test_reasoning_parser_2) { /*keep_original_content*/ true ); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 90f7ad93fd..849c360a63 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -7,9 +7,11 @@ import numpy as np import openvino import pytest -from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, 
TextParserStreamer +from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus from transformers import AutoTokenizer from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model +import re +import textwrap @pytest.fixture(scope="module") @@ -43,42 +45,76 @@ def hf_ov_genai_models(request, tmp_path_factory): ) def test_parsers_1(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + answer = "\nOkay, the user is asking for the answer to 2 + 1. Let me make sure I understand the question correctly. They want a short answer, so I shouldn't overcomplicate things. Basic addition here. Two plus one equals three. Yeah, that's straightforward. I need to respond with the answer inside a box using the specified format. Let me double-check the arithmetic to avoid any mistakes. Yep, 2 + 1 is definitely 3. Alright, time to put it in the box.\n\n\nThe answer to 2 + 1 is \boxed{3}." + stream_string = re.split(r"(\s+)", answer) + class CustomStreamer(TextParserStreamer): def write(self, message): - if "content" in message: - print(message["content"]) - return True - - streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) + msg.update(message) + return StreamingStatus.RUNNING + streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) msg = {} - stream_string = [ - "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", - " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition", - " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting", - " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals", - " ", "3", ".\n", "", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum", - " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **", - "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to", - " it", ".", "**\n", " \n", " ", " \\", "[\n", " " - ] + for subword in stream_string: + streamer._write(subword) - full_str = ''.join(stream_string) - think_content = full_str.split("")[0] - content = full_str.split("")[1] + # breakpoint() + think_content = answer.split("")[0].replace("", "") + content = answer - parsers = streamer.get_parsers() - - extended = stream_string[:] - extended.append("") - - for parser in parsers: - for (prev_subword, subword) in zip(extended, stream_string): - msg = parser.parse(msg, prev_subword, subword) - assert msg['reasoning_content'] == think_content assert msg['content'] == content +@pytest.mark.precommit +@pytest.mark.parametrize( + "hf_ov_genai_models", + [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + indirect=True +) +def test_final_parser_1(ov_genai_models): + prompt = textwrap.dedent(''' + <|begin_of_text|><|start_header_id|>system<|end_header_id|> + + Environment: ipython + Cutting Knowledge Date: December 2023 + Today Date: 15 Oct 2025 + + You have access to the following functions. To call functions, please respond with a python list of the calls. Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] Do not use variables. 
+ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and state, e.g., 'San Francisco, CA'" + }, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ] + } + }, + "required": [ + "location", + "unit" + ] + } + } + } + + You are a helpful assistant with tool calling capabilities. Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question.<|eot_id|><|start_header_id|>user<|end_header_id|> + + What's the weather in New York today? Please explain what you are doing and call the tool<|eot_id|><|start_header_id|>assistant<|end_header_id|> + ''') + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): diff --git a/tests/python_tests/test_text_streamer.py index 9b834bf3b7..bc3db5bc97 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -29,7 +29,7 @@ def chunks(arr: list, n: int): Set folder = Application.GetNamespace("Microsoft Office").PackagedInstance.GetFolder("Folder Name") 'Get all files in the folder folder.Files.Clear -""" +""" eng_prompts = [ 'What is the previous answer?', 'Why is the Sun yellow?', From 4c3d443c2c549130c3327e420e6d1fb68c1ab3e7 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 16 Oct 2025 14:53:17 +0200 Subject: [PATCH 07/43] fix processing with JsonContainer and make tests green again --- src/cpp/src/parsers.cpp | 98 ++++++++++++++++++++++++------ src/python/py_streamers.cpp | 39 +++++++++--- tests/python_tests/test_parsers.py | 90 +++++++++++++-------------- 3 files changed, 156 insertions(+), 71 deletions(-) diff --git a/src/cpp/src/parsers.cpp index 01560941cb..eb491aef32 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -20,6 +20,7 @@ class ReasoningParserImpl { bool m_think_tag_opened = false; std::string m_open_tag = "<think>"; std::string m_close_tag = "</think>"; + std::string m_text_cache = ""; std::map accumulated_parsed; public: bool m_deactivated = false; @@ -36,10 +37,10 @@ class ReasoningParserImpl { const std::optional<std::vector<int64_t>>& previous_tokens, const std::optional<std::vector<int64_t>>& delta_tokens ) { - if (msg["reasoning_content"].is_null()) { + if (!msg.contains("reasoning_content")) { msg["reasoning_content"] = ""; } - if (msg["content"].is_null()) { + if (!msg.contains("content")) { msg["content"] = ""; } @@ -47,31 +48,94 @@ class ReasoningParserImpl { if (m_starts_with_thinking) { m_think_tag_opened = true; } - - if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { + + auto txt_chunk = m_text_cache + delta_text; + auto reason_str = msg["reasoning_content"].get_string(); + auto content_str = msg["content"].get_string(); + + if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { + OPENVINO_ASSERT(m_open_tag.find(m_text_cache) == 0, "m_text_cache should be a prefix of m_open_tag"); + + // Thinking has started - auto think_idx = delta_text.find(m_open_tag); - auto lvalue = msg["reasoning_content"].get_string(); - msg["reasoning_content"] = lvalue + 
delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); - m_think_tag_opened = true; + auto open_idx = txt_chunk.find(m_open_tag); + reason_str += txt_chunk.substr(open_idx + std::string(m_open_tag).size(), txt_chunk.size() - (open_idx + std::string(m_open_tag).size())); if (!m_keep_original_content) { delta_text = ""; } - } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) { - auto think_idx = delta_text.find(m_close_tag); - msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text.substr(0, think_idx); - msg["content"] = msg["content"].get_string() + delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); - m_think_tag_opened = false; - m_deactivated = true; + + m_think_tag_opened = true; + msg["reasoning_content"] = reason_str; + m_text_cache = ""; + + if (txt_chunk.find(m_close_tag) != std::string::npos) { + // If <think> and </think> are in the same txt_chunk = m_text_cache + delta_text + auto close_idx = txt_chunk.find(m_close_tag); + reason_str = txt_chunk.substr(open_idx + std::string(m_open_tag).size(), close_idx - (open_idx + std::string(m_open_tag).size())); + content_str = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size())); + if (!m_keep_original_content) { + delta_text = content_str; + } + m_think_tag_opened = false; + m_deactivated = true; + msg["reasoning_content"] = reason_str; + } + } else if (m_think_tag_opened && txt_chunk.find(m_close_tag) != std::string::npos) { + // Thinking tag was closed + auto close_idx = txt_chunk.find(m_close_tag); + + reason_str += txt_chunk.substr(0, close_idx); + // content_str += txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size())); if (!m_keep_original_content) { - delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + // Cut from the txt_chunk which is before </think> and leave only what is after </think>. + // Example if m_text_cache + delta_text = "...some text</th" + "ink>Answer is 3" = "...some text</think>Answer is 3" + // we want to keep in delta_txt only "Answer is 3". + // We can operate with txt_chunk since final characters closing the tag ("ink>") are always in delta_text. + delta_text = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size())); } + + msg["reasoning_content"] = reason_str; + m_text_cache = ""; + m_think_tag_opened = false; + m_deactivated = true; } else if (m_think_tag_opened) { - msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text; + // Thinking tag was already opened and not closed yet + // reason_str += m_text_cache + + // "sdf" + // "sd<", " 2" + + // m_text_cache = "<" + // delta_text = " 2" + + size_t num_chars_to_keep = 0; // number of characters from the end of txt_chunk which can be part of the closing tag + // We must be sure that no chunks with the closing tag are included to reason_str. 
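+                // Illustrative walk-through (assuming the default m_close_tag "</think>"): if txt_chunk is
+                // "reasoning text</th", the loop below finds the suffix "</th", so "reasoning text" goes to
+                // reason_str and "</th" stays in m_text_cache; the next delta "ink> Answer" then completes
+                // the close tag and is handled by the closing-tag branch above.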
+ for (size_t i = txt_chunk.size(); i >= 1; --i) { + // Get the substring of the i last characters of txt_chunk + auto suffix = txt_chunk.substr(txt_chunk.size() - i, i); + if (m_close_tag.find(suffix) != std::string::npos) { + num_chars_to_keep = i; + break; + } + } + + // If the suffix is a prefix of m_close_tag, we store it in the cache to detect if </think> is split between several delta_text pieces. + if (num_chars_to_keep > 0) { + m_text_cache = txt_chunk.substr(txt_chunk.size() - num_chars_to_keep, num_chars_to_keep); + reason_str += txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep); + } else { + reason_str += txt_chunk; + m_text_cache = ""; + } + if (!m_keep_original_content) { delta_text = ""; } - } // TODO: add case when <think> and </think> are in the same delta_text + msg["reasoning_content"] = reason_str; + } else { + // Think tag was not opened yet and not found in the current delta_text. + // Accumulate text in the cache to detect if <think> is split between several delta_text pieces. + m_text_cache += delta_text; + } return delta_text; } diff --git a/src/python/py_streamers.cpp index 2ea1f72880..61dc2d026c 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -11,6 +11,7 @@ #include "openvino/genai/text_streamer.hpp" #include "openvino/genai/parsers.hpp" #include "py_utils.hpp" +#include "openvino/genai/json_container.hpp" namespace py = pybind11; @@ -21,6 +22,7 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::TextParserStreamer; using ov::genai::IncrementalParserBase; using ov::genai::ParsedMessage; using ov::genai::Tokenizer; +using ov::genai::JsonContainer; namespace pyutils = ov::genai::pybind::utils; @@ -75,11 +77,27 @@ class ConstructableTextParserStreamer: public TextParserStreamer { using TextParserStreamer::TextParserStreamer; // inherit base constructors StreamingStatus write(ParsedMessage& message) override { + py::dict message_py; + auto json_obj = message.to_json(); + for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { + message_py[py::cast(it.key())] = py::cast(it.value().get()); + } + + // call python implementation which accepts py::dict instead of ParsedMessage + auto res = py::get_override(this, "write")(message_py); + + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(message_py); + message = JsonContainer(msg_anymap); + + return res.cast<StreamingStatus>(); + } + + StreamingStatus write(py::dict& message) { PYBIND11_OVERRIDE_PURE( - StreamingStatus, // Return type - TextParserStreamer, // Parent class - write, // Name of function in C++ (must match Python name) - message // Argument(s) + StreamingStatus, + TextParserStreamer, + "write", + message ); } }; @@ -139,9 +157,16 @@ void init_streamers(py::module_& m) { py::arg("parsers") = std::vector, std::string>>({}), "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") .def("write", + [](TextParserStreamer& self, py::dict& message) { + // Downcast to ConstructableTextParserStreamer if needed + auto* derived = dynamic_cast<ConstructableTextParserStreamer*>(&self); + if (!derived) { + throw std::runtime_error("write(py::dict&) only available for ConstructableTextParserStreamer"); + } + return derived->write(message); + }, + py::arg("message"), + "Write is called with a ParsedMessage. 
Returns StreamingStatus.") .def("_write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), diff --git a/tests/python_tests/test_parsers.py index 849c360a63..719766fc06 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -40,13 +40,24 @@ def hf_ov_genai_models(request, tmp_path_factory): @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], # this tokenizer is used as a stub only indirect=True ) -def test_parsers_1(hf_ov_genai_models): +@pytest.mark.parametrize("answer", [ + "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}.", + + ( + "<think>\nOkay, the user is asking for the answer to 2 + 1. Let me make sure I understand " + "the question correctly. They want a short answer, so I shouldn't overcomplicate things. " + "Basic addition here. Two plus one equals three. Yeah, that's straightforward. I need to " + "respond with the answer inside a box using the specified format. Let me double-check the " + "arithmetic to avoid any mistakes. Yep, 2 + 1 is definitely 3. Alright, time to put it in " + "the box.\n</think>\n\nThe answer to 2 + 1 is \boxed{3}." + ), +]) +def test_phi4_reason_parser_1(hf_ov_genai_models, answer): hf_tokenizer, genai_tokenizer = hf_ov_genai_models - answer = "<think>\nOkay, the user is asking for the answer to 2 + 1. Let me make sure I understand the question correctly. They want a short answer, so I shouldn't overcomplicate things. Basic addition here. Two plus one equals three. Yeah, that's straightforward. I need to respond with the answer inside a box using the specified format. Let me double-check the arithmetic to avoid any mistakes. Yep, 2 + 1 is definitely 3. Alright, time to put it in the box.\n</think>\n\nThe answer to 2 + 1 is \boxed{3}." stream_string = re.split(r"(\s+)", answer) class CustomStreamer(TextParserStreamer): @@ -59,7 +70,6 @@ def write(self, message): for subword in stream_string: streamer._write(subword) - # breakpoint() think_content = answer.split("</think>")[0].replace("<think>", "") content = answer @@ -69,51 +79,37 @@ def write(self, message): assert msg['reasoning_content'] == think_content assert msg['content'] == content -@pytest.mark.precommit -@pytest.mark.parametrize( - "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], - indirect=True -) -def test_final_parser_1(hf_ov_genai_models): - prompt = textwrap.dedent(''' - <|begin_of_text|><|start_header_id|>system<|end_header_id|> - - Environment: ipython - Cutting Knowledge Date: December 2023 - Today Date: 15 Oct 2025 - - You have access to the following functions. To call functions, please respond with a python list of the calls. Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] Do not use variables. - - { - "type": "function", - "function": { - "name": "get_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "City and state, e.g., 'San Francisco, CA'" - }, - "unit": { - "type": "string", - "enum": [ - "celsius", - "fahrenheit" - ] - } - }, - "required": [ - "location", - "unit" - ] - } - } - } - - You are a helpful assistant with tool calling capabilities. 
Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question.<|eot_id|><|start_header_id|>user<|end_header_id|> - - What's the weather in New York today? Please explain what you are doing and call the tool<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ''') +@pytest.mark.parametrize("split_answer", [ + ["<think>", "\nOkay, ", "the user is asking", " for the ", "answer ", "to 2 + 1.", "</think>", "\n\nThe answer ", "to", "2 ", "+ ", "1 ", "is ", "\boxed{3}."], + ["<th", "ink>", "\nOkay, ", "the user is asking", " for the ", "answer ", "to 2 + 1.", "</think>", "\n\nThe answer ", "to", "2 ", "+ ", "1 ", "is ", "\boxed{3}."], + ["<think>", "\nOkay, ", "the user is asking", " for the ", "answer ", "to 2 + 1.", "</th", "ink>", "\n\nThe answer ", "to", "2 ", "+ ", "1 ", "is ", "\boxed{3}."], + + # check that if thinking opening and closing tags are passed in a single subword, it is still parsed correctly + ["<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."] +]) +def test_phi4_reason_parser_2(hf_ov_genai_models, split_answer): + # check that if thinking opening and closing tags are in the middle of the subword, it is still parsed correctly + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + class CustomStreamer(TextParserStreamer): + def write(self, message): + msg.update(message) + return StreamingStatus.RUNNING + streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) + + msg = {} + for subword in split_answer: + streamer._write(subword) + + think_content = (''.join(split_answer)).split("</think>")[0].replace("<think>", "") + content = ''.join(split_answer) + + assert msg['reasoning_content'] == think_content + assert msg['content'] == content + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models From 5ee48bf33590f2d7c05618f61779d4e1d4afbf0e Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 00:40:03 +0200 Subject: [PATCH 08/43] add Llama32JsonToolParser --- src/cpp/include/openvino/genai/parsers.hpp | 12 +++ src/cpp/src/continuous_batching/pipeline.cpp | 4 +- src/cpp/src/parsers.cpp | 98 ++++++++++++-------- src/python/openvino_genai/__init__.py | 6 +- src/python/py_parsers.cpp | 71 +++++++++++++- tests/python_tests/test_parsers.py | 25 ++++- 6 files changed, 170 insertions(+), 46 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp index 479dd2d4de..18e8ee7fef 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -89,6 +89,18 @@ class Llama32PythonicToolParser : public ParserBase { bool m_keep_original_content = true; }; +class Llama32JsonToolParser : public ParserBase { +// Does not modify original content, only extracts and adds tool calls +public: + // TODO: Check that vLLM has the same default. 
+ Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} + + ParsedMessage parse(ParsedMessage& input) override; + static std::string name() { return "Llama32JsonToolParser"; } +private: + bool m_keep_original_content = true; +}; + class BaseReasoningParser : public ParserBase{ public: BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>") : diff --git a/src/cpp/src/continuous_batching/pipeline.cpp index 25256c8d70..404ee620e1 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -58,7 +58,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties); auto generation_config = utils::from_config_json_if_exists(models_path); @@ -98,7 +98,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto model = utils::read_model(models_path, properties_without_draft_model); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto generation_config = utils::from_config_json_if_exists(models_path); diff --git a/src/cpp/src/parsers.cpp index eb491aef32..915167cc2d 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -5,10 +5,8 @@ #include #include #include -#include #include -using namespace std; using json = nlohmann::json; namespace ov::genai { @@ -99,15 +97,19 @@ class ReasoningParserImpl { m_deactivated = true; } else if (m_think_tag_opened) { // Thinking tag was already opened and not closed yet - // reason_str += m_text_cache - // "sdf" - // "sd<", " 2" - - // m_text_cache = "<" - // delta_text = " 2" - - size_t num_chars_to_keep = 0; // number of characters from the end of txt_chunk which can be part of the closing tag + // If we have subsequently "sdf</th", "i", "nk> The" + // Then we put "sdf" to reason_str and "</th" to m_text_cache, + // then we put "i" to m_text_cache since m_text_cache + delta_text = "</thi", + // then (in the closing tag IF-block) we leave only " The" in delta_text. + + // If we have "ing. <", " 20 ", "40>" + // Then we put "ing. " to reason_str and "<" to m_text_cache since it's a substring of close tag "</think>", + // but since continuation " 20 " is not a substring of "</think>", we will end up in this IF-block again + // and put " 20 " to reason_str and clear m_text_cache. + + // number of characters from the end of txt_chunk + size_t num_chars_to_keep = 0; // We must be sure that no chunks with the closing tag are included to reason_str. 
for (size_t i = txt_chunk.size(); i >= 1; --i) { // Get the substring of the i last characters of txt_chunk @@ -164,39 +166,61 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; // Regex to capture the [...] part - smatch m; + std::smatch m; const std::string& text = input["content"].get_string(); - regex r(R"(\[.*?\])"); - if (regex_search(text, m, r)) { - // Strip outer [ ] - string call = m.str().substr(1, m.str().size() - 2); - - // Split function name and arguments - size_t pos = call.find('('); - string name = call.substr(0, pos); - string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) - - // Parse arguments of the form key='value' - map kv; - regex arg_re(R"((\w+)\s*=\s*'([^']*)')"); - auto it = sregex_iterator(args.begin(), args.end(), arg_re); - for (; it != sregex_iterator(); ++it) { - kv[(*it)[1]] = (*it)[2]; - } - json j = json::array({{ - {"name", name}, - {"arguments", kv} - }}); - if (!m_keep_original_content) { - input["content"] = regex_replace(text, r, ""); - } - std::cout << j.dump() << std::endl; - input["tool_calls"] = ParsedMessage::from_json_string(j.dump()); + std::regex r(R"(\[.*?\])"); + if (!std::regex_search(text, m, r)) { return input; } + + // Strip outer [ ] + std::string call = m.str().substr(1, m.str().size() - 2); + + // Split function name and arguments + input["tool_calls"] = ParsedMessage::array(); + + size_t pos = call.find('('); + std::string name = call.substr(0, pos); + std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) + + // Parse arguments of the form key='value' + JsonContainer kv = JsonContainer::array(); + + std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); + auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); + for (; it != std::sregex_iterator(); ++it) { + kv.push_back(ParsedMessage(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); + } + + input["tool_calls"] = ParsedMessage::array(); + input["tool_calls"].push_back(ParsedMessage({{"name", name}, {"arguments", kv}})); + + if (!m_keep_original_content) { + input["content"] = regex_replace(text, r, ""); + } + return ParsedMessage{}; } +ParsedMessage Llama32JsonToolParser::parse(ParsedMessage& message) { + // Find JSON in the message + std::string msg_content = message["content"].get_string(); + + size_t json_start = msg_content.find('{'); + size_t json_end = msg_content.rfind('}'); + if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { + return message; + } + auto res = JsonContainer::array(); + res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); + message["tool_calls"] = res; + + if (!m_keep_original_content) { + message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); + } + return message; +} + ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { ParsedMessage res; std::string reasoning_content; diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 19e4ebe97a..3656c0280a 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -24,7 +24,11 @@ from .py_openvino_genai import ( ParserBase, - IncrementalParserBase + IncrementalParserBase, + Phi4ReasoningParser, + DeepSeekR1ReasoningParser, + Llama32JsonToolParser, + Llama32PythonicToolParser, ) __version__ = get_version() 
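Note (illustrative sketch, not part of the diff): the exports above are exercised by test_final_parser_llama_32_json later in this patch; the dict-in/dict-out behavior follows the pybind glue in py_parsers.cpp below:

    from openvino_genai import Llama32JsonToolParser

    msg = {"content": 'Calling weather API: {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}'}
    parser = Llama32JsonToolParser()
    parser.parse(msg)  # extracts the embedded JSON and fills msg["tool_calls"] in place
    assert msg["tool_calls"][0]["name"] == "get_weather"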
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 2cf26e4b19..5f8df28446 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -17,6 +17,11 @@ using ov::genai::ParsedMessage; using ov::genai::IncrementalParserBase; using ov::genai::ParserVariant; using ov::genai::ParserBase; +using ov::genai::ReasoningParser; +using ov::genai::Phi4ReasoningParser; +using ov::genai::DeepSeekR1ReasoningParser; +using ov::genai::Llama32JsonToolParser; +using ov::genai::Llama32PythonicToolParser; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; using ov::genai::JsonContainer; @@ -68,6 +73,25 @@ class ConstructableParserBase: public ParserBase { } }; +static py::object json_mod = py::module_::import("json"); + +// wrapper to enhance calling parser from Python +void call_parser(py::dict& msg, std::function func) { + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); + auto msg_cpp = JsonContainer(msg_anymap); + + func(msg_cpp); + + auto json_str = msg_cpp.to_json_string(); + py::dict result = json_mod.attr("loads")(json_str); + + // update msg with result + msg.clear(); + for (auto item : result) { + msg[item.first] = item.second; + } +} + } // namespace // TODO: double check/add more relevant docstrings for parsers. @@ -98,12 +122,51 @@ void init_parsers(py::module_& m) { py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.") .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing."); + + py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") + .def(py::init(), py::arg("starts_with_thinking") = false) + .def("parse", + &Phi4ReasoningParser::parse, + "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", + py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) + .def_static("get_parser", &Phi4ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); - - py::class_>(m, "ParserBase") + py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") .def(py::init<>()) .def("parse", - &ParserBase::parse, + &DeepSeekR1ReasoningParser::parse, "Parse is called with the full text. Returns a ParsedMessage with parsed content.", - py::arg("text")); + py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) + .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + + py::class_>(m, "ParserBase") + .def(py::init<>()) + .def("parse", + [](ParserBase& self, py::dict& msg) { + return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); + }, + py::arg("text"), + "Parse is called with the full text. Returns a ParsedMessage with parsed content."); + + py::class_, ParserBase>(m, "Llama32JsonToolParser") + .def(py::init<>()) + .def("parse", + [](Llama32JsonToolParser& self, py::dict& msg) { + return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + }, + py::arg("text"), + "Parse is called with the full text. 
Returns a ParsedMessage with parsed content.") + .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + + py::class_, ParserBase>(m, "Llama32PythonicToolParser") + .def(py::init<>()) + .def("parse", + [](Llama32PythonicToolParser& self, py::dict& msg) { + return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + }, + py::arg("text"), + "Parse is called with the full text. Returns a ParsedMessage with parsed content.") + .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 719766fc06..f7ced84171 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -7,12 +7,12 @@ import numpy as np import openvino import pytest -from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus +from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser from transformers import AutoTokenizer from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model import re import textwrap - +import json @pytest.fixture(scope="module") def hf_ov_genai_models(request, tmp_path_factory): @@ -111,6 +111,27 @@ def write(self, message): assert msg['content'] == content + +@pytest.mark.precommit +@pytest.mark.parametrize( + "hf_ov_genai_models", + [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + indirect=True +) +def test_final_parser_llama_32_json(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}' + content_json = { + "content": f"Calling weather API: {json_str}" + } + + parser = Llama32JsonToolParser() + parser.parse(content_json) + + assert content_json['tool_calls'][0] == json.loads(json_str) + + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): From 248ccc6355504ab58775f1b3822b0f919c8652ee Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 08:07:49 +0200 Subject: [PATCH 09/43] rename ParsedMessage -> JsonContainer --- .../text_generation/parsed_output_sample.cpp | 2 +- .../include/openvino/genai/llm_pipeline.hpp | 1 - src/cpp/include/openvino/genai/parsers.hpp | 16 +++++--------- .../include/openvino/genai/text_streamer.hpp | 6 ++--- src/cpp/src/llm/pipeline.cpp | 2 +- src/cpp/src/parsers.cpp | 22 +++++++++---------- .../openvino_genai/py_openvino_genai.pyi | 6 ++--- src/python/py_parsers.cpp | 18 +++++++-------- src/python/py_streamers.cpp | 8 +++---- tests/cpp/parser.cpp | 6 ++--- tests/python_tests/test_text_streamer.py | 2 +- 11 files changed, 42 insertions(+), 47 deletions(-) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp index 6efa64ee5e..dfc2ef964b 100644 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ b/samples/cpp/text_generation/parsed_output_sample.cpp @@ -11,7 +11,7 @@ class CurrentStreamer : public ov::genai::TextParserStreamer { public: CurrentStreamer(const ov::genai::Tokenizer& tokenizer) : ov::genai::TextParserStreamer(tokenizer) {} - ov::genai::StreamingStatus 
write(ov::genai::ParsedMessage& message) { + ov::genai::StreamingStatus write(ov::genai::JsonContainer& message) { std::cout << message["content"].get_string() << std::flush; return ov::genai::StreamingStatus::RUNNING; } diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index 6385ced995..5061901108 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -69,7 +69,6 @@ class DecodedResults { std::vector scores; PerfMetrics perf_metrics; std::shared_ptr extended_perf_metrics; - // std::vector parsed; std::vector parsed; // @brief Convert DecodedResults to a string. diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 18e8ee7fef..979d63ef7d 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -14,17 +14,13 @@ namespace ov { namespace genai { -// TODO: will be converted to JSONLike object -// using ParsedMessage = std::map; -using ParsedMessage = JsonContainer; - class IncrementalParserBase { public: IncrementalParserBase() = default; // We return string which with filtered text to be added to content. virtual std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, @@ -46,7 +42,7 @@ class ReasoningParser : public IncrementalParserBase { bool keep_original_content = true); std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, @@ -71,7 +67,7 @@ class ParserBase { public: ParserBase() = default; - virtual ParsedMessage parse(ParsedMessage& text) = 0; + virtual JsonContainer parse(JsonContainer& text) = 0; static std::shared_ptr get_parser(std::string name); }; @@ -83,7 +79,7 @@ class Llama32PythonicToolParser : public ParserBase { // TODO: Check that vLLM has the same default. Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - ParsedMessage parse(ParsedMessage& input) override; + JsonContainer parse(JsonContainer& input) override; static std::string name() { return "Llama32PythonicToolParser"; } private: bool m_keep_original_content = true; @@ -95,7 +91,7 @@ class Llama32JsonToolParser : public ParserBase { // TODO: Check that vLLM has the same default. 
Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - ParsedMessage parse(ParsedMessage& input) override; + JsonContainer parse(JsonContainer& input) override; static std::string name() { return "Llama32JsonToolParser"; } private: bool m_keep_original_content = true; @@ -109,7 +105,7 @@ class BaseReasoningParser : public ParserBase{ m_open_tag(open_tag), m_close_tag(close_tag) {} - ParsedMessage parse(ParsedMessage& input) override; + JsonContainer parse(JsonContainer& input) override; private: bool m_expect_open_tag = true; diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 055adbfbe2..4ac5758b73 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -51,14 +51,14 @@ class TextParserStreamer : public TextStreamer { public: TextParserStreamer(const Tokenizer& tokenizer, std::vector parsers = {}); - virtual StreamingStatus write(ParsedMessage& message) = 0; + virtual StreamingStatus write(JsonContainer& message) = 0; CallbackTypeVariant write(std::string message); - ParsedMessage get_parsed_message() const { return m_parsed_message; } + JsonContainer get_parsed_message() const { return m_parsed_message; } std::vector> get_parsers() const { return m_parsers; } private: - ParsedMessage m_parsed_message; + JsonContainer m_parsed_message; std::string m_text_buffer; std::vector> m_parsers; }; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 41b678d388..c2b10208f0 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -241,7 +241,7 @@ DecodedResults LLMPipeline::generate( // Apply Base parsers sequentially even if IncrementalParser has run. 
if (!parsers.empty()) { for (size_t i = 0; i < res.texts.size(); ++i) { - ParsedMessage msg; + JsonContainer msg; msg["content"] = res.texts[i]; for (auto& parser: parsers) { // TODO: check if is_active() is needed here diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 915167cc2d..018a70950e 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -29,7 +29,7 @@ class ReasoningParserImpl { m_keep_original_content(keep_original_content) {} std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens, @@ -148,7 +148,7 @@ ReasoningParser::ReasoningParser(bool starts_with_thinking, bool keep_original_c } std::string ReasoningParser::parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens, @@ -161,7 +161,7 @@ bool ReasoningParser::is_active() const { return !m_impl->m_deactivated; } -ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { +JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -177,7 +177,7 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { std::string call = m.str().substr(1, m.str().size() - 2); // Split function name and arguments - input["tool_calls"] = ParsedMessage::array(); + input["tool_calls"] = JsonContainer::array(); size_t pos = call.find('('); std::string name = call.substr(0, pos); @@ -189,20 +189,20 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); for (; it != std::sregex_iterator(); ++it) { - kv.push_back(ParsedMessage(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); + kv.push_back(JsonContainer(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); } - input["tool_calls"] = ParsedMessage::array(); - input["tool_calls"].push_back(ParsedMessage({{"name", name}, {"arguments", kv}})); + input["tool_calls"] = JsonContainer::array(); + input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - return ParsedMessage{}; + return JsonContainer{}; } -ParsedMessage Llama32JsonToolParser::parse(ParsedMessage& message) { +JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { // Find JSON in the message std::string msg_content = message["content"].get_string(); @@ -221,8 +221,8 @@ ParsedMessage Llama32JsonToolParser::parse(ParsedMessage& message) { return message; } -ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { - ParsedMessage res; +JsonContainer BaseReasoningParser::parse(JsonContainer& input) { + JsonContainer res; std::string reasoning_content; // auto content = input["content"]; std::string content = input["content"].get_string(); diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index ada0fa5ca6..82c0721e9a 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -1383,7 +1383,7 @@ class IncrementalParserBase: """ def parse(self, msg: collections.abc.Mapping[str, str], previous_text: str, delta_text: str, previous_tokens: 
collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> dict[str, str]: """ - Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content. + Parse is called every time new text delta is decoded. Returns a JsonContainer with parsed content. """ class InpaintingPipeline: """ @@ -1757,7 +1757,7 @@ class ParserBase: ... def parse(self, text: collections.abc.Mapping[str, str]) -> dict[str, str]: """ - Parse is called with the full text. Returns a ParsedMessage with parsed content. + Parse is called with the full text. Returns a JsonContainer with parsed content. """ class PerfMetrics: """ @@ -3158,7 +3158,7 @@ class TextParserStreamer: """ def write(self, message: collections.abc.Mapping[str, str]) -> StreamingStatus: """ - Write is called with a ParsedMessage. Returns StreamingStatus. + Write is called with a JsonContainer. Returns StreamingStatus. """ class TextRerankPipeline: """ diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 5f8df28446..d756f2a942 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -13,7 +13,7 @@ namespace py = pybind11; -using ov::genai::ParsedMessage; +using ov::genai::JsonContainer; using ov::genai::IncrementalParserBase; using ov::genai::ParserVariant; using ov::genai::ParserBase; @@ -34,7 +34,7 @@ namespace { class ConstructableIncrementalParserBase: public IncrementalParserBase { public: std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, @@ -63,9 +63,9 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { class ConstructableParserBase: public ParserBase { public: - ParsedMessage parse(ParsedMessage& text) override { + JsonContainer parse(JsonContainer& text) override { PYBIND11_OVERRIDE_PURE( - ParsedMessage, // Return type + JsonContainer, // Return type ParserBase, // Parent class parse, // Name of function in C++ (must match Python name) text // Argument(s) @@ -104,7 +104,7 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - // TODO: optimize conversion between py::dict and ParsedMessage + // TODO: optimize conversion between py::dict and JsonContainer auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); auto msg_cpp = JsonContainer(msg_anymap); @@ -136,7 +136,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", &DeepSeekR1ReasoningParser::parse, - "Parse is called with the full text. Returns a ParsedMessage with parsed content.", + "Parse is called with the full text. Returns a JsonContainer with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); @@ -148,7 +148,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); }, py::arg("text"), - "Parse is called with the full text. Returns a ParsedMessage with parsed content."); + "Parse is called with the full text. 
Returns a JsonContainer with parsed content."); py::class_, ParserBase>(m, "Llama32JsonToolParser") .def(py::init<>()) @@ -157,7 +157,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a ParsedMessage with parsed content.") + "Parse is called with the full text. Returns a JsonContainer with parsed content.") .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); py::class_, ParserBase>(m, "Llama32PythonicToolParser") @@ -167,6 +167,6 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a ParsedMessage with parsed content.") + "Parse is called with the full text. Returns a JsonContainer with parsed content.") .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); } diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 61dc2d026c..4bfa01223d 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -20,7 +20,7 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::TextParserStreamer; using ov::genai::IncrementalParserBase; -using ov::genai::ParsedMessage; +using ov::genai::JsonContainer; using ov::genai::Tokenizer; using ov::genai::JsonContainer; @@ -76,14 +76,14 @@ class ConstructableTextParserStreamer: public TextParserStreamer { public: using TextParserStreamer::TextParserStreamer; // inherit base constructors - StreamingStatus write(ParsedMessage& message) override { + StreamingStatus write(JsonContainer& message) override { py::dict message_py; auto json_obj = message.to_json(); for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { message_py[py::cast(it.key())] = py::cast(it.value().get()); } - // call python implementation which accepts py::dict instead of ParsedMessage + // call python implementation which accepts py::dict instead of JsonContainer auto res = py::get_override(this, "write")(message_py); auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(message_py); @@ -166,7 +166,7 @@ void init_streamers(py::module_& m) { return derived->write(message); }, py::arg("message"), - "Write is called with a ParsedMessage. Returns StreamingStatus.") + "Write is called with a JsonContainer. 
Returns StreamingStatus.") .def("_write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index dc15ac1482..2068e9308f 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -10,7 +10,7 @@ using namespace ov::genai; nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { - ParsedMessage input; + JsonContainer input; input["content"] = prompt; return (parser->parse(input)).to_json(); } @@ -101,7 +101,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: ov::genai::ReasoningParser parser; - ParsedMessage msg; + JsonContainer msg; }; TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { @@ -118,7 +118,7 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { std::string ref_res = "First, I recognize that the question is asking for the sum of 2 and 1.\n\nI know that addition involves combining two numbers to find their total.\n\nStarting with 2, I add 1 to it.\n\n2 plus 1 equals 3.\n"; - ParsedMessage msg; + JsonContainer msg; for (int i = 1; i < input_stream.size(); i++) { diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index bc3db5bc97..9b646c6553 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -78,7 +78,7 @@ def write(self, token_chunk): class CurrentParsingStreamer(TextParserStreamer): def write(self, word: str): - msg: ParsedMessage = get_current_message() + msg: JsonContainer = get_current_message() streamer = lambda x: print(x) From 04064bfe2e602121f72eaf515d77d9514807ca42 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 08:59:46 +0200 Subject: [PATCH 10/43] make tests green again, apply copilot comments --- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 2 +- .github/workflows/windows.yml | 2 +- src/cpp/include/openvino/genai/parsers.hpp | 2 -- src/cpp/src/llm/pipeline.cpp | 4 +-- src/cpp/src/parsers.cpp | 39 +++++++++++----------- src/cpp/src/text_streamer.cpp | 4 +-- src/python/py_parsers.cpp | 11 +----- src/python/py_streamers.cpp | 3 +- tests/cpp/parser.cpp | 9 ++--- tests/python_tests/test_parsers.py | 2 +- tests/python_tests/test_text_streamer.py | 4 +-- 12 files changed, 36 insertions(+), 48 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 9500add7c3..7ff8c29af3 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -782,7 +782,7 @@ jobs: run: | source ${{ env.INSTALL_DIR }}/setupvars.sh chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching - ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.*" + ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" - name: Test Continuous Batching Tools if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }} diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index d1be431238..5620804095 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -695,7 +695,7 @@ jobs: run: | source ${{ env.INSTALL_DIR }}/setupvars.sh chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching - ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" 
--gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.* + ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" - name: Test C++ Tools run: | diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index fbd082bede..bb544bc0cf 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -866,7 +866,7 @@ jobs: - name: gtests unit tests run: | . "${{ env.INSTALL_DIR }}/setupvars.ps1" - & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.* + & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" - name: Test C++ Tools run: | diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 979d63ef7d..6d2dac8fd0 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -27,7 +27,6 @@ class IncrementalParserBase { const std::optional>& delta_tokens = std::nullopt ) = 0; - virtual bool is_active() const = 0; static std::shared_ptr get_parser(std::string name); }; @@ -48,7 +47,6 @@ class ReasoningParser : public IncrementalParserBase { const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt ) override; - bool is_active() const override; }; class DeepSeekR1ReasoningParser : public ReasoningParser { diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index c2b10208f0..899da4e39f 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -244,10 +244,10 @@ DecodedResults LLMPipeline::generate( JsonContainer msg; msg["content"] = res.texts[i]; for (auto& parser: parsers) { - // TODO: check if is_active() is needed here // TODO: Check the state of incremental parser and reset if necessary - msg = parser->parse(msg); + parser->parse(msg); } + res.parsed[i] = msg; } } diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 018a70950e..3026f44599 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -14,6 +14,7 @@ namespace ov::genai { class ReasoningParserImpl { private: bool m_starts_with_thinking = true; + bool m_first_run = true; bool m_keep_original_content = true; bool m_think_tag_opened = false; std::string m_open_tag = ""; @@ -35,6 +36,14 @@ class ReasoningParserImpl { const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { + if (m_deactivated) { + return delta_text; + } + if (m_starts_with_thinking && m_first_run) { + m_think_tag_opened = true; + } + m_first_run = false; + if (!msg.contains("reasoning_content")) { msg["reasoning_content"] = ""; } @@ -42,10 +51,6 @@ class ReasoningParserImpl { msg["content"] = ""; } - bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; - if (m_starts_with_thinking) { - m_think_tag_opened = true; - } auto txt_chunk = m_text_cache + delta_text; auto reason_str = msg["reasoning_content"].get_string(); @@ -114,7 +119,8 @@ class ReasoningParserImpl { for (size_t i = txt_chunk.size(); i >= 1; --i) { // Get the substring of the i last characters of txt_chunk auto suffix = txt_chunk.substr(txt_chunk.size() - i, i); - if (m_close_tag.find(suffix) != std::string::npos) { + // If this suffix is a prefix of m_close_tag, we need to keep it in the cache. 
+ if (m_close_tag.find(suffix) == 0) { num_chars_to_keep = i; break; } @@ -157,10 +163,6 @@ std::string ReasoningParser::parse( return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } -bool ReasoningParser::is_active() const { - return !m_impl->m_deactivated; -} - JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -182,14 +184,14 @@ JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { size_t pos = call.find('('); std::string name = call.substr(0, pos); std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) - + + + JsonContainer kv; // Parse arguments of the form key='value' - JsonContainer kv = JsonContainer::array(); - std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); for (; it != std::sregex_iterator(); ++it) { - kv.push_back(JsonContainer(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); + kv[std::string((*it)[1])] = std::string((*it)[2]); } input["tool_calls"] = JsonContainer::array(); @@ -198,8 +200,7 @@ JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - - return JsonContainer{}; + return input; } JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { @@ -224,9 +225,7 @@ JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { JsonContainer BaseReasoningParser::parse(JsonContainer& input) { JsonContainer res; std::string reasoning_content; - // auto content = input["content"]; std::string content = input["content"].get_string(); - res["content"] = content; size_t start = content.find(m_open_tag); size_t end = content.find(m_close_tag); @@ -235,14 +234,14 @@ JsonContainer BaseReasoningParser::parse(JsonContainer& input) { reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size())); if (!m_keep_original_content) { // Remove ... from content - res["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size()); + input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size()); } } else { reasoning_content = ""; } - res["reasoning_content"] = reasoning_content; - return res; + input["reasoning_content"] = reasoning_content; + return input; } std::map()>> registered_incremental_parsers; diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 5ffc34689f..a34b575519 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -144,9 +144,7 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector

is_active()) { - message = parser->parse(m_parsed_message, m_text_buffer, message); - } + message = parser->parse(m_parsed_message, m_text_buffer, message); // Message can be modified inside parser, if parser for example extracted tool calling from message content // but parser m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d756f2a942..a3c255d1b6 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -51,14 +51,6 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { delta_tokens ); } - - bool is_active() const override { - PYBIND11_OVERRIDE_PURE( - bool, // Return type - IncrementalParserBase, // Parent class - is_active, // Name of function in C++ (must match Python name) - ); - } }; class ConstructableParserBase: public ParserBase { @@ -120,8 +112,7 @@ void init_parsers(py::module_& m) { return res; }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, - "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.") - .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing."); + "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") .def(py::init(), py::arg("starts_with_thinking") = false) diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 4bfa01223d..912e33d4be 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -145,7 +145,8 @@ void init_streamers(py::module_& m) { return self.write(tokens); } }, - py::arg("token")); + py::arg("token")) + .def("end", &TextStreamer::end); // TODO: double check/add more relevant docstrings for TextParserStreamer. py::class_, TextStreamer>(m, "TextParserStreamer") diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 2068e9308f..1e56fef042 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -12,12 +12,13 @@ using namespace ov::genai; nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { JsonContainer input; input["content"] = prompt; - return (parser->parse(input)).to_json(); + parser->parse(input); + return input.to_json(); } TEST(ParserTest, test_llama32_parser_1) { - std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)"; + std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; nlohmann::json expected; // By default content should keep original values. 
@@ -40,7 +41,7 @@ TEST(ParserTest, test_llama32_parser_1) { } TEST(ParserTest, test_llama32_parser_2) { - std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)"; + std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; nlohmann::json expected; // In this test tool calling part will be cut from the content after parsing. @@ -100,7 +101,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: - ov::genai::ReasoningParser parser; + ov::genai::DeepSeekR1ReasoningParser parser; JsonContainer msg; }; diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index f7ced84171..f273f13918 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -138,7 +138,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): if "content" in message: print(message["content"]) - return True + return StreamingStatus.RUNNING streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index 9b646c6553..4790ab4b3d 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -29,7 +29,7 @@ def chunks(arr: list, n: int): Set folder = Application.GetNamespace("Microsoft Office").PackagedInstance.GetFolder("Folder Name") 'Get all files in the folder folder.Files.Clear -""" +""" eng_prompts = [ 'What is the previous answer?', 'Why is the Sun yellow?', @@ -72,7 +72,7 @@ def test_text_prompts(tmp_path, prompt, model_id): streamer.write(token) streamer.end() - class CurrentStremaer(BaseStreamer): + class CurrentStreamer(BaseStreamer): def write(self, token_chunk): pass From ae1930b92c3d793a313e9ebecd4785abca5479b7 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 09:32:42 +0200 Subject: [PATCH 11/43] revert sample --- samples/cpp/text_generation/CMakeLists.txt | 1 - .../text_generation/parsed_output_sample.cpp | 52 ------------------- samples/python/text_generation/chat_sample.py | 33 ------------ 3 files changed, 86 deletions(-) delete mode 100644 samples/cpp/text_generation/parsed_output_sample.cpp diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index 7493362e81..ebaf32c7f4 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -29,7 +29,6 @@ set (SAMPLE_LIST lora_greedy_causal_lm multinomial_causal_lm prompt_lookup_decoding_lm - parsed_output_sample speculative_decoding_lm) foreach(sample IN LISTS SAMPLE_LIST) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp deleted file mode 100644 index dfc2ef964b..0000000000 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "openvino/genai/llm_pipeline.hpp" -#include "openvino/genai/parsers.hpp" -#include "openvino/genai/text_streamer.hpp" - - -class CurrentStreamer : public ov::genai::TextParserStreamer { -private: -public: - CurrentStreamer(const ov::genai::Tokenizer& tokenizer) - : 
ov::genai::TextParserStreamer(tokenizer) {} - ov::genai::StreamingStatus write(ov::genai::JsonContainer& message) { - std::cout << message["content"].get_string() << std::flush; - return ov::genai::StreamingStatus::RUNNING; - } -}; - - -int main(int argc, char* argv[]) try { - if (argc < 2 || argc > 3) { - throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); - } - // std::string prompt = "<|begin▁of▁sentence|><|User|>Please think of a dificcult task to solve x**2 + y**2 = 1<|Assistant|>"; - std::string prompt = "<|begin▁of▁sentence|><|User|>Why is the Sky blue?<|Assistant|>"; - std::string models_path = argv[1]; - - // Default device is CPU; can be overridden by the second argument - std::string device = (argc == 3) ? argv[2] : "CPU"; // GPU, NPU can be used as well - ov::genai::LLMPipeline pipe(models_path, device); - - ov::genai::GenerationConfig config; - config.max_new_tokens = 1000; - - auto tok = pipe.get_tokenizer(); - std::shared_ptr streamer = std::make_shared(tok); - - pipe.generate(prompt, config, streamer); - - -} catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; -} catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; -} diff --git a/samples/python/text_generation/chat_sample.py b/samples/python/text_generation/chat_sample.py index b852141d3c..e4067c49f3 100755 --- a/samples/python/text_generation/chat_sample.py +++ b/samples/python/text_generation/chat_sample.py @@ -36,36 +36,3 @@ def main(): if '__main__' == __name__: main() - - pipe = openvino_genai.LLMPipeline(args.model_dir, device) - - prompt = "What is the weather in New York today?" - res = pipe.generate(prompt, max_new_tokens=100, streamer=streamer) - print(res.texts[0]) - - res.parsed['tool_caling'] - - class LlamaToolCallParser(ParserBase): - def parse(self, parsed_data: ParsedData) -> ParsedData: - # parsed_data - # process parsed_data - # e.g. extract tool calls, or other fields from content - return new_parsed_output - - llama_parser = LlamaToolCallParser() - res = pipe.generate(prompt, parsers=[llama_parser | "LLama3.2Pythonic"], max_new_tokens=100) - -# At the beginning msg['original_content'] is filled with full text -msg = res.texts[i] -for parser in m_parsers: - msg = parser.parse(msg) - -# At the end msg is filled with all parsed fields -parsed_data = { - 'original_content': '<|system|>You are a helpful assistant... 
I will call the `get_weather` function with the location… \n\nfunctools[{"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}]<|end|>', - 'content': 'blah blah', - 'reasoning_content': '', - 'tool_calls': "[{\"name\":\"get_weather\",\"arguments\":{\"location\":\"New York, NY\",\"unit\":\"celsius\"}}]", -} - -res.parsed: ParsedData \ No newline at end of file From 2772c985752abf3ce20dd0a663c218c09bcdff69 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 09:37:12 +0200 Subject: [PATCH 12/43] update pybind11 stubs --- src/python/openvino_genai/__init__.pyi | 9 +- .../openvino_genai/py_openvino_genai.pyi | 89 ++++++++++++++++--- 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 175df870eb..94846441f5 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -14,6 +14,7 @@ from openvino_genai.py_openvino_genai import ChunkStreamerBase from openvino_genai.py_openvino_genai import ContinuousBatchingPipeline from openvino_genai.py_openvino_genai import CppStdGenerator from openvino_genai.py_openvino_genai import DecodedResults +from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningParser from openvino_genai.py_openvino_genai import EncodedResults from openvino_genai.py_openvino_genai import FluxTransformer2DModel from openvino_genai.py_openvino_genai import GenerationConfig @@ -24,11 +25,16 @@ from openvino_genai.py_openvino_genai import Generator from openvino_genai.py_openvino_genai import Image2ImagePipeline from openvino_genai.py_openvino_genai import ImageGenerationConfig from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics +from openvino_genai.py_openvino_genai import IncrementalParserBase from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import KVCrushAnchorPointMode from openvino_genai.py_openvino_genai import KVCrushConfig from openvino_genai.py_openvino_genai import LLMPipeline +from openvino_genai.py_openvino_genai import Llama32JsonToolParser +from openvino_genai.py_openvino_genai import Llama32PythonicToolParser +from openvino_genai.py_openvino_genai import ParserBase from openvino_genai.py_openvino_genai import PerfMetrics +from openvino_genai.py_openvino_genai import Phi4ReasoningParser from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics from openvino_genai.py_openvino_genai import SD3Transformer2DModel @@ -49,6 +55,7 @@ from openvino_genai.py_openvino_genai import Text2ImagePipeline from openvino_genai.py_openvino_genai import Text2SpeechDecodedResults from openvino_genai.py_openvino_genai import Text2SpeechPipeline from openvino_genai.py_openvino_genai import TextEmbeddingPipeline +from openvino_genai.py_openvino_genai import TextParserStreamer from openvino_genai.py_openvino_genai import TextRerankPipeline from openvino_genai.py_openvino_genai import TextStreamer from openvino_genai.py_openvino_genai import TokenizedInputs @@ -64,5 +71,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 82c0721e9a..3c4ade4515 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 
'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -493,6 +493,9 @@ class DecodedResults: def extended_perf_metrics(self) -> ExtendedPerfMetrics: ... @property + def parsed(self) -> list[...]: + ... + @property def perf_metrics(self) -> PerfMetrics: ... @property @@ -501,6 +504,18 @@ class DecodedResults: @property def texts(self) -> list[str]: ... 
+class DeepSeekR1ReasoningParser(IncrementalParserBase): + @staticmethod + def get_parser(name: str) -> IncrementalParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self) -> None: + ... + def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + """ + Parse is called with the full text. Returns a JsonContainer with parsed content. + """ class EncodedGenerationResult: """ @@ -899,6 +914,12 @@ class GenerationConfig: def num_return_sequences(self, arg0: typing.SupportsInt) -> None: ... @property + def parsers(self) -> list[str | openvino_genai.py_openvino_genai.ParserBase]: + ... + @parsers.setter + def parsers(self, arg0: collections.abc.Sequence[str | openvino_genai.py_openvino_genai.ParserBase]) -> None: + ... + @property def presence_penalty(self) -> float: ... @presence_penalty.setter @@ -1377,13 +1398,9 @@ class ImageGenerationPerfMetrics: class IncrementalParserBase: def __init__(self) -> None: ... - def is_active(self) -> bool: - """ - Indicates whether the parser is active and should be used during parsing. - """ - def parse(self, msg: collections.abc.Mapping[str, str], previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> dict[str, str]: + def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ - Parse is called every time new text delta is decoded. Returns a JsonContainer with parsed content. + Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. """ class InpaintingPipeline: """ @@ -1741,6 +1758,30 @@ class LLMPipeline: ... def start_chat(self, system_message: str = '') -> None: ... +class Llama32JsonToolParser(ParserBase): + @staticmethod + def get_parser(name: str) -> ParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self) -> None: + ... + def parse(self, text: dict) -> None: + """ + Parse is called with the full text. Returns a JsonContainer with parsed content. + """ +class Llama32PythonicToolParser(ParserBase): + @staticmethod + def get_parser(name: str) -> ParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self) -> None: + ... + def parse(self, text: dict) -> None: + """ + Parse is called with the full text. Returns a JsonContainer with parsed content. + """ class MeanStdPair: def __init__(self) -> None: ... @@ -1755,7 +1796,7 @@ class MeanStdPair: class ParserBase: def __init__(self) -> None: ... - def parse(self, text: collections.abc.Mapping[str, str]) -> dict[str, str]: + def parse(self, text: dict) -> None: """ Parse is called with the full text. Returns a JsonContainer with parsed content. """ @@ -1861,6 +1902,18 @@ class PerfMetrics: @property def raw_metrics(self) -> RawPerfMetrics: ... +class Phi4ReasoningParser(IncrementalParserBase): + @staticmethod + def get_parser(name: str) -> IncrementalParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self, starts_with_thinking: bool = False) -> None: + ... 
+ def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + """ + Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. + """ class PipelineMetrics: """ @@ -3151,12 +3204,24 @@ class TextEmbeddingPipeline: """ Waits computed embeddings for a query """ -class TextParserStreamer: - def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[...] = []) -> None: +class TextParserStreamer(TextStreamer): + def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[openvino_genai.py_openvino_genai.IncrementalParserBase | str] = []) -> None: """ TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. """ - def write(self, message: collections.abc.Mapping[str, str]) -> StreamingStatus: + def _write(self, message: str) -> bool | openvino_genai.py_openvino_genai.StreamingStatus: + """ + Write is called with a string message. Returns CallbackTypeVariant. This is a private method. + """ + def get_parsed_message(self) -> ...: + """ + Get the current parsed message + """ + def get_parsers(self) -> list[IncrementalParserBase]: + """ + Get the list of parsers + """ + def write(self, message: dict) -> StreamingStatus: """ Write is called with a JsonContainer. Returns StreamingStatus. """ @@ -3223,6 +3288,8 @@ class TextStreamer(StreamerBase): """ def __init__(self, tokenizer: Tokenizer, callback: collections.abc.Callable[[str], bool | openvino_genai.py_openvino_genai.StreamingStatus], detokenization_params: collections.abc.Mapping[str, typing.Any] = {}) -> None: ... + def end(self) -> None: + ... def write(self, token: typing.SupportsInt | collections.abc.Sequence[typing.SupportsInt]) -> StreamingStatus: ... class TokenizedInputs: From 94b8370dc0906f1ae84aa36960bd98f179045716 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 10:44:16 +0200 Subject: [PATCH 13/43] update stubs --- .../openvino_genai/py_openvino_genai.pyi | 18 ++--- src/python/py_openvino_genai.cpp | 13 +++- src/python/py_parsers.cpp | 77 +++++++++++++++++-- src/python/py_streamers.cpp | 15 +++- 4 files changed, 102 insertions(+), 21 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 3c4ade4515..451cd720db 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -493,7 +493,7 @@ class DecodedResults: def extended_perf_metrics(self) -> ExtendedPerfMetrics: ... @property - def parsed(self) -> list[...]: + def parsed(self) -> dict: ... @property def perf_metrics(self) -> PerfMetrics: @@ -512,9 +512,9 @@ class DeepSeekR1ReasoningParser(IncrementalParserBase): """ def __init__(self) -> None: ... - def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. 
Returns a dict with parsed content. """ class EncodedGenerationResult: """ @@ -1768,7 +1768,7 @@ class Llama32JsonToolParser(ParserBase): ... def parse(self, text: dict) -> None: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. Returns a dict with parsed content. """ class Llama32PythonicToolParser(ParserBase): @staticmethod @@ -1780,7 +1780,7 @@ class Llama32PythonicToolParser(ParserBase): ... def parse(self, text: dict) -> None: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. Returns a dict with parsed content. """ class MeanStdPair: def __init__(self) -> None: @@ -1798,7 +1798,7 @@ class ParserBase: ... def parse(self, text: dict) -> None: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. Returns a dict with parsed content. """ class PerfMetrics: """ @@ -1910,7 +1910,7 @@ class Phi4ReasoningParser(IncrementalParserBase): """ def __init__(self, starts_with_thinking: bool = False) -> None: ... - def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. """ @@ -3213,7 +3213,7 @@ class TextParserStreamer(TextStreamer): """ Write is called with a string message. Returns CallbackTypeVariant. This is a private method. """ - def get_parsed_message(self) -> ...: + def get_parsed_message(self) -> dict: """ Get the current parsed message """ @@ -3223,7 +3223,7 @@ class TextParserStreamer(TextStreamer): """ def write(self, message: dict) -> StreamingStatus: """ - Write is called with a JsonContainer. Returns StreamingStatus. + Write is called with a dict. Returns StreamingStatus. 
""" class TextRerankPipeline: """ diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index aa2854480c..586b6b2d1f 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -93,7 +93,18 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) .def_readonly("scores", &DecodedResults::scores) - .def_readonly("parsed", &DecodedResults::parsed) + .def_property_readonly("parsed", [](const DecodedResults& dr) -> py::dict { + static py::object json_mod = py::module_::import("json"); + py::list result_dicts; + + for (const auto& parsed: dr.parsed) { + auto json_str = parsed.to_json_string(); + py::dict json_dict = json_mod.attr("loads")(json_str); + + result_dicts.append(json_dict); + } + return result_dicts; + }) .def_readonly("perf_metrics", &DecodedResults::perf_metrics) .def_readonly("extended_perf_metrics", &DecodedResults::extended_perf_metrics) .def("__str__", [](const DecodedResults &dr) -> py::str { diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index a3c255d1b6..cb19fb503c 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -13,18 +13,17 @@ namespace py = pybind11; -using ov::genai::JsonContainer; using ov::genai::IncrementalParserBase; using ov::genai::ParserVariant; using ov::genai::ParserBase; using ov::genai::ReasoningParser; using ov::genai::Phi4ReasoningParser; using ov::genai::DeepSeekR1ReasoningParser; +using ov::genai::JsonContainer; using ov::genai::Llama32JsonToolParser; using ov::genai::Llama32PythonicToolParser; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; -using ov::genai::JsonContainer; namespace pyutils = ov::genai::pybind::utils; @@ -84,6 +83,32 @@ void call_parser(py::dict& msg, std::function fun } } +// wrapper to enhance calling incremental parser from Python +std::string call_incremental_parser( + IncrementalParserBase& parser, + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens, + const std::optional>& delta_tokens, + std::function>&, + const std::optional>&)> func) { + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); + auto msg_cpp = JsonContainer(msg_anymap); + + auto res = func(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + + auto json_str = msg_cpp.to_json_string(); + py::dict result = json_mod.attr("loads")(json_str); + + // update msg with result + msg.clear(); + for (auto item : result) { + msg[item.first] = item.second; + } + return res; +} + } // namespace // TODO: double check/add more relevant docstrings for parsers. 
@@ -117,7 +142,25 @@ void init_parsers(py::module_& m) { py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") .def(py::init(), py::arg("starts_with_thinking") = false) .def("parse", - &Phi4ReasoningParser::parse, + [](Phi4ReasoningParser& self, + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt) { + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + }); + }, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) @@ -126,8 +169,26 @@ void init_parsers(py::module_& m) { py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") .def(py::init<>()) .def("parse", - &DeepSeekR1ReasoningParser::parse, - "Parse is called with the full text. Returns a JsonContainer with parsed content.", + [](DeepSeekR1ReasoningParser& self, + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt) { + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + }); + }, + "Parse is called with the full text. Returns a dict with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); @@ -139,7 +200,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); }, py::arg("text"), - "Parse is called with the full text. Returns a JsonContainer with parsed content."); + "Parse is called with the full text. Returns a dict with parsed content."); py::class_, ParserBase>(m, "Llama32JsonToolParser") .def(py::init<>()) @@ -148,7 +209,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a JsonContainer with parsed content.") + "Parse is called with the full text. Returns a dict with parsed content.") .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); py::class_, ParserBase>(m, "Llama32PythonicToolParser") @@ -158,6 +219,6 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a JsonContainer with parsed content.") + "Parse is called with the full text. 
Returns a dict with parsed content.")
        .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name.");
}
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 912e33d4be..75b0fdf9ad 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -22,7 +22,6 @@ using ov::genai::TextParserStreamer;
 using ov::genai::IncrementalParserBase;
 using ov::genai::JsonContainer;
 using ov::genai::Tokenizer;
-using ov::genai::JsonContainer;
 
 namespace pyutils = ov::genai::pybind::utils;
 
@@ -167,13 +166,23 @@ void init_streamers(py::module_& m) {
             return derived->write(message);
         },
         py::arg("message"),
-        "Write is called with a JsonContainer. Returns StreamingStatus.")
+        "Write is called with a dict. Returns StreamingStatus.")
         .def("_write", py::overload_cast<std::string>(&TextParserStreamer::write), py::arg("message"),
             "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.")
-        .def("get_parsed_message", &TextParserStreamer::get_parsed_message, "Get the current parsed message")
+        .def("get_parsed_message",
+            [](TextParserStreamer& self) {
+                static py::object json_mod = py::module_::import("json");
+
+                auto res = self.get_parsed_message();
+                auto json_str = res.to_json_string();
+                py::dict json_dict = json_mod.attr("loads")(json_str);
+
+                return json_dict;
+
+            }, "Get the current parsed message")
 
         .def("get_parsers", &TextParserStreamer::get_parsers, "Get the list of parsers");
 }

From 7759b48095a3193675984907eff63f3401bbf89d Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 11:44:41 +0200
Subject: [PATCH 14/43] replace starts_with_thinking -> expect_open_tag; some
 other corrections

---
 src/cpp/include/openvino/genai/parsers.hpp    | 36 +++-----
 .../include/openvino/genai/text_streamer.hpp  |  2 +-
 src/cpp/src/parsers.cpp                       | 83 ++++++++++++-------
 src/cpp/src/text_streamer.cpp                 |  2 +-
 .../openvino_genai/py_openvino_genai.pyi      |  2 +-
 src/python/py_parsers.cpp                     |  3 +-
 6 files changed, 69 insertions(+), 59 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 6d2dac8fd0..320302aa0d 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -30,14 +30,12 @@ class IncrementalParserBase {
     static std::shared_ptr<IncrementalParserBase> get_parser(std::string name);
 };
 
-// Forward declaration
-class ReasoningParserImpl;
-
 class ReasoningParser : public IncrementalParserBase {
 private:
+    class ReasoningParserImpl;
     std::shared_ptr<ReasoningParserImpl> m_impl;
 public:
-    ReasoningParser(bool starts_with_thinking = true,
+    ReasoningParser(bool expect_open_tag = true,
                     bool keep_original_content = true);
 
     std::string parse(
@@ -51,67 +49,55 @@ class ReasoningParser : public IncrementalParserBase {
 
 class DeepSeekR1ReasoningParser : public ReasoningParser {
 public:
-    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : ReasoningParser(starts_with_thinking) {};
+    explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {};
     static std::string name() { return "DeepSeekR1ReasoningParser"; }
 };
 
 class Phi4ReasoningParser : public ReasoningParser {
 public:
-    Phi4ReasoningParser(bool starts_with_thinking = false) : ReasoningParser(starts_with_thinking) {};
+    explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {};
     static std::string name() { return "Phi4ReasoningParser"; }
 };
 
 class ParserBase {
 public:
     ParserBase() = default;
     virtual JsonContainer parse(JsonContainer& text) = 0;
     static std::shared_ptr<ParserBase> get_parser(std::string name);
 };
 
 using ParserVariant = std::variant<std::shared_ptr<ParserBase>, std::string>;
 
 class Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     // TODO: Check that vLLM has the same default.
-    Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+    explicit Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
     static std::string name() { return "Llama32PythonicToolParser"; }
 private:
-    bool m_keep_original_content = true;
+    bool m_keep_original_content;
 };
 
 class Llama32JsonToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     // TODO: Check that vLLM has the same default.
-    Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+    explicit Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
     static std::string name() { return "Llama32JsonToolParser"; }
 private:
-    bool m_keep_original_content = true;
+    bool m_keep_original_content;
 };
 
 class BaseReasoningParser : public ParserBase{
 public:
-    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>") :
-        m_expect_open_tag(expect_open_tag),
-        m_keep_original_content(keep_original_content),
-        m_open_tag(open_tag),
-        m_close_tag(close_tag) {}
-
+    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>");
     JsonContainer parse(JsonContainer& input) override;
 private:
-    bool m_expect_open_tag = true;
-    bool m_keep_original_content = true;
-    std::string m_open_tag = "<think>";
-    std::string m_close_tag = "</think>";
+    class BaseReasoningParserImpl;
+    std::shared_ptr<BaseReasoningParserImpl> m_impl;
 };
 
-
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 4ac5758b73..43f383ca83 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -49,7 +49,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
 
 class TextParserStreamer : public TextStreamer {
 public:
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers = {});
 
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 3026f44599..94abed8d5f 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -11,11 +11,11 @@ using json = nlohmann::json;
 
 namespace ov::genai {
 
-class ReasoningParserImpl {
+class ReasoningParser::ReasoningParserImpl {
 private:
-    bool m_starts_with_thinking = true;
+    bool m_expect_open_tag = true;
     bool m_first_run = true;
-    bool m_keep_original_content = true;
+    bool m_keep_original_content;
     bool m_think_tag_opened = false;
     std::string m_open_tag = "<think>";
    std::string m_close_tag = "</think>";
@@ -24,9 +24,9 @@ class ReasoningParserImpl {
 public:
     bool m_deactivated = false;
     ReasoningParserImpl() = default;
-    ReasoningParserImpl(bool starts_with_thinking = true,
+    ReasoningParserImpl(bool expect_open_tag = true,
                         bool keep_original_content = true)
-        : m_starts_with_thinking(starts_with_thinking),
+        : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content) {}
 
     std::string parse(
@@ -39,7 +39,7 @@ class ReasoningParserImpl {
         if (m_deactivated) {
             return delta_text;
         }
-        if (m_starts_with_thinking && m_first_run) {
+        if (m_expect_open_tag && m_first_run) {
             m_think_tag_opened = true;
         }
         m_first_run = false;
@@ -56,7 +56,7 @@ class ReasoningParserImpl {
         auto reason_str = msg["reasoning_content"].get_string();
         auto content_str = msg["content"].get_string();
 
-        if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) {
+        if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_expect_open_tag) {
             OPENVINO_ASSERT(m_open_tag.find(m_text_cache) != std::string::npos,
                             "m_text_cache should be a prefix of m_open_tag");
             // Thinking has started
@@ -149,8 +149,8 @@ class ReasoningParserImpl {
     }
 };
 
-ReasoningParser::ReasoningParser(bool starts_with_thinking, bool keep_original_content) {
-    m_impl = std::make_shared<ReasoningParserImpl>(starts_with_thinking, keep_original_content);
+ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content) {
+    m_impl = std::make_shared<ReasoningParserImpl>(expect_open_tag, keep_original_content);
 }
 
 std::string ReasoningParser::parse(
@@ -222,26 +222,51 @@ JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) {
     return message;
 }
 
-JsonContainer BaseReasoningParser::parse(JsonContainer& input) {
-    JsonContainer res;
-    std::string reasoning_content;
-    std::string content = input["content"].get_string();
-
-    size_t start = content.find(m_open_tag);
-    size_t end = content.find(m_close_tag);
-
-    if (start != std::string::npos && end != std::string::npos && end > start) {
-        reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
-        if (!m_keep_original_content) {
-            // Remove <think>...</think> from content
-            input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
-        }
-    } else {
-        reasoning_content = "";
-    }
-
-    input["reasoning_content"] = reasoning_content;
-    return input;
-}
+class BaseReasoningParser::BaseReasoningParserImpl {
+public:
+    BaseReasoningParserImpl(bool expect_open_tag,
+                            bool keep_original_content,
+                            std::string open_tag,
+                            std::string close_tag):
+        m_expect_open_tag(expect_open_tag),
+        m_keep_original_content(keep_original_content),
+        m_open_tag(open_tag),
+        m_close_tag(close_tag) {};
+
+    JsonContainer parse(JsonContainer& input) {
+        JsonContainer res;
+        std::string reasoning_content;
+        std::string content = input["content"].get_string();
+
+        size_t start = content.find(m_open_tag);
+        size_t end = content.find(m_close_tag);
+
+        if (start != std::string::npos && end != std::string::npos && end > start) {
+            reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
+            if (!m_keep_original_content) {
+                // Remove <think>...</think> from content
+                input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
+            }
+        } else {
+            reasoning_content = "";
+        }
+
+        input["reasoning_content"] = reasoning_content;
+        return input;
+    }
+private:
+    bool m_expect_open_tag;
+    bool m_keep_original_content;
+    std::string m_open_tag;
+    std::string m_close_tag;
+};
+
+BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) {
+    m_impl = std::make_shared<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+}
+
+JsonContainer BaseReasoningParser::parse(JsonContainer& input) {
+    return m_impl->parse(input);
+}
 
 std::map<std::string, std::function<std::shared_ptr<IncrementalParserBase>()>> registered_incremental_parsers;
@@ -249,9 +274,9 @@ std::map<std::string, std::function<std::shared_ptr<ParserBase>()>> registered_b
 
 // static initializer to register available buildin parsers
 static bool register_backends() {
-    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared<DeepSeekR1ReasoningParser>(/*starts_with_thinking*/ true); };
-    registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared<Phi4ReasoningParser>(/*starts_with_thinking*/ false); };
-    
+    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared<DeepSeekR1ReasoningParser>(/*expect_open_tag*/ true); };
+    registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared<Phi4ReasoningParser>(/*expect_open_tag*/ false); };
+
     registered_base_parsers[Llama32PythonicToolParser::name()] = []() { return std::make_shared<Llama32PythonicToolParser>(); };
 
     // TODO: Add more parsers and register them here.
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index a34b575519..437352358c 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -124,7 +124,7 @@ void TextStreamer::end() {
 
 StreamerBase::~StreamerBase() = default;
 
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers)
     : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant {
         return this->write(s);
     }) {
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index 451cd720db..256834c92f 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -1908,7 +1908,7 @@ class Phi4ReasoningParser(IncrementalParserBase):
         """
         Factory method to get parser by name.
         """
-    def __init__(self, starts_with_thinking: bool = False) -> None:
+    def __init__(self, expect_open_tag: bool = False) -> None:
         ...
     def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
         """
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index cb19fb503c..37b78dc39a 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -14,7 +14,6 @@ namespace py = pybind11;
 
 using ov::genai::IncrementalParserBase;
-using ov::genai::ParserVariant;
 using ov::genai::ParserBase;
 using ov::genai::ReasoningParser;
@@ -140,7 +139,7 @@ void init_parsers(py::module_& m) {
         "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.");
 
     py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, IncrementalParserBase>(m, "Phi4ReasoningParser")
-        .def(py::init<bool>(), py::arg("starts_with_thinking") = false)
+        .def(py::init<bool>(), py::arg("expect_open_tag") = false)
         .def("parse",
             [](Phi4ReasoningParser& self,

From 5470b63481efe4cd5ad2cd68ab79eab3e5b92436 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 12:22:41 +0200
Subject: [PATCH 15/43] remove std::variant

---
 .../openvino/genai/generation_config.hpp      |  2 +-
 src/cpp/include/openvino/genai/parsers.hpp    | 13 -----
 .../include/openvino/genai/text_streamer.hpp  |  3 +-
 src/cpp/src/llm/pipeline.cpp                  | 37 ++++---------
 src/cpp/src/parsers.cpp                       | 52 -------------------
 src/cpp/src/text_streamer.cpp                 | 17 +-----
 .../openvino_genai/py_openvino_genai.pyi      | 30 ++---------
 src/python/py_parsers.cpp                     | 12 ++---
 src/python/py_streamers.cpp                   |  8 ++-
 tests/python_tests/test_parsers.py            | 11 ++--
 10 files changed, 31 insertions(+), 154 deletions(-)

diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 1d832c3c8e..ad328b3d1f 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -350,7 +350,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool is_structured_output_generation() const;
 
     // parsers
-    std::vector<std::variant<std::string, std::shared_ptr<ParserBase>>> parsers;
+    std::vector<std::shared_ptr<ParserBase>> parsers;
 
     OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
     bool is_speculative_decoding() const;
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 320302aa0d..3a00e631b2 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -4,10 +4,6 @@
 #pragma once
 #include 
 #include 
-#include 
-#include 
-#include 
-#include 
 #include 
 #include "openvino/genai/json_container.hpp"
 
@@ -26,8 +22,6 @@ class IncrementalParserBase {
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
-
-    static std::shared_ptr<IncrementalParserBase> get_parser(std::string name);
 };
 
 class ReasoningParser : public IncrementalParserBase {
@@ -50,30 +44,25 @@ class ReasoningParser : public IncrementalParserBase {
 
 class DeepSeekR1ReasoningParser : public ReasoningParser {
 public:
     explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {};
-    static std::string name() { return "DeepSeekR1ReasoningParser"; }
 };
 
 class Phi4ReasoningParser : public ReasoningParser {
 public:
     explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {};
-    static std::string name() { return "Phi4ReasoningParser"; }
 };
 
 class ParserBase {
 public:
     ParserBase() = default;
     virtual JsonContainer parse(JsonContainer& text) = 0;
-    static std::shared_ptr<ParserBase> get_parser(std::string name);
 };
 
-using ParserVariant = std::variant<std::shared_ptr<ParserBase>, std::string>;
-
 class Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
-    // TODO: Check that vLLM has the same default.
     explicit Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
-    static std::string name() { return "Llama32PythonicToolParser"; }
 private:
     bool m_keep_original_content;
 };
 
 class Llama32JsonToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
-    // TODO: Check that vLLM has the same default.
     explicit Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
-    static std::string name() { return "Llama32JsonToolParser"; }
 private:
     bool m_keep_original_content;
 };
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 43f383ca83..5b6fd6d16e 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -49,14 +49,13 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
 
 class TextParserStreamer : public TextStreamer {
 public:
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers = {});
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParserBase>> parsers = {});
 
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
     CallbackTypeVariant write(std::string message);
 
     JsonContainer get_parsed_message() const { return m_parsed_message; }
-    std::vector<std::shared_ptr<IncrementalParserBase>> get_parsers() const { return m_parsers; }
 private:
     JsonContainer m_parsed_message;
     std::string m_text_buffer;
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index 899da4e39f..6bd601f49c 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -219,36 +219,21 @@ DecodedResults LLMPipeline::generate(
         return res;
     }
 
-    std::vector<std::shared_ptr<ParserBase>> parsers;
-    if (generation_config.has_value() && !(*generation_config).parsers.empty()) {
-        for (auto& parser_variant : (*generation_config).parsers) {
-            if (std::holds_alternative<std::string>(parser_variant)) {
-                auto parser_name = std::get<std::string>(parser_variant);
-                auto parser = ParserBase::get_parser(parser_name);
-                if (!parser) {
-                    OPENVINO_THROW("Parser with name ", parser_name, " is not registered");
-                }
-                parsers.push_back(parser);
-            } else if (std::holds_alternative<std::shared_ptr<ParserBase>>(parser_variant)) {
-                auto parser = std::get<std::shared_ptr<ParserBase>>(parser_variant);
-                parsers.push_back(parser);
-            }
-        }
+    if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
+        return res;
     }
-
-    res.parsed.resize(res.texts.size());
+    std::vector<std::shared_ptr<ParserBase>> parsers = (*generation_config).parsers;
 
+    res.parsed.resize(res.texts.size());
     // Apply Base parsers sequentially even if IncrementalParser has run.
- if (!parsers.empty()) { - for (size_t i = 0; i < res.texts.size(); ++i) { - JsonContainer msg; - msg["content"] = res.texts[i]; - for (auto& parser: parsers) { - // TODO: Check the state of incremental parser and reset if necessary - parser->parse(msg); - } - res.parsed[i] = msg; + for (size_t i = 0; i < res.texts.size(); ++i) { + JsonContainer msg; + msg["content"] = res.texts[i]; + for (auto& parser: parsers) { + // TODO: Check the state of incremental parser and reset if necessary + parser->parse(msg); } + res.parsed[i] = msg; } return res; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 94abed8d5f..dcfa6d2378 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -269,56 +269,4 @@ JsonContainer BaseReasoningParser::parse(JsonContainer& input) { return m_impl->parse(input); } -std::map()>> registered_incremental_parsers; -std::map()>> registered_base_parsers; - -// static initializer to register available buildin parsers -static bool register_backends() { - registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared(/*expect_open_tag*/ true); }; - registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared(/*expect_open_tag*/ false); }; - - registered_base_parsers[Llama32PythonicToolParser::name()] = []() { return std::make_shared(); }; - - // TODO: Add more parsers and register them here. - return true; -} - -// Ensure the backends are registered before main -static bool are_backends_registered = register_backends(); - -std::shared_ptr IncrementalParserBase::get_parser(std::string name) { - if (!are_backends_registered) { - register_backends(); - } - - if (registered_incremental_parsers.find(name) != registered_incremental_parsers.end()) { - return registered_incremental_parsers[name](); - } - return nullptr; -} - -std::shared_ptr ParserBase::get_parser(std::string name) { - if (!are_backends_registered) { - register_backends(); - } - - if (registered_base_parsers.find(name) != registered_base_parsers.end()) { - return registered_base_parsers[name](); - } - return nullptr; -} - -static std::vector get_parsers_names() { - std::vector names; - for (const auto& [name, _] : registered_incremental_parsers) { - names.push_back(name); - } - for (const auto& [name, _] : registered_base_parsers) { - names.push_back(name); - } - return names; -} - - - } // namespace ov::genai diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 437352358c..7738c455e0 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -124,23 +124,10 @@ void TextStreamer::end() { StreamerBase::~StreamerBase() = default; -TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector, std::string>> parsers) +TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector> parsers) : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { return this->write(s); - }) { - for (auto& parser : parsers) { - if (std::holds_alternative(parser)) { - auto parser_name = std::get(parser); - auto parser = IncrementalParserBase::get_parser(parser_name); - if (!parser) { - OPENVINO_THROW("Parser with name " + parser_name + " is not registered"); - } - m_parsers.push_back(parser); - } else { - m_parsers.push_back(std::get>(parser)); - } - } - } + }), m_parsers{parsers} {} CallbackTypeVariant TextParserStreamer::write(std::string message) { for (auto& parser: m_parsers) { diff --git a/src/python/openvino_genai/py_openvino_genai.pyi 
b/src/python/openvino_genai/py_openvino_genai.pyi index 256834c92f..8241769460 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -505,11 +505,6 @@ class DecodedResults: def texts(self) -> list[str]: ... class DeepSeekR1ReasoningParser(IncrementalParserBase): - @staticmethod - def get_parser(name: str) -> IncrementalParserBase: - """ - Factory method to get parser by name. - """ def __init__(self) -> None: ... def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: @@ -914,10 +909,10 @@ class GenerationConfig: def num_return_sequences(self, arg0: typing.SupportsInt) -> None: ... @property - def parsers(self) -> list[str | openvino_genai.py_openvino_genai.ParserBase]: + def parsers(self) -> list[ParserBase]: ... @parsers.setter - def parsers(self, arg0: collections.abc.Sequence[str | openvino_genai.py_openvino_genai.ParserBase]) -> None: + def parsers(self, arg0: collections.abc.Sequence[ParserBase]) -> None: ... @property def presence_penalty(self) -> float: @@ -1759,11 +1754,6 @@ class LLMPipeline: def start_chat(self, system_message: str = '') -> None: ... class Llama32JsonToolParser(ParserBase): - @staticmethod - def get_parser(name: str) -> ParserBase: - """ - Factory method to get parser by name. - """ def __init__(self) -> None: ... def parse(self, text: dict) -> None: @@ -1771,11 +1761,6 @@ class Llama32JsonToolParser(ParserBase): Parse is called with the full text. Returns a dict with parsed content. """ class Llama32PythonicToolParser(ParserBase): - @staticmethod - def get_parser(name: str) -> ParserBase: - """ - Factory method to get parser by name. - """ def __init__(self) -> None: ... def parse(self, text: dict) -> None: @@ -1903,11 +1888,6 @@ class PerfMetrics: def raw_metrics(self) -> RawPerfMetrics: ... class Phi4ReasoningParser(IncrementalParserBase): - @staticmethod - def get_parser(name: str) -> IncrementalParserBase: - """ - Factory method to get parser by name. - """ def __init__(self, expect_open_tag: bool = False) -> None: ... def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: @@ -3205,7 +3185,7 @@ class TextEmbeddingPipeline: Waits computed embeddings for a query """ class TextParserStreamer(TextStreamer): - def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[openvino_genai.py_openvino_genai.IncrementalParserBase | str] = []) -> None: + def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParserBase] = []) -> None: """ TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. """ @@ -3217,10 +3197,6 @@ class TextParserStreamer(TextStreamer): """ Get the current parsed message """ - def get_parsers(self) -> list[IncrementalParserBase]: - """ - Get the list of parsers - """ def write(self, message: dict) -> StreamingStatus: """ Write is called with a dict. Returns StreamingStatus. diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 37b78dc39a..c4842ba132 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -162,8 +162,7 @@ void init_parsers(py::module_& m) { }, "Parse is called every time new text delta is decoded. 
Returns a string with any additional text to append to the current output.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) - .def_static("get_parser", &Phi4ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") .def(py::init<>()) @@ -189,8 +188,7 @@ void init_parsers(py::module_& m) { }, "Parse is called with the full text. Returns a dict with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) - .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); py::class_>(m, "ParserBase") .def(py::init<>()) @@ -208,8 +206,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content.") - .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + "Parse is called with the full text. Returns a dict with parsed content."); py::class_, ParserBase>(m, "Llama32PythonicToolParser") .def(py::init<>()) @@ -218,6 +215,5 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content.") - .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + "Parse is called with the full text. Returns a dict with parsed content."); } diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 75b0fdf9ad..47ac549f70 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -150,11 +150,11 @@ void init_streamers(py::module_& m) { // TODO: double check/add more relevant docstrings for TextParserStreamer. 
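Illustrative aside (not part of the patch): with the string-name registry removed, parsers are now supplied as instances. A minimal C++ sketch of the resulting usage, built only from the declarations in this series; the subclass name is hypothetical:

#include <iostream>
#include "openvino/genai/parsers.hpp"
#include "openvino/genai/text_streamer.hpp"

// Hypothetical subclass: TextParserStreamer::write(JsonContainer&) is pure
// virtual, so a caller overrides it to consume the incrementally parsed message.
class ConsoleParserStreamer : public ov::genai::TextParserStreamer {
public:
    using ov::genai::TextParserStreamer::TextParserStreamer;
    ov::genai::StreamingStatus write(ov::genai::JsonContainer& message) override {
        // "content" (and "reasoning_content") are filled in by the parsers.
        std::cout << message["content"].get_string();
        return ov::genai::StreamingStatus::RUNNING;
    }
};

// Usage: pass parser instances directly instead of registered names, e.g.
// auto streamer = std::make_shared<ConsoleParserStreamer>(
//     tokenizer,
//     std::vector<std::shared_ptr<ov::genai::IncrementalParserBase>>{
//         std::make_shared<ov::genai::DeepSeekR1ReasoningParser>()});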
py::class_, TextStreamer>(m, "TextParserStreamer") .def(py::init([](const Tokenizer& tokenizer, - std::vector, std::string>> parsers) { + std::vector> parsers) { return std::make_shared(tokenizer, parsers); }), py::arg("tokenizer"), - py::arg("parsers") = std::vector, std::string>>({}), + py::arg("parsers") = std::vector>(), "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") .def("write", [](TextParserStreamer& self, py::dict& message) { @@ -182,7 +182,5 @@ void init_streamers(py::module_& m) { return json_dict; - }, "Get the current parsed message") - - .def("get_parsers", &TextParserStreamer::get_parsers, "Get the list of parsers"); + }, "Get the current parsed message"); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index f273f13918..8a7117ad2c 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -64,7 +64,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) + streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()]) msg = {} for subword in stream_string: @@ -98,7 +98,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) + streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()]) msg = {} for subword in split_answer: @@ -139,9 +139,10 @@ def write(self, message): if "content" in message: print(message["content"]) return StreamingStatus.RUNNING - - streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) - + + streamer = TextParserStreamer(genai_tokenizer, parsers=[DeepSeekR1ReasoningParser()]) + breakpoint() + msg = {} stream_string = [ "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", From 62bf17a320b7059e3f16abcc151e32ded17acbd2 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 12:25:44 +0200 Subject: [PATCH 16/43] hide leftovers to m_pimpl --- src/cpp/include/openvino/genai/parsers.hpp | 12 +- src/cpp/src/parsers.cpp | 127 +++++++++++++-------- 2 files changed, 84 insertions(+), 55 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 283b91f775..3a00e631b2 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -60,21 +60,21 @@ class ParserBase { class Llama32PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - + explicit Llama32PythonicToolParser(bool keep_original_content = true); JsonContainer parse(JsonContainer& input) override; private: - bool m_keep_original_content; + class Llama32PythonicToolParserImpl; + std::shared_ptr m_impl; }; class Llama32JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - + explicit Llama32JsonToolParser(bool keep_original_content = true); JsonContainer parse(JsonContainer& input) override; 
private: - bool m_keep_original_content; + class Llama32JsonToolParserImpl; + std::shared_ptr m_impl; }; class BaseReasoningParser : public ParserBase{ diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index dcfa6d2378..9bfbe446cc 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -163,63 +163,92 @@ std::string ReasoningParser::parse( return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } -JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { - // Input example - // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; - - // Regex to capture the [...] part - std::smatch m; - const std::string& text = input["content"].get_string(); - std::regex r(R"(\[.*?\])"); - if (!std::regex_search(text, m, r)) { +class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { +public: + Llama32PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + bool m_keep_original_content; + + JsonContainer parse(JsonContainer& input) { + // Input example + // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; + + // Regex to capture the [...] part + std::smatch m; + const std::string& text = input["content"].get_string(); + std::regex r(R"(\[.*?\])"); + if (!std::regex_search(text, m, r)) { + return input; + } + + // Strip outer [ ] + std::string call = m.str().substr(1, m.str().size() - 2); + + // Split function name and arguments + input["tool_calls"] = JsonContainer::array(); + + size_t pos = call.find('('); + std::string name = call.substr(0, pos); + std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) + + + JsonContainer kv; + // Parse arguments of the form key='value' + std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); + auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); + for (; it != std::sregex_iterator(); ++it) { + kv[std::string((*it)[1])] = std::string((*it)[2]); + } + + input["tool_calls"] = JsonContainer::array(); + input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); + + if (!m_keep_original_content) { + input["content"] = regex_replace(text, r, ""); + } return input; } +}; - // Strip outer [ ] - std::string call = m.str().substr(1, m.str().size() - 2); - - // Split function name and arguments - input["tool_calls"] = JsonContainer::array(); - - size_t pos = call.find('('); - std::string name = call.substr(0, pos); - std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) 
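For orientation, a sketch of what the pythonic tool parser above produces (it mirrors the C++ tests later in this series; only the public API from parsers.hpp is assumed):

#include "openvino/genai/parsers.hpp"

int main() {
    // keep_original_content defaults to true, so "content" is left untouched.
    ov::genai::Llama32PythonicToolParser parser;
    ov::genai::JsonContainer msg;
    msg["content"] = std::string(R"([get_weather(location="New York, NY", unit="celsius")])");
    parser.parse(msg);
    // The regex above captures the bracketed call and the double-quoted
    // key="value" arguments, yielding:
    //   msg["tool_calls"][0]["name"]                  == "get_weather"
    //   msg["tool_calls"][0]["arguments"]["location"] == "New York, NY"
    return 0;
}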
- - - JsonContainer kv; - // Parse arguments of the form key='value' - std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); - auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); - for (; it != std::sregex_iterator(); ++it) { - kv[std::string((*it)[1])] = std::string((*it)[2]); - } - - input["tool_calls"] = JsonContainer::array(); - input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); - - if (!m_keep_original_content) { - input["content"] = regex_replace(text, r, ""); - } - return input; +Llama32PythonicToolParser::Llama32PythonicToolParser(bool keep_original_content) { + m_impl = std::make_shared(keep_original_content); +} + +JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { + return m_impl->parse(input); } -JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { - // Find JSON in the message - std::string msg_content = message["content"].get_string(); +class Llama32JsonToolParser::Llama32JsonToolParserImpl { +private: + bool m_keep_original_content; +public: + Llama32JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + + JsonContainer parse(JsonContainer& message) { + // Find JSON in the message + std::string msg_content = message["content"].get_string(); - size_t json_start = msg_content.find('{'); - size_t json_end = msg_content.rfind('}'); - if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { + size_t json_start = msg_content.find('{'); + size_t json_end = msg_content.rfind('}'); + if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { + return message; + } + auto res = JsonContainer::array(); + res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); + message["tool_calls"] = res; + + if (!m_keep_original_content) { + message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); + } return message; } - auto res = JsonContainer::array(); - res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); - message["tool_calls"] = res; - - if (!m_keep_original_content) { - message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); - } - return message; +}; + +Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { + m_impl = std::make_shared(keep_original_content); +} + +JsonContainer Llama32JsonToolParser::parse(JsonContainer& input) { + return m_impl->parse(input); } class BaseReasoningParser::BaseReasoningParserImpl { From 4f757062049a95805218ff2767a79e5c52f418ce Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 12:38:27 +0200 Subject: [PATCH 17/43] move defaults from class members to ctor default arguments --- src/cpp/include/openvino/genai/parsers.hpp | 4 +++- src/cpp/src/parsers.cpp | 20 ++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 3a00e631b2..007e52e6cf 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -30,7 +30,9 @@ class ReasoningParser : public IncrementalParserBase { std::shared_ptr m_impl; public: ReasoningParser(bool expect_open_tag = true, - bool keep_original_content = true); + bool keep_original_content = true, + std::string open_tag="", + std::string close_tag=""); std::string parse( 
JsonContainer& msg, diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 9bfbe446cc..da4b433946 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -13,21 +13,25 @@ namespace ov::genai { class ReasoningParser::ReasoningParserImpl { private: - bool m_expect_open_tag = true; + bool m_expect_open_tag; bool m_first_run = true; bool m_keep_original_content; bool m_think_tag_opened = false; - std::string m_open_tag = ""; - std::string m_close_tag = ""; + std::string m_open_tag; + std::string m_close_tag; std::string m_text_cache = ""; std::map accumulated_parsed; public: bool m_deactivated = false; ReasoningParserImpl() = default; - ReasoningParserImpl(bool expect_open_tag = true, - bool keep_original_content = true) + ReasoningParserImpl(bool expect_open_tag, + bool keep_original_content, + std::string open_tag, + std::string close_tag) : m_expect_open_tag(expect_open_tag), - m_keep_original_content(keep_original_content) {} + m_keep_original_content(keep_original_content), + m_open_tag(std::move(open_tag)), + m_close_tag(std::move(close_tag)) {} std::string parse( JsonContainer& msg, @@ -149,8 +153,8 @@ class ReasoningParser::ReasoningParserImpl { } }; -ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content) { - m_impl = std::make_shared(expect_open_tag, keep_original_content); +ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) { + m_impl = std::make_shared(expect_open_tag, keep_original_content, std::move(open_tag), std::move(close_tag)); } std::string ReasoningParser::parse( From edc2c3e7a4ecd0e350bdb507520f3d46f2b19375 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 12:43:57 +0200 Subject: [PATCH 18/43] return void instead of JsonContainer and keep modifying argument by reference --- src/cpp/include/openvino/genai/parsers.hpp | 8 +++---- src/cpp/src/parsers.cpp | 26 +++++++++------------- src/python/py_parsers.cpp | 12 +++++----- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 007e52e6cf..4837f93289 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -56,14 +56,14 @@ class Phi4ReasoningParser : public ReasoningParser { class ParserBase { public: ParserBase() = default; - virtual JsonContainer parse(JsonContainer& text) = 0; + virtual void parse(JsonContainer& text) = 0; }; class Llama32PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32PythonicToolParser(bool keep_original_content = true); - JsonContainer parse(JsonContainer& input) override; + void parse(JsonContainer& input) override; private: class Llama32PythonicToolParserImpl; std::shared_ptr m_impl; @@ -73,7 +73,7 @@ class Llama32JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32JsonToolParser(bool keep_original_content = true); - JsonContainer parse(JsonContainer& input) override; + void parse(JsonContainer& input) override; private: class Llama32JsonToolParserImpl; std::shared_ptr m_impl; @@ -82,7 +82,7 @@ class Llama32JsonToolParser : public ParserBase { class BaseReasoningParser : public ParserBase{ public: BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "", std::string close_tag = ""); - 
JsonContainer parse(JsonContainer& input) override; + void parse(JsonContainer& input) override; private: class BaseReasoningParserImpl; std::shared_ptr m_impl; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index da4b433946..66976339c5 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -172,7 +172,7 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { Llama32PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} bool m_keep_original_content; - JsonContainer parse(JsonContainer& input) { + void parse(JsonContainer& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -181,7 +181,7 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { const std::string& text = input["content"].get_string(); std::regex r(R"(\[.*?\])"); if (!std::regex_search(text, m, r)) { - return input; + return; } // Strip outer [ ] @@ -209,7 +209,6 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - return input; } }; @@ -217,8 +216,8 @@ Llama32PythonicToolParser::Llama32PythonicToolParser(bool keep_original_content) m_impl = std::make_shared(keep_original_content); } -JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { - return m_impl->parse(input); +void Llama32PythonicToolParser::parse(JsonContainer& input) { + m_impl->parse(input); } class Llama32JsonToolParser::Llama32JsonToolParserImpl { @@ -227,14 +226,14 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { public: Llama32JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} - JsonContainer parse(JsonContainer& message) { + void parse(JsonContainer& message) { // Find JSON in the message std::string msg_content = message["content"].get_string(); size_t json_start = msg_content.find('{'); size_t json_end = msg_content.rfind('}'); if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { - return message; + return; } auto res = JsonContainer::array(); res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); @@ -243,7 +242,6 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { if (!m_keep_original_content) { message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); } - return message; } }; @@ -251,8 +249,8 @@ Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { m_impl = std::make_shared(keep_original_content); } -JsonContainer Llama32JsonToolParser::parse(JsonContainer& input) { - return m_impl->parse(input); +void Llama32JsonToolParser::parse(JsonContainer& input) { + m_impl->parse(input); } class BaseReasoningParser::BaseReasoningParserImpl { @@ -266,8 +264,7 @@ class BaseReasoningParser::BaseReasoningParserImpl { m_open_tag(open_tag), m_close_tag(close_tag) {}; - JsonContainer parse(JsonContainer& input) { - JsonContainer res; + void parse(JsonContainer& input) { std::string reasoning_content; std::string content = input["content"].get_string(); @@ -285,7 +282,6 @@ class BaseReasoningParser::BaseReasoningParserImpl { } input["reasoning_content"] = reasoning_content; - return input; } private: bool m_expect_open_tag; @@ -298,8 +294,8 @@ BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_origina m_impl = std::make_shared(expect_open_tag, 
keep_original_content, open_tag, close_tag); } -JsonContainer BaseReasoningParser::parse(JsonContainer& input) { - return m_impl->parse(input); +void BaseReasoningParser::parse(JsonContainer& input) { + m_impl->parse(input); } } // namespace ov::genai diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index c4842ba132..39d83beb2d 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -53,9 +53,9 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { class ConstructableParserBase: public ParserBase { public: - JsonContainer parse(JsonContainer& text) override { + void parse(JsonContainer& text) override { PYBIND11_OVERRIDE_PURE( - JsonContainer, // Return type + void, // Return type ParserBase, // Parent class parse, // Name of function in C++ (must match Python name) text // Argument(s) @@ -66,7 +66,7 @@ class ConstructableParserBase: public ParserBase { static py::object json_mod = py::module_::import("json"); // wrapper to enhance calling parser from Python -void call_parser(py::dict& msg, std::function func) { +void call_parser(py::dict& msg, std::function func) { auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); auto msg_cpp = JsonContainer(msg_anymap); @@ -194,7 +194,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](ParserBase& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); + return call_parser(msg, [&self](JsonContainer& m) {self.parse(m);}); }, py::arg("text"), "Parse is called with the full text. Returns a dict with parsed content."); @@ -203,7 +203,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](Llama32JsonToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); }, py::arg("text"), "Parse is called with the full text. Returns a dict with parsed content."); @@ -212,7 +212,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](Llama32PythonicToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); }, py::arg("text"), "Parse is called with the full text. 
Returns a dict with parsed content.");

From e4ac07966f8b01afc06eea1783328af83c639d1e Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 12:49:35 +0200
Subject: [PATCH 19/43] pass open/close tag strings by reference instead of value

---
 src/cpp/include/openvino/genai/parsers.hpp | 10 +++++++---
 src/cpp/src/parsers.cpp                    | 18 +++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 4837f93289..45427baccc 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -31,8 +31,8 @@ class ReasoningParser : public IncrementalParserBase {
 public:
     ReasoningParser(bool expect_open_tag = true,
                     bool keep_original_content = true,
-                    std::string open_tag="<think>",
-                    std::string close_tag="</think>");
+                    const std::string& open_tag = "<think>",
+                    const std::string& close_tag = "</think>");
 
     std::string parse(
         JsonContainer& msg,
@@ -81,7 +81,11 @@ class Llama32JsonToolParser : public ParserBase {
 
 class BaseReasoningParser : public ParserBase{
 public:
-    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>");
+    BaseReasoningParser(
+        bool expect_open_tag = true,
+        bool keep_original_content = true,
+        const std::string& open_tag = "<think>",
+        const std::string& close_tag = "</think>");
     void parse(JsonContainer& input) override;
 private:
     class BaseReasoningParserImpl;
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 66976339c5..6006a31181 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -26,12 +26,12 @@ class ReasoningParser::ReasoningParserImpl {
     ReasoningParserImpl() = default;
 
     ReasoningParserImpl(bool expect_open_tag,
                         bool keep_original_content,
-                        std::string open_tag,
-                        std::string close_tag)
+                        const std::string& open_tag,
+                        const std::string& close_tag)
         : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content),
-          m_open_tag(std::move(open_tag)),
-          m_close_tag(std::move(close_tag)) {}
+          m_open_tag(open_tag),
+          m_close_tag(close_tag) {}
 
     std::string parse(
         JsonContainer& msg,
@@ -153,8 +153,8 @@ class ReasoningParser::ReasoningParserImpl {
     }
 };
 
-ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) {
-    m_impl = std::make_shared<ReasoningParserImpl>(expect_open_tag, keep_original_content, std::move(open_tag), std::move(close_tag));
+ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_shared<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
 
 std::string ReasoningParser::parse(
@@ -257,8 +257,8 @@ class BaseReasoningParser::BaseReasoningParserImpl {
 public:
     BaseReasoningParserImpl(bool expect_open_tag,
                             bool keep_original_content,
-                            std::string open_tag,
-                            std::string close_tag):
+                            const std::string& open_tag,
+                            const std::string& close_tag):
         m_expect_open_tag(expect_open_tag),
         m_keep_original_content(keep_original_content),
         m_open_tag(open_tag),
         m_close_tag(close_tag) {};
@@ -290,7 +290,7 @@ class BaseReasoningParser::BaseReasoningParserImpl {
     std::string m_close_tag;
 };
 
-BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) {
-    m_impl = std::make_shared<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_shared<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
From ca732bafe7ddf6491d18a49c0291b492fceb4f34 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 13:10:12 +0200
Subject: [PATCH 20/43] remove breakpoint()

---
 tests/python_tests/test_parsers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 8a7117ad2c..56328760e1 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -141,7 +141,6 @@ def write(self, message):
         return StreamingStatus.RUNNING
 
     streamer = TextParserStreamer(genai_tokenizer, parsers=[DeepSeekR1ReasoningParser()])
-    breakpoint()
 
     msg = {}
     stream_string = [

From 32356cb9b7eda55d984f957485352bd75055e3c0 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Mon, 20 Oct 2025 11:59:15 +0200
Subject: [PATCH 21/43] use new JsonContainer to py::dict conversion approach

---
 src/cpp/include/openvino/genai/parsers.hpp |   4 +
 src/cpp/src/parsers.cpp                    |  10 +-
 src/python/openvino_genai/__init__.pyi     |   2 +-
 .../openvino_genai/py_openvino_genai.pyi   |   4 +-
 src/python/py_openvino_genai.cpp           |   9 +-
 src/python/py_parsers.cpp                  |  92 ++++++++--------
 src/python/py_streamers.cpp                |  17 +--
 tests/cpp/CMakeLists.txt                   |   2 +-
 tests/cpp/parser.cpp                       | 101 ++++++++----------
 tests/python_tests/test_parsers.py         |   3 +-
 10 files changed, 108 insertions(+), 136 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 45427baccc..0f71c5641b 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -22,6 +22,8 @@ class IncrementalParserBase {
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
+
+    virtual ~IncrementalParserBase() = default;
 };
 
 class ReasoningParser : public IncrementalParserBase {
@@ -33,6 +35,7 @@ class ReasoningParser : public IncrementalParserBase {
         bool keep_original_content = true,
         const std::string& open_tag = "<think>",
         const std::string& close_tag = "</think>");
+    virtual ~ReasoningParser() = default;
 
     std::string parse(
         JsonContainer& msg,
@@ -56,6 +59,7 @@ class Phi4ReasoningParser : public ReasoningParser {
 class ParserBase {
 public:
     ParserBase() = default;
+    virtual ~ParserBase() = default;
     virtual void parse(JsonContainer& text) = 0;
 };
 
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 6006a31181..f73dd82d04 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -5,9 +5,6 @@
 #include
 #include
 #include
-#include <nlohmann/json.hpp>
-
-using json = nlohmann::json;
 
 namespace ov::genai {
 
@@ -24,6 +21,7 @@ class ReasoningParser::ReasoningParserImpl {
 public:
     bool m_deactivated = false;
     ReasoningParserImpl() = default;
+
     ReasoningParserImpl(bool expect_open_tag,
                         bool keep_original_content,
                         const std::string& open_tag,
@@ -91,7 +89,6 @@ class ReasoningParser::ReasoningParserImpl {
             auto close_idx = txt_chunk.find(m_close_tag);
             reason_str += txt_chunk.substr(0, close_idx);
-            // content_str += txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size()));
 
             if (!m_keep_original_content) {
                 // Cut from the txt_chunk which is before </think> and leave only what is after </think>.
// Example if m_text_cache + delta_text = "...some textAnswer is 3" = "...some textAnswer is 3" @@ -187,14 +184,10 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { // Strip outer [ ] std::string call = m.str().substr(1, m.str().size() - 2); - // Split function name and arguments - input["tool_calls"] = JsonContainer::array(); - size_t pos = call.find('('); std::string name = call.substr(0, pos); std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) - JsonContainer kv; // Parse arguments of the form key='value' std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); @@ -203,6 +196,7 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { kv[std::string((*it)[1])] = std::string((*it)[2]); } + // Split function name and arguments input["tool_calls"] = JsonContainer::array(); input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index dfbe188abc..f8fd25d6a8 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -72,5 +72,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 
'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 7687174233..8741d691b6 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 
'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -566,7 +566,7 @@ class DecodedResults: def extended_perf_metrics(self) -> ExtendedPerfMetrics: ... @property - def parsed(self) -> dict: + def parsed(self) -> list: ... @property def perf_metrics(self) -> PerfMetrics: diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index cfa8d94cd0..ed010a7581 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -94,15 +94,10 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) .def_readonly("scores", &DecodedResults::scores) - .def_property_readonly("parsed", [](const DecodedResults& dr) -> py::dict { - static py::object json_mod = py::module_::import("json"); + .def_property_readonly("parsed", [](const DecodedResults& dr) -> py::list { py::list result_dicts; - for (const auto& parsed: dr.parsed) { - auto json_str = parsed.to_json_string(); - py::dict json_dict = json_mod.attr("loads")(json_str); - - result_dicts.append(json_dict); + result_dicts.append(pyutils::json_container_to_py_object(parsed)); } return result_dicts; }) diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 39d83beb2d..a5a4a5855c 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -67,11 +67,11 @@ static py::object json_mod = py::module_::import("json"); // wrapper to enhance calling parser from Python void call_parser(py::dict& msg, std::function func) { - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); - auto msg_cpp = JsonContainer(msg_anymap); - + auto msg_cpp = pyutils::py_object_to_json_container(msg); func(msg_cpp); + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + // since it create a new object instead of updating existing dict. auto json_str = msg_cpp.to_json_string(); py::dict result = json_mod.attr("loads")(json_str); @@ -92,14 +92,14 @@ std::string call_incremental_parser( const std::optional>& delta_tokens, std::function>&, const std::optional>&)> func) { - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); - auto msg_cpp = JsonContainer(msg_anymap); + auto msg_cpp = pyutils::py_object_to_json_container(msg); auto res = func(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); - auto json_str = msg_cpp.to_json_string(); - py::dict result = json_mod.attr("loads")(json_str); + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + // since it create a new object instead of updating existing dict. 
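Side note (illustrative, not in the patch): call_parser and call_incremental_parser share the same serialize/reparse/update step; it could be factored into one helper along these lines (the helper name is hypothetical, types as used above):

// Serialize the JsonContainer, reparse it via Python's json module, and copy
// the keys into the caller's dict in place so existing Python references see
// the update (plain reassignment would create a new object).
static void sync_py_dict(py::dict& msg, ov::genai::JsonContainer& msg_cpp) {
    py::object json_mod = py::module_::import("json");
    py::dict result = json_mod.attr("loads")(msg_cpp.to_json_string());
    msg.clear();
    for (auto item : result) {
        msg[item.first] = item.second;
    }
}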
+ py::dict result = json_mod.attr("loads")(json_str); // update msg with result msg.clear(); for (auto item : result) { @@ -120,20 +120,19 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - // TODO: optimize conversion between py::dict and JsonContainer - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); - auto msg_cpp = JsonContainer(msg_anymap); - - - auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); - msg.clear(); - - auto json_obj = msg_cpp.to_json(); - for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { - msg[py::cast(it.key())] = py::cast(it.value()); - } - - return res; + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + } + ); }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); @@ -147,18 +146,18 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - }); + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + }); }, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), @@ -173,19 +172,20 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - }); - }, + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + } + ); + }, "Parse is called with the full text. 
Returns a dict with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 47ac549f70..29bc7296ac 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -77,16 +77,11 @@ class ConstructableTextParserStreamer: public TextParserStreamer { StreamingStatus write(JsonContainer& message) override { py::dict message_py; - auto json_obj = message.to_json(); - for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { - message_py[py::cast(it.key())] = py::cast(it.value().get()); - } + message_py = pyutils::json_container_to_py_object(message); // call python implementation which accepts py::dict instead of JsonContainer auto res = py::get_override(this, "write")(message_py); - - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(message_py); - message = JsonContainer(msg_anymap); + message = pyutils::py_object_to_json_container(message_py); return res.cast(); } @@ -174,13 +169,7 @@ void init_streamers(py::module_& m) { .def("get_parsed_message", [](TextParserStreamer& self) { - static py::object json_mod = py::module_::import("json"); - - auto res = self.get_parsed_message(); - auto json_str = res.to_json_string(); - py::dict json_dict = json_mod.attr("loads")(json_str); - - return json_dict; + return pyutils::json_container_to_py_object(self.get_parsed_message()); }, "Get the current parsed message"); } diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index f708e00e55..bdf959eb5d 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -26,7 +26,7 @@ set(TEST_TARGET_NAME "tests_continuous_batching") add_executable(${TEST_TARGET_NAME} ${tests_src} $) -target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main nlohmann_json::nlohmann_json) +target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main) target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src" $) diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 1e56fef042..0cd2d602ba 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -9,94 +9,86 @@ using namespace ov::genai; -nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { - JsonContainer input; - input["content"] = prompt; - parser->parse(input); - return input.to_json(); -} - - TEST(ParserTest, test_llama32_parser_1) { std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; - nlohmann::json expected; - // By default content should keep original values. 
- expected["content"] = prompt; - expected["tool_calls"] = nlohmann::json::array({ - { - {"name", "get_weather"}, - {"arguments", { - {"location", "New York, NY"}, - {"unit", "celsius"} - }} - } - }); + JsonContainer expected; + expected["content"] = prompt; + expected["tool_calls"] = JsonContainer::array(); + expected["tool_calls"].push_back(JsonContainer({ + {"name", "get_weather"}, + {"arguments", JsonContainer{ + {"location", "New York, NY"}, + {"unit", "celsius"} + }} + })); + + std::shared_ptr parser = std::make_shared(); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + ASSERT_TRUE(expected == input); } TEST(ParserTest, test_llama32_parser_2) { std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; - nlohmann::json expected; - // In this test tool calling part will be cut from the content after parsing. + JsonContainer expected; expected["content"] = std::string(R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n<|eom_id|>)"); - - expected["tool_calls"] = nlohmann::json::array({ - { - {"name", "get_weather"}, - {"arguments", { - {"location", "New York, NY"}, - {"unit", "celsius"} - }} - } - }); - auto parser = std::make_shared(/*keep_original_content*/ false); - - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + expected["tool_calls"] = JsonContainer::array(); + expected["tool_calls"].push_back(JsonContainer(ov::AnyMap({ + {"name", "get_weather"}, + {"arguments", ov::AnyMap{ + {"location", "New York, NY"}, + {"unit", "celsius"} + }} + }))); + + std::shared_ptr parser = std::make_shared(/*keep_original_content*/ false); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); + + ASSERT_EQ(input, expected); } TEST(ParserTest, test_reasoning_parser_1) { std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"; - nlohmann::json expected; - // In this test reasoning part will be cut from the content after parsing. 
+ JsonContainer expected; expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"); - expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - auto parser = std::make_shared( + + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ false ); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + ASSERT_EQ(input, expected); } TEST(ParserTest, test_reasoning_parser_2) { std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"; - nlohmann::json expected; - // In this test content should keep original values. + JsonContainer expected; expected["content"] = prompt; - expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - auto parser = std::make_shared( + + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ true ); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + ASSERT_EQ(input, expected); } class DeepSeekR1ReasoningParserTest : public ::testing::Test { @@ -121,7 +113,6 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { JsonContainer msg; - for (int i = 1; i < input_stream.size(); i++) { std::string previous_text = input_stream[i - 1]; std::string delta_text = input_stream[i]; diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 56328760e1..45f034374c 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -125,10 +125,9 @@ def test_final_parser_llama_32_json(hf_ov_genai_models): content_json = { "content": f"Calling weather API: {json_str}" } - + parser = Llama32JsonToolParser() parser.parse(content_json) - assert content_json['tool_calls'][0] == json.loads(json_str) From aefbd7b437acfbe5dee9bbb5e8027017c24941fa Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 20 Oct 2025 16:23:23 +0200 Subject: [PATCH 22/43] fix segfault; some other fixes --- src/python/py_parsers.cpp | 4 +++- src/python/py_streamers.cpp | 7 +++++-- tests/python_tests/test_parsers.py | 32 ++++++++++-------------------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index a5a4a5855c..0904d913fc 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -63,7 +63,6 @@ class ConstructableParserBase: public ParserBase { } }; -static py::object json_mod = py::module_::import("json"); // wrapper to enhance 
calling parser from Python
 void call_parser(py::dict& msg, std::function<void(JsonContainer&)> func) {
@@ -71,6 +70,8 @@ void call_parser(py::dict& msg, std::function<void(JsonContainer&)> func) {
     func(msg_cpp);
 
     // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
+    py::object json_mod = py::module_::import("json");
+
     // since it creates a new object instead of updating the existing dict.
     auto json_str = msg_cpp.to_json_string();
     py::dict result = json_mod.attr("loads")(json_str);
@@ -99,6 +100,7 @@ std::string call_incremental_parser(
 
     // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
     // since it creates a new object instead of updating the existing dict.
+    py::object json_mod = py::module_::import("json");
     py::dict result = json_mod.attr("loads")(json_str);
     // update msg with result
     msg.clear();
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 29bc7296ac..d34fc58a22 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -76,10 +76,13 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     using TextParserStreamer::TextParserStreamer; // inherit base constructors
 
     StreamingStatus write(JsonContainer& message) override {
+        // Since C++ calls this function with a JsonContainer while the Python override expects a py::dict,
+        // this function is a wrapper that invokes the Python implementation of 'write' with a py::dict.
         py::dict message_py;
         message_py = pyutils::json_container_to_py_object(message);
 
-        // call python implementation which accepts py::dict instead of JsonContainer
+        // Call the Python implementation, which accepts py::dict instead of JsonContainer,
+        // and convert the resulting message back to JsonContainer.
         auto res = py::get_override(this, "write")(message_py);
         message = pyutils::py_object_to_json_container(message_py);
 
@@ -168,7 +171,7 @@ void init_streamers(py::module_& m) {
            "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.")
 
         .def("get_parsed_message",
-            [](TextParserStreamer& self) {
+            [](TextParserStreamer& self) -> py::dict {
                 return pyutils::json_container_to_py_object(self.get_parsed_message());
             }, "Get the current parsed message");
 }
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 45f034374c..5912cfd75c 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -9,38 +9,28 @@ import pytest
 from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser
 from transformers import AutoTokenizer
-from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
+from utils.hugging_face import convert_and_save_tokenizer
 import re
-import textwrap
 import json
 
 
 @pytest.fixture(scope="module")
 def hf_ov_genai_models(request, tmp_path_factory):
-    model_id, args = request.param
-    tok_load_properties = {"add_second_input": args.pop("add_second_input")} if "add_second_input" in args else {}
-
-    hf_args = args.copy() # to overcome mutable default argument side effects
-    if "padding_side" in hf_args and hf_args["padding_side"] is None:
-        # HF does not accept None.
- # Need to remove padding_side and let HF to choose default value, - hf_args.pop("padding_side") - else: - hf_args["truncation_side"] = hf_args["padding_side"] + model_id = request.param + model_dir = tmp_path_factory.getbasetemp() / model_id.replace("/", "_") model_dir.mkdir(exist_ok=True, parents=True) - hf_tokenizer = AutoTokenizer.from_pretrained(model_id, **hf_args) - convert_args = {"number_of_inputs": hf_args.pop("number_of_inputs")} if "number_of_inputs" in hf_args else {} - convert_and_save_tokenizer(hf_tokenizer, model_dir, **convert_args) + hf_tokenizer = AutoTokenizer.from_pretrained(model_id) + convert_and_save_tokenizer(hf_tokenizer, model_dir) - genai_tokenizer = Tokenizer(model_dir, tok_load_properties) + genai_tokenizer = Tokenizer(model_dir) return hf_tokenizer, genai_tokenizer @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], # this tokenizer is used as a stub only + ["katuni4ka/tiny-random-phi3"], # this tokenizer is used as a stub only indirect=True ) @pytest.mark.parametrize("answer", [ @@ -76,10 +66,11 @@ def write(self, message): assert msg['reasoning_content'] == think_content assert msg['content'] == content + @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], # this tokenizer is used as a stub only + ["katuni4ka/tiny-random-phi3"], indirect=True ) @pytest.mark.parametrize("split_answer", [ @@ -111,11 +102,10 @@ def write(self, message): assert msg['content'] == content - @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + ["katuni4ka/tiny-random-phi3"], indirect=True ) def test_final_parser_llama_32_json(hf_ov_genai_models): @@ -125,7 +115,7 @@ def test_final_parser_llama_32_json(hf_ov_genai_models): content_json = { "content": f"Calling weather API: {json_str}" } - + parser = Llama32JsonToolParser() parser.parse(content_json) assert content_json['tool_calls'][0] == json.loads(json_str) From 9c0422c7e9c63771ed405f46914a47ac043f48ca Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 20 Oct 2025 18:14:05 +0200 Subject: [PATCH 23/43] add export symbols --- src/cpp/include/openvino/genai/parsers.hpp | 14 +++++++------- src/cpp/include/openvino/genai/text_streamer.hpp | 2 +- tests/python_tests/test_parsers.py | 2 +- tests/python_tests/test_text_streamer.py | 14 -------------- 4 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 0f71c5641b..dabb46a9e6 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -10,7 +10,7 @@ namespace ov { namespace genai { -class IncrementalParserBase { +class OPENVINO_GENAI_EXPORTS IncrementalParserBase { public: IncrementalParserBase() = default; @@ -26,7 +26,7 @@ class IncrementalParserBase { virtual ~IncrementalParserBase() = default; }; -class ReasoningParser : public IncrementalParserBase { +class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase { private: class ReasoningParserImpl; std::shared_ptr m_impl; @@ -46,12 +46,12 @@ class ReasoningParser : public IncrementalParserBase { ) override; }; -class DeepSeekR1ReasoningParser : public ReasoningParser { +class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser { public: explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : 
ReasoningParser(expect_open_tag) {}; }; -class Phi4ReasoningParser : public ReasoningParser { +class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser { public: explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {}; }; @@ -63,7 +63,7 @@ class ParserBase { virtual void parse(JsonContainer& text) = 0; }; -class Llama32PythonicToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama32PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32PythonicToolParser(bool keep_original_content = true); @@ -73,7 +73,7 @@ class Llama32PythonicToolParser : public ParserBase { std::shared_ptr m_impl; }; -class Llama32JsonToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama32JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32JsonToolParser(bool keep_original_content = true); @@ -83,7 +83,7 @@ class Llama32JsonToolParser : public ParserBase { std::shared_ptr m_impl; }; -class BaseReasoningParser : public ParserBase{ +class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{ public: BaseReasoningParser( bool expect_open_tag = true, diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 5b6fd6d16e..816427c985 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -47,7 +47,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase { void compute_decoded_length_for_position(size_t cache_position); }; -class TextParserStreamer : public TextStreamer { +class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { public: TextParserStreamer(const Tokenizer& tokenizer, std::vector> parsers = {}); diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 5912cfd75c..fd0407b31f 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -11,7 +11,7 @@ from transformers import AutoTokenizer from utils.hugging_face import convert_and_save_tokenizer import re -import json + @pytest.fixture(scope="module") def hf_ov_genai_models(request, tmp_path_factory): diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index 4790ab4b3d..a3ea55d225 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -71,20 +71,6 @@ def test_text_prompts(tmp_path, prompt, model_id): for token in tokens: streamer.write(token) streamer.end() - - class CurrentStreamer(BaseStreamer): - def write(self, token_chunk): - pass - - class CurrentParsingStreamer(TextParserStreamer): - def write(self, word: str): - msg: JsonContainer = get_current_message() - - - streamer = lambda x: print(x) - - streamer = TextStreamer(ov_tokenizer, lambda x: print(x)) - assert ''.join(accumulated) == ov_tokenizer.decode(tokens) From ac9dd8c3995097f68dc9b86bc5b52c1ef852ed6f Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 21 Oct 2025 18:41:10 +0200 Subject: [PATCH 24/43] add more tests use unique_ptr --- src/cpp/include/openvino/genai/parsers.hpp | 15 +-- src/cpp/src/llm/pipeline.cpp | 18 ++-- src/cpp/src/parsers.cpp | 21 ++-- src/cpp/src/text_streamer.cpp | 1 - src/python/py_parsers.cpp | 77 ++++++++++++-- src/python/py_streamers.cpp | 1 + tests/cpp/parser.cpp | 118 ++++++++++++++++++++- 
 tests/python_tests/test_parsers.py         | 118 ++++++++++++++++++++-
 8 files changed, 329 insertions(+), 40 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index dabb46a9e6..4af6fb2cca 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -29,13 +29,13 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase {
 private:
     class ReasoningParserImpl;
-    std::shared_ptr<ReasoningParserImpl> m_impl;
+    std::unique_ptr<ReasoningParserImpl> m_impl;
 public:
     ReasoningParser(bool expect_open_tag = true,
                     bool keep_original_content = true,
                     const std::string& open_tag = "<think>",
                     const std::string& close_tag = "</think>");
-    virtual ~ReasoningParser() = default;
+    virtual ~ReasoningParser();
 
     std::string parse(
         JsonContainer& msg,
@@ -59,7 +59,7 @@ class ParserBase {
 public:
     ParserBase() = default;
-    virtual ~ParserBase() = default;
+    virtual ~ParserBase();
 
     virtual void parse(JsonContainer& text) = 0;
 };
@@ -67,20 +67,22 @@ class OPENVINO_GENAI_EXPORTS Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama32PythonicToolParser(bool keep_original_content = true);
+    ~Llama32PythonicToolParser();
     void parse(JsonContainer& input) override;
 private:
     class Llama32PythonicToolParserImpl;
-    std::shared_ptr<Llama32PythonicToolParserImpl> m_impl;
+    std::unique_ptr<Llama32PythonicToolParserImpl> m_impl;
 };
 
 class OPENVINO_GENAI_EXPORTS Llama32JsonToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama32JsonToolParser(bool keep_original_content = true);
+    ~Llama32JsonToolParser();
     void parse(JsonContainer& input) override;
 private:
     class Llama32JsonToolParserImpl;
-    std::shared_ptr<Llama32JsonToolParserImpl> m_impl;
+    std::unique_ptr<Llama32JsonToolParserImpl> m_impl;
 };
 
 class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase {
@@ -91,9 +93,10 @@ class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase {
         const std::string& open_tag = "<think>",
         const std::string& close_tag = "</think>");
     void parse(JsonContainer& input) override;
+    ~BaseReasoningParser();
 private:
     class BaseReasoningParserImpl;
-    std::shared_ptr<BaseReasoningParserImpl> m_impl;
+    std::unique_ptr<BaseReasoningParserImpl> m_impl;
 };
 
 } // namespace genai
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index c6d4772453..9f988dbb59 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -253,27 +253,27 @@ DecodedResults LLMPipeline::generate(
     auto res = m_pimpl->generate(inputs, generation_config, streamer);
 
     // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message
+    // Streaming is available only for batch size 1, therefore only parsed[0] is filled
     if (auto streamer_obj = std::get_if<std::shared_ptr<StreamerBase>>(&streamer)) {
         if (auto parser_streamer = std::dynamic_pointer_cast<TextParserStreamer>(*streamer_obj)) {
-            res.parsed.resize(res.texts.size());
+            res.parsed.resize(1);
             res.parsed[0] = parser_streamer->get_parsed_message();
         }
     }
-
-    if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
-        return res;
-    }
 
-    if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
+    if (!generation_config.has_value() || generation_config->parsers.empty()) {
         return res;
     }
-    
+
     std::vector<std::shared_ptr<ParserBase>> parsers = (*generation_config).parsers;
     res.parsed.resize(res.texts.size());
     // Apply Base parsers sequentially even if IncrementalParser has run.
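+    // Illustrative usage sketch of this parsing path (added for clarity; cfg/pipe are hypothetical names,
+    // not part of this patch):
+    //   ov::genai::GenerationConfig cfg;
+    //   cfg.max_new_tokens = 100;
+    //   cfg.parsers = { std::make_shared<ov::genai::BaseReasoningParser>() };
+    //   auto out = pipe.generate("prompt", cfg);
+    //   // out.parsed[i] then holds fields such as "reasoning_content" extracted from out.texts[i].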
for (size_t i = 0; i < res.texts.size(); ++i) { - JsonContainer msg; - msg["content"] = res.texts[i]; + auto& msg = res.parsed[i]; + if (!msg.contains("content")) { + // Initialize msg with content + msg["content"] = res.texts[i]; + } for (auto& parser: parsers) { // TODO: Check the state of incremental parser and reset if necessary parser->parse(msg); diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index f73dd82d04..0a8670dbc4 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -17,7 +17,6 @@ class ReasoningParser::ReasoningParserImpl { std::string m_open_tag; std::string m_close_tag; std::string m_text_cache = ""; - std::map accumulated_parsed; public: bool m_deactivated = false; ReasoningParserImpl() = default; @@ -59,8 +58,6 @@ class ReasoningParser::ReasoningParserImpl { auto content_str = msg["content"].get_string(); if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_expect_open_tag) { - OPENVINO_ASSERT(m_open_tag.find(m_text_cache) != std::string::npos, "m_text_cache should be a prefix of m_open_tag"); - // Thinking has started auto open_idx = txt_chunk.find(m_open_tag); reason_str += txt_chunk.substr(open_idx + std::string(m_open_tag).size(), txt_chunk.size() - (open_idx + std::string(m_open_tag).size())); @@ -151,9 +148,11 @@ class ReasoningParser::ReasoningParserImpl { }; ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) { - m_impl = std::make_shared(expect_open_tag, keep_original_content, open_tag, close_tag); + m_impl = std::make_unique(expect_open_tag, keep_original_content, open_tag, close_tag); } +ReasoningParser::~ReasoningParser() = default; + std::string ReasoningParser::parse( JsonContainer& msg, const std::string& previous_text, @@ -207,13 +206,15 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { }; Llama32PythonicToolParser::Llama32PythonicToolParser(bool keep_original_content) { - m_impl = std::make_shared(keep_original_content); + m_impl = std::make_unique(keep_original_content); } void Llama32PythonicToolParser::parse(JsonContainer& input) { m_impl->parse(input); } +Llama32PythonicToolParser::~Llama32PythonicToolParser() = default; + class Llama32JsonToolParser::Llama32JsonToolParserImpl { private: bool m_keep_original_content; @@ -240,13 +241,15 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { }; Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { - m_impl = std::make_shared(keep_original_content); + m_impl = std::make_unique(keep_original_content); } void Llama32JsonToolParser::parse(JsonContainer& input) { m_impl->parse(input); } +Llama32JsonToolParser::~Llama32JsonToolParser() = default; + class BaseReasoningParser::BaseReasoningParserImpl { public: BaseReasoningParserImpl(bool expect_open_tag, @@ -285,11 +288,15 @@ class BaseReasoningParser::BaseReasoningParserImpl { }; BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) { - m_impl = std::make_shared(expect_open_tag, keep_original_content, open_tag, close_tag); + m_impl = std::make_unique(expect_open_tag, keep_original_content, open_tag, close_tag); } void BaseReasoningParser::parse(JsonContainer& input) { m_impl->parse(input); } +BaseReasoningParser::~BaseReasoningParser() = default; + +ParserBase::~ParserBase() = default; + } // namespace ov::genai diff --git a/src/cpp/src/text_streamer.cpp 
b/src/cpp/src/text_streamer.cpp index 7738c455e0..9b83e0c60c 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -133,7 +133,6 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) { for (auto& parser: m_parsers) { message = parser->parse(m_parsed_message, m_text_buffer, message); // Message can be modified inside parser, if parser for example extracted tool calling from message content - // but parser m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 0904d913fc..d853f507ef 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -31,6 +31,7 @@ namespace { class ConstructableIncrementalParserBase: public IncrementalParserBase { public: + using IncrementalParserBase::IncrementalParserBase; std::string parse( JsonContainer& msg, const std::string& previous_text, @@ -38,10 +39,49 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt ) override { + // Convert JsonContainer to py::dict + py::dict py_msg = pyutils::json_container_to_py_object(msg); + + py::function parse_method = py::get_override(static_cast(this), "parse"); + if (!parse_method) { + throw std::runtime_error("parse method not implemented in Python subclass"); + } + + auto res = parse_method( + py_msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens + ); + + // iterate throught py_msg and update msg + msg.clear(); + auto msg_anymap = pyutils::py_object_to_any_map(py_msg); + for (const auto& [key, value] : msg_anymap) { + if (value.is()) { + msg[key] = value.as(); + } else if (value.is()) { + msg[key] = JsonContainer(value.as()); + } else { + OPENVINO_THROW("Unsupported type in JsonContainer update from Python dict"); + } + } + return res.cast(); + } + + // This method should be overridden in Python + std::string parse( + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt + ) { PYBIND11_OVERRIDE_PURE( - std::string, // Return type - IncrementalParserBase, // Parent class - parse, // Name of function in C++ (must match Python name) + std::string, + IncrementalParserBase, + "parse", msg, previous_text, delta_text, @@ -53,13 +93,30 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { class ConstructableParserBase: public ParserBase { public: - void parse(JsonContainer& text) override { - PYBIND11_OVERRIDE_PURE( - void, // Return type - ParserBase, // Parent class - parse, // Name of function in C++ (must match Python name) - text // Argument(s) - ); + void parse(JsonContainer& msg) override { + py::gil_scoped_acquire acquire; + + py::function parse_method = py::get_override(static_cast(this), "parse"); + if (!parse_method) { + throw std::runtime_error("parse method not implemented in Python subclass"); + } + + // Convert JsonContainer to py::dict + py::dict py_msg = pyutils::json_container_to_py_object(msg); + parse_method(py_msg); + + // iterate throught py_msg and update msg + msg.clear(); + auto msg_anymap = pyutils::py_object_to_any_map(py_msg); + for (const auto& [key, value] : msg_anymap) { + if (value.is()) { + msg[key] = value.as(); + } else if (value.is()) { + msg[key] = JsonContainer(value.as()); + } else { + OPENVINO_THROW("Unsupported type in JsonContainer 
update from Python dict");
+            }
+        }
     }
 };
 
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index d34fc58a22..d6660fcd17 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -153,6 +153,7 @@ void init_streamers(py::module_& m) {
         }),
         py::arg("tokenizer"),
         py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParserBase>>(),
+        py::keep_alive<1, 3>(),
         "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
 
     .def("write", [](TextParserStreamer& self, py::dict& message) {
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index 0cd2d602ba..a74a12f499 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -4,7 +4,7 @@
 #include <gtest/gtest.h>
 #include "openvino/genai/generation_config.hpp"
 #include "openvino/genai/parsers.hpp"
-#include "nlohmann/json.hpp"
+#include "openvino/genai/text_streamer.hpp"
 #include "openvino/genai/llm_pipeline.hpp"
 
 using namespace ov::genai;
@@ -91,6 +91,8 @@ TEST(ParserTest, test_reasoning_parser_2) {
     ASSERT_EQ(input, expected);
 }
 
+
+
 class DeepSeekR1ReasoningParserTest : public ::testing::Test {
 protected:
     ov::genai::DeepSeekR1ReasoningParser parser;
@@ -121,4 +123,116 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) {
     ASSERT_EQ(msg["reasoning_content"], ref_res);
 }
 
-// TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers.
+TEST(ParserTest, test_custom_parser) {
+    // Define a small custom parser derived from ParserBase
+    class CustomParser : public ov::genai::ParserBase {
+    public:
+        void parse(ov::genai::JsonContainer& msg) override {
+            // extract "content"
+            if (!msg.contains("content"))
+                return;
+
+            auto content_opt = msg["content"].as_string();
+            if (!content_opt.has_value())
+                return;
+
+            const std::string& content = content_opt.value();
+
+            // find text between <think> and </think>
+            std::size_t start = content.find("<think>");
+            std::size_t end = content.find("</think>");
+            if (start != std::string::npos && end != std::string::npos && end > start) {
+                std::string think_text = content.substr(start + 7, end - (start + 7));
+                // trim leading/trailing whitespace
+                auto l = think_text.find_first_not_of(" \n\r\t");
+                auto r = think_text.find_last_not_of(" \n\r\t");
+                if (l != std::string::npos && r != std::string::npos)
+                    think_text = think_text.substr(l, r - l + 1);
+                msg["reasoning_content"] = think_text;
+            }
+        }
+    };
+
+    CustomParser parser;
+
+    ov::genai::JsonContainer msg;
+    msg["content"] = "<think>This is reasoning.</think> And this is the answer";
+
+    parser.parse(msg);
+
+    ASSERT_TRUE(msg.contains("reasoning_content"));
+    ASSERT_EQ(msg["reasoning_content"].get_string(), "This is reasoning.");
+}
+
+TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) {
+    using namespace ov::genai;
+
+    // Custom incremental parser: mirrors the Python logic
+    class CustomParser : public IncrementalParserBase {
+    public:
+        bool main_part_started = false;
+
+        std::string parse(JsonContainer& msg,
+                          const std::string& previous_text,
+                          std::string& delta_text,
+                          const std::optional<std::vector<int64_t>>& /*previous_tokens*/ = std::nullopt,
+                          const std::optional<std::vector<int64_t>>& /*delta_tokens*/ = std::nullopt) override {
+            // Ensure fields exist (Python test used dict defaults)
+            if (!msg.contains("content")) {
+                msg.to_empty_object();
+                msg["content"] = std::string{};
+            }
+            if (!msg.contains("reasoning_content")) {
+                msg["reasoning_content"] = std::string{};
+            }
+
+            if (!main_part_started && delta_text == "<think>") {
+                main_part_started = true;
+            } else if (main_part_started && delta_text == "</think>") {
+                main_part_started = false;
+            } else {
+                if (main_part_started) {
+                    // Append delta into reasoning_content
+                    auto cur = msg["reasoning_content"].as_string().value_or("");
+                    cur += delta_text;
+                    msg["reasoning_content"] = cur;
+                }
+            }
+            // Return delta_text (same as Python)
+            return delta_text;
+        }
+
+        // Virtual dtor for safety
+        ~CustomParser() override = default;
+    };
+
+    class CustomStreamer : public ov::genai::TextParserStreamer {
+    public:
+        using TextParserStreamer::write;
+        // Forwarding constructor to base class
+        CustomStreamer(ov::genai::Tokenizer& tok, const std::vector<std::shared_ptr<IncrementalParserBase>>& parsers)
+            : ov::genai::TextParserStreamer(tok, parsers) {}
+
+        JsonContainer final_msg;
+        StreamingStatus write(JsonContainer& message) override {
+            final_msg = message;
+            return StreamingStatus::RUNNING;
+        }
+    };
+
+    Tokenizer tok;
+    std::shared_ptr<CustomParser> parser = std::make_shared<CustomParser>();
+    CustomStreamer streamer(tok, {parser});
+
+
+    // Same stream as in the Python example
+    std::vector<std::string> stream_string = {"<think>", " ", "world", " ", "</think>", "!"};
+
+    for (size_t i = 0; i < stream_string.size(); ++i) {
+        streamer.write(stream_string[i]);
+    }
+
+    JsonContainer msg = streamer.get_parsed_message();
+    ASSERT_TRUE(msg.contains("reasoning_content"));
+    ASSERT_EQ(msg["reasoning_content"].get_string(), " world ");
+}
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index fd0407b31f..6535a1130c 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -3,13 +3,14 @@
 import dataclasses
 import json
 from typing import Optional
-
+from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
+from utils.ov_genai_pipelines import create_ov_pipeline
+from utils.network import retry_request
 import numpy as np
 import openvino
 import pytest
-from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
 from transformers import AutoTokenizer
-from utils.hugging_face import convert_and_save_tokenizer
 import re
 
 
@@ -121,6 +122,112 @@ def test_final_parser_llama_32_json(hf_ov_genai_models):
     assert content_json['tool_calls'][0] == json.loads(json_str)
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
["katuni4ka/tiny-random-phi3"], + indirect=True +) +def test_custom_streamer_parser(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + class CustomParser(IncrementalParserBase): + main_part_started: bool = False + + def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens = None, delta_tokens = None) -> str: + if 'content' not in msg: + msg['content'] = '' + if 'main_text' not in msg: + msg['main_text'] = '' + + if not self.main_part_started and delta_text == '': + self.main_part_started = True + elif self.main_part_started and delta_text == '': + self.main_part_started = False + else: + if self.main_part_started: + msg['main_text'] += delta_text + + return delta_text + + msg = {} + class CustomStreamer(TextParserStreamer): + def write(self, message): + msg.update(message) + return StreamingStatus.RUNNING + + streamer = CustomStreamer(genai_tokenizer, parsers=[CustomParser()]) + + stream_string = ["Hello", "", " ", "world", " ", "", "!"] + + for subword in stream_string: + streamer._write(subword) + + assert msg['main_text'] == ''.join(" world ") + +# @pytest.mark.precommit +# @pytest.mark.parametrize( +# "hf_ov_genai_models", +# ["microsoft/Phi-4-mini-reasoning"], +# # ["katuni4ka/tiny-random-phi3"], +# indirect=True +# ) +# def test_custom_parser_(hf_ov_genai_models): + + +# msg = { +# "content": "This is reasoning. And this is the answer" +# } +# parser.parse(msg) + +# assert msg['reasoning_content'] == "This is reasoning." + +@pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"]) +@pytest.mark.nightly +def test_custom_parser(tmp_path, model_id): + _, _, models_path = download_and_convert_model(model_id, padding_side="left") + pipe = create_ov_pipeline(models_path) + tok = pipe.get_tokenizer() + + class CustomParser(ParserBase): + def parse(self, msg: dict): + content = None + if 'content' in msg: + content = msg['content'] + if not content: + return + + # find text between and + think_start = content.find("") + think_end = content.find("") + if think_start != -1 and think_end != -1 and think_end > think_start: + think_text = content[think_start + len(""):think_end].strip() + msg['reasoning_content'] = think_text + + class CustomStreamer(TextParserStreamer): + def write(self, message): + # make whatever you want with message, but it will be accumulated and parsed by parser afterwards + # accumulated message can be found by get_parsed_message() + return StreamingStatus.RUNNING + + parser = CustomParser() + config = GenerationConfig() + config.max_new_tokens = 600 + config.parsers = [parser] + + res = pipe.generate(["Please say \"hello\""], generation_config=config) + + # extract manually reasoning content from the parsed result + content = res.texts[0] + think_start = content.find("") + think_end = content.find("") + if think_start != -1 and think_end != -1 and think_end > think_start: + think_text = content[think_start + len(""):think_end].strip() + + assert 'reasoning_content' in res.parsed[0] + assert res.parsed[0]['reasoning_content'] != "" + assert res.parsed[0]['reasoning_content'] == think_text + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): @@ -150,7 +257,7 @@ def write(self, message): parsers = streamer.get_parsers() extended = stream_string[:] - extended.append("") + extended.insert(0, "") for parser in parsers: for (prev_subword, subword) in zip(extended, stream_string): @@ -159,4 +266,5 @@ def write(self, message): assert 
msg['reasoning_content'] == think_content assert msg['content'] == content -# TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers. +# TODO: add when streamer accepts integer tokens + From e4ff386971427d8666c64292acceccb6e9e0571b Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 11:12:27 +0200 Subject: [PATCH 25/43] cleanup pybindings; rename Llama32 -> Llama3 --- src/cpp/include/openvino/genai/parsers.hpp | 22 +- src/cpp/src/parsers.cpp | 24 +-- src/python/openvino_genai/__init__.py | 4 +- src/python/openvino_genai/__init__.pyi | 6 +- .../openvino_genai/py_openvino_genai.pyi | 22 +- src/python/py_parsers.cpp | 192 ++++-------------- tests/cpp/parser.cpp | 8 +- tests/python_tests/test_parsers.py | 148 +++++++------- 8 files changed, 144 insertions(+), 282 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 4af6fb2cca..93b4b099a1 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -63,26 +63,26 @@ class ParserBase { virtual void parse(JsonContainer& text) = 0; }; -class OPENVINO_GENAI_EXPORTS Llama32PythonicToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32PythonicToolParser(bool keep_original_content = true); - ~Llama32PythonicToolParser(); + explicit Llama3PythonicToolParser(bool keep_original_content = true); + ~Llama3PythonicToolParser(); void parse(JsonContainer& input) override; private: - class Llama32PythonicToolParserImpl; - std::unique_ptr m_impl; + class Llama3PythonicToolParserImpl; + std::unique_ptr m_impl; }; -class OPENVINO_GENAI_EXPORTS Llama32JsonToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32JsonToolParser(bool keep_original_content = true); - ~Llama32JsonToolParser(); + explicit Llama3JsonToolParser(bool keep_original_content = true); + ~Llama3JsonToolParser(); void parse(JsonContainer& input) override; private: - class Llama32JsonToolParserImpl; - std::unique_ptr m_impl; + class Llama3JsonToolParserImpl; + std::unique_ptr m_impl; }; class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{ @@ -99,5 +99,7 @@ class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{ std::unique_ptr m_impl; }; +// TODO: DeepSeekR1ReasoningParser -> DeepSeekR1IncrementalParser + } // namespace genai } // namespace ov diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 0a8670dbc4..df3ffd6a31 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -163,9 +163,9 @@ std::string ReasoningParser::parse( return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } -class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { +class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { public: - Llama32PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + Llama3PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} bool m_keep_original_content; void parse(JsonContainer& input) { @@ -205,21 +205,21 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { } }; -Llama32PythonicToolParser::Llama32PythonicToolParser(bool 
keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3PythonicToolParser::Llama3PythonicToolParser(bool keep_original_content) { + m_impl = std::make_unique(keep_original_content); } -void Llama32PythonicToolParser::parse(JsonContainer& input) { +void Llama3PythonicToolParser::parse(JsonContainer& input) { m_impl->parse(input); } -Llama32PythonicToolParser::~Llama32PythonicToolParser() = default; +Llama3PythonicToolParser::~Llama3PythonicToolParser() = default; -class Llama32JsonToolParser::Llama32JsonToolParserImpl { +class Llama3JsonToolParser::Llama3JsonToolParserImpl { private: bool m_keep_original_content; public: - Llama32JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + Llama3JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} void parse(JsonContainer& message) { // Find JSON in the message @@ -240,15 +240,15 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { } }; -Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3JsonToolParser::Llama3JsonToolParser(bool keep_original_content) { + m_impl = std::make_unique(keep_original_content); } -void Llama32JsonToolParser::parse(JsonContainer& input) { +void Llama3JsonToolParser::parse(JsonContainer& input) { m_impl->parse(input); } -Llama32JsonToolParser::~Llama32JsonToolParser() = default; +Llama3JsonToolParser::~Llama3JsonToolParser() = default; class BaseReasoningParser::BaseReasoningParserImpl { public: diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index ba99ce7e83..34e0b153f4 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -27,8 +27,8 @@ IncrementalParserBase, Phi4ReasoningParser, DeepSeekR1ReasoningParser, - Llama32JsonToolParser, - Llama32PythonicToolParser, + Llama3JsonToolParser, + Llama3PythonicToolParser, ) __version__ = get_version() diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index f8fd25d6a8..00f67b6d2d 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -31,8 +31,8 @@ from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import KVCrushAnchorPointMode from openvino_genai.py_openvino_genai import KVCrushConfig from openvino_genai.py_openvino_genai import LLMPipeline -from openvino_genai.py_openvino_genai import Llama32JsonToolParser -from openvino_genai.py_openvino_genai import Llama32PythonicToolParser +from openvino_genai.py_openvino_genai import Llama3JsonToolParser +from openvino_genai.py_openvino_genai import Llama3PythonicToolParser from openvino_genai.py_openvino_genai import ParserBase from openvino_genai.py_openvino_genai import PerfMetrics from openvino_genai.py_openvino_genai import Phi4ReasoningParser @@ -72,5 +72,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 8741d691b6..4a27153742 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 
'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. 
@@ -580,10 +580,6 @@ class DecodedResults: class DeepSeekR1ReasoningParser(IncrementalParserBase): def __init__(self) -> None: ... - def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: - """ - Parse is called with the full text. Returns a dict with parsed content. - """ class EncodedGenerationResult: """ @@ -1826,20 +1822,12 @@ class LLMPipeline: ... def start_chat(self, system_message: str = '') -> None: ... -class Llama32JsonToolParser(ParserBase): +class Llama3JsonToolParser(ParserBase): def __init__(self) -> None: ... - def parse(self, text: dict) -> None: - """ - Parse is called with the full text. Returns a dict with parsed content. - """ -class Llama32PythonicToolParser(ParserBase): +class Llama3PythonicToolParser(ParserBase): def __init__(self) -> None: ... - def parse(self, text: dict) -> None: - """ - Parse is called with the full text. Returns a dict with parsed content. - """ class MeanStdPair: def __init__(self) -> None: ... @@ -1963,10 +1951,6 @@ class PerfMetrics: class Phi4ReasoningParser(IncrementalParserBase): def __init__(self, expect_open_tag: bool = False) -> None: ... - def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: - """ - Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. - """ class PipelineMetrics: """ diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d853f507ef..a7ffba9b69 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -19,8 +19,8 @@ using ov::genai::ReasoningParser; using ov::genai::Phi4ReasoningParser; using ov::genai::DeepSeekR1ReasoningParser; using ov::genai::JsonContainer; -using ov::genai::Llama32JsonToolParser; -using ov::genai::Llama32PythonicToolParser; +using ov::genai::Llama3JsonToolParser; +using ov::genai::Llama3PythonicToolParser; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; @@ -28,7 +28,8 @@ namespace pyutils = ov::genai::pybind::utils; namespace { - +// ConstructableIncrementalParserBase and ConstructableParserBase are used when python overload is called from C++ +// and we need to convert JsonContainer to py::dict and then update back JsonContainer from the py::dict which was modified in Python. 
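+//
+// A sketch of the Python-side subclass these trampolines enable (illustrative only; MyParser is a
+// hypothetical name, but the signature mirrors what tests/python_tests/test_parsers.py exercises):
+//
+//   class MyParser(IncrementalParserBase):
+//       def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens=None, delta_tokens=None) -> str:
+//           msg["reasoning_content"] = msg.get("reasoning_content", "") + delta_text  # dict edits are copied back into the C++ JsonContainer
+//           return delta_text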
class ConstructableIncrementalParserBase: public IncrementalParserBase { public: using IncrementalParserBase::IncrementalParserBase; @@ -56,7 +57,6 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { ); // iterate throught py_msg and update msg - msg.clear(); auto msg_anymap = pyutils::py_object_to_any_map(py_msg); for (const auto& [key, value] : msg_anymap) { if (value.is()) { @@ -69,26 +69,6 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { } return res.cast(); } - - // This method should be overridden in Python - std::string parse( - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, - const std::optional>& delta_tokens = std::nullopt - ) { - PYBIND11_OVERRIDE_PURE( - std::string, - IncrementalParserBase, - "parse", - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens - ); - } }; class ConstructableParserBase: public ParserBase { @@ -106,7 +86,6 @@ class ConstructableParserBase: public ParserBase { parse_method(py_msg); // iterate throught py_msg and update msg - msg.clear(); auto msg_anymap = pyutils::py_object_to_any_map(py_msg); for (const auto& [key, value] : msg_anymap) { if (value.is()) { @@ -120,53 +99,6 @@ class ConstructableParserBase: public ParserBase { } }; - -// wrapper to enhance calling parser from Python -void call_parser(py::dict& msg, std::function func) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); - func(msg_cpp); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - py::object json_mod = py::module_::import("json"); - - // since it create a new object instead of updating existing dict. - auto json_str = msg_cpp.to_json_string(); - py::dict result = json_mod.attr("loads")(json_str); - - // update msg with result - msg.clear(); - for (auto item : result) { - msg[item.first] = item.second; - } -} - -// wrapper to enhance calling incremental parser from Python -std::string call_incremental_parser( - IncrementalParserBase& parser, - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens, - const std::optional>& delta_tokens, - std::function>&, - const std::optional>&)> func) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); - - auto res = func(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); - auto json_str = msg_cpp.to_json_string(); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - // since it create a new object instead of updating existing dict. - py::object json_mod = py::module_::import("json"); - py::dict result = json_mod.attr("loads")(json_str); - // update msg with result - msg.clear(); - for (auto item : result) { - msg[item.first] = item.second; - } - return res; -} - } // namespace // TODO: double check/add more relevant docstrings for parsers. 
@@ -179,100 +111,54 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - } - ); + auto msg_cpp = pyutils::py_object_to_json_container(msg); + auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + auto json_str = msg_cpp.to_json_string(); + + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + // since it create a new object instead of updating existing dict. + py::object json_mod = py::module_::import("json"); + py::dict result = json_mod.attr("loads")(json_str); + // update msg with result + for (auto item : result) { + msg[item.first] = item.second; + } + return res; }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") - .def(py::init(), py::arg("expect_open_tag") = false) - .def("parse", - [](Phi4ReasoningParser& self, - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, - const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - }); - }, - "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", - py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); + .def(py::init(), py::arg("expect_open_tag") = false); py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") - .def(py::init<>()) - .def("parse", - [](DeepSeekR1ReasoningParser& self, - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, - const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - } - ); - }, - "Parse is called with the full text. 
Returns a dict with parsed content.", - py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); + .def(py::init<>()); py::class_>(m, "ParserBase") - .def(py::init<>()) - .def("parse", - [](ParserBase& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) {self.parse(m);}); - }, - py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content."); - - py::class_, ParserBase>(m, "Llama32JsonToolParser") .def(py::init<>()) .def("parse", - [](Llama32JsonToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); + [](ParserBase& self, py::dict& msg) { + auto msg_cpp = pyutils::py_object_to_json_container(msg); + self.parse(msg_cpp); + + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + py::object json_mod = py::module_::import("json"); + + // since it create a new object instead of updating existing dict. + auto json_str = msg_cpp.to_json_string(); + py::dict result = json_mod.attr("loads")(json_str); + + // update msg with result + for (auto item : result) { + msg[item.first] = item.second; + } }, py::arg("text"), "Parse is called with the full text. Returns a dict with parsed content."); - py::class_, ParserBase>(m, "Llama32PythonicToolParser") - .def(py::init<>()) - .def("parse", - [](Llama32PythonicToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); - }, - py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content."); + py::class_, ParserBase>(m, "Llama3JsonToolParser") + .def(py::init<>()); + + py::class_, ParserBase>(m, "Llama3PythonicToolParser") + .def(py::init<>()); } diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index a74a12f499..e31bd3c236 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -9,7 +9,7 @@ using namespace ov::genai; -TEST(ParserTest, test_llama32_parser_1) { +TEST(ParserTest, test_llama3_parser_1) { std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; // By default content should keep original values. 
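+// Illustrative note (not part of the patch): with the default keep_original_content=true the parser
+// leaves input["content"] untouched and only adds an input["tool_calls"] entry, e.g.
+//   Llama3PythonicToolParser parser;                                       // keeps "content" as-is
+//   Llama3PythonicToolParser stripping(/*keep_original_content*/ false);   // removes the call from "content"
+// test_llama3_parser_2 below covers the stripping variant.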
@@ -25,7 +25,7 @@ TEST(ParserTest, test_llama32_parser_1) {
     }));
 
-    std::shared_ptr<ParserBase> parser = std::make_shared<Llama32PythonicToolParser>();
+    std::shared_ptr<ParserBase> parser = std::make_shared<Llama3PythonicToolParser>();
     JsonContainer input;
     input["content"] = prompt;
     parser->parse(input);
@@ -33,7 +33,7 @@ TEST(ParserTest, test_llama32_parser_1) {
     ASSERT_TRUE(expected == input);
 }
 
-TEST(ParserTest, test_llama32_parser_2) {
+TEST(ParserTest, test_llama3_parser_2) {
     std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)";
 
     JsonContainer expected;
@@ -47,7 +47,7 @@ TEST(ParserTest, test_llama32_parser_2) {
         }}
     })));
 
-    std::shared_ptr<ParserBase> parser = std::make_shared<Llama32PythonicToolParser>(/*keep_original_content*/ false);
+    std::shared_ptr<ParserBase> parser = std::make_shared<Llama3PythonicToolParser>(/*keep_original_content*/ false);
     JsonContainer input;
     input["content"] = prompt;
     parser->parse(input);
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 6535a1130c..08574d9c89 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -1,15 +1,10 @@
 # Copyright (C) 2023-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-import dataclasses
 import json
-from typing import Optional
-
+from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 from utils.ov_genai_pipelines import create_ov_pipeline
-from utils.network import retry_request
-import numpy as np
-import openvino
 import pytest
-from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
 from transformers import AutoTokenizer
 import re
 
 
@@ -46,7 +41,7 @@ def hf_ov_genai_models(request, tmp_path_factory):
         "the box.\n</think>\n\nThe answer to 2 + 1 is \boxed{3}."
     ),
 ])
-def test_phi4_reason_parser_1(hf_ov_genai_models, answer):
+def test_incremental_phi4_reason_parser_1(hf_ov_genai_models, answer):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
     stream_string = re.split(r"(\s+)", answer)
@@ -82,7 +77,7 @@ def write(self, message):
     # check that if thinking opening and closing tags are passed in a single subword, it is still parsed correctly
     ["<think>\nOkay, the user is asking for the answer to 2 + 1.\n</think>\nThe answer to 2 + 1 is \boxed{3}."]
 ])
-def test_phi4_reason_parser_2(hf_ov_genai_models, split_answer):
+def test_incremental_phi4_reason_parser_2(hf_ov_genai_models, split_answer):
     # check that if thinking opening and closing tags are in the middle of the subword, it is still parsed correctly
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
@@ -104,22 +99,54 @@ def write(self, message):
 
 
 @pytest.mark.precommit
-@pytest.mark.parametrize(
-    "hf_ov_genai_models",
-    ["katuni4ka/tiny-random-phi3"],
-    indirect=True
-)
-def test_final_parser_llama_32_json(hf_ov_genai_models):
-    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+@pytest.mark.parametrize("answer", [
+    "<think>\nOkay, the user is asking for the answer to 2 + 1.\n</think>\nThe answer to 2 + 1 is \boxed{3}.",
+])
+def test_incremental_phi4_reason_parser_nostreamer(answer):
+    # In this test we call the parser directly, without a streamer
+    parser = Phi4ReasoningParser()
+
+    stream_string = re.split(r"(\s+)", answer)
+    msg = {}
+    for subword in stream_string:
+        parser.parse(msg, '', subword)
+        # When the parser is called from a streamer, content is accumulated inside the streamer.
+        # Here we call the parser manually, therefore we need to accumulate content manually.
+        msg['content'] += subword
 
-    json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}'
-    content_json = {
-        "content": f"Calling weather API: {json_str}"
-    }
+    think_content = answer.split("</think>")[0].replace("<think>", "")
+    content = answer
 
-    parser = Llama32JsonToolParser()
-    parser.parse(content_json)
-    assert content_json['tool_calls'][0] == json.loads(json_str)
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == content
+
+
+def test_incremental_deepseek_parser():
+    msg = {}
+    stream_string = [
+        "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking",
+        " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition",
+        " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting",
+        " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals",
+        " ", "3", ".\n", "</think>", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum",
+        " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **",
+        "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to",
+        " it", ".", "**\n", " \n", " ", " \\", "[\n", " "
+    ]
+
+    full_str = ''.join(stream_string)
+    think_content = full_str.split("</think>")[0]
+    content = full_str.split("</think>")[1]
+
+    extended = stream_string[:]
+    extended.insert(0, "")
+
+    parser = DeepSeekR1ReasoningParser()
+    for (prev_subword, subword) in zip(extended, stream_string):
+        parser.parse(msg, prev_subword, subword)
+
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == content
 
 
 @pytest.mark.precommit
@@ -128,7 +155,7 @@ def
test_final_parser_llama_32_json(hf_ov_genai_models): ["katuni4ka/tiny-random-phi3"], indirect=True ) -def test_custom_streamer_parser(hf_ov_genai_models): +def test_custom_incremental_parser(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomParser(IncrementalParserBase): @@ -155,7 +182,6 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=[CustomParser()]) stream_string = ["Hello", "", " ", "world", " ", "", "!"] @@ -165,22 +191,28 @@ def write(self, message): assert msg['main_text'] == ''.join(" world ") -# @pytest.mark.precommit -# @pytest.mark.parametrize( -# "hf_ov_genai_models", -# ["microsoft/Phi-4-mini-reasoning"], -# # ["katuni4ka/tiny-random-phi3"], -# indirect=True -# ) -# def test_custom_parser_(hf_ov_genai_models): +@pytest.mark.precommit +@pytest.mark.parametrize( + "hf_ov_genai_models", + ["katuni4ka/tiny-random-phi3"], + indirect=True +) +def test_final_parser_llama_32_json(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models -# msg = { -# "content": "This is reasoning. And this is the answer" -# } -# parser.parse(msg) + json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}' + content_json = { + "content": f"Calling weather API: {json_str}" + } + + parser = Llama3JsonToolParser() + parser.parse(content_json) + assert content_json['tool_calls'][0] == json.loads(json_str) + + +# TODO: add test when several parsers are called. -# assert msg['reasoning_content'] == "This is reasoning." @pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"]) @pytest.mark.nightly @@ -203,12 +235,6 @@ def parse(self, msg: dict): if think_start != -1 and think_end != -1 and think_end > think_start: think_text = content[think_start + len(""):think_end].strip() msg['reasoning_content'] = think_text - - class CustomStreamer(TextParserStreamer): - def write(self, message): - # make whatever you want with message, but it will be accumulated and parsed by parser afterwards - # accumulated message can be found by get_parsed_message() - return StreamingStatus.RUNNING parser = CustomParser() config = GenerationConfig() @@ -228,43 +254,7 @@ def write(self, message): assert res.parsed[0]['reasoning_content'] != "" assert res.parsed[0]['reasoning_content'] == think_text -def test_parsers_2(hf_ov_genai_models): - hf_tokenizer, genai_tokenizer = hf_ov_genai_models - class CustomStreamer(TextParserStreamer): - def write(self, message): - if "content" in message: - print(message["content"]) - return StreamingStatus.RUNNING - streamer = TextParserStreamer(genai_tokenizer, parsers=[DeepSeekR1ReasoningParser()]) - msg = {} - stream_string = [ - "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", - " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition", - " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting", - " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals", - " ", "3", ".\n", "", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum", - " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **", - "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", 
"1", " to", - " it", ".", "**\n", " \n", " ", " \\", "[\n", " " - ] - - full_str = ''.join(stream_string) - think_content = full_str.split("")[0] - content = full_str.split("")[1] - - parsers = streamer.get_parsers() - - extended = stream_string[:] - extended.insert(0, "") - - for parser in parsers: - for (prev_subword, subword) in zip(extended, stream_string): - msg = parser.parse(msg, prev_subword, subword) - - assert msg['reasoning_content'] == think_content - assert msg['content'] == content # TODO: add when streamer accepts integer tokens - From 196a54c1ac5a2813bbbbed1abf68d0ff40e87082 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 12:52:40 +0200 Subject: [PATCH 26/43] corrected expect_open_tag behaviour, added some tests --- src/cpp/include/openvino/genai/parsers.hpp | 4 +- .../include/openvino/genai/text_streamer.hpp | 1 + src/cpp/src/parsers.cpp | 13 ++-- src/python/openvino_genai/__init__.py | 1 + src/python/py_generation_config.cpp | 2 +- src/python/py_parsers.cpp | 11 ++- src/python/py_streamers.cpp | 13 ++-- tests/python_tests/test_parsers.py | 71 ++++++++++++++++--- 8 files changed, 91 insertions(+), 25 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 93b4b099a1..7b85c3541c 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -48,12 +48,12 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase { class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser { public: - explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {}; + explicit DeepSeekR1ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {}; }; class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser { public: - explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {}; + explicit Phi4ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {}; }; class ParserBase { diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 816427c985..53324def89 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -49,6 +49,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase { class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { public: + using TextStreamer::write; TextParserStreamer(const Tokenizer& tokenizer, std::vector> parsers = {}); virtual StreamingStatus write(JsonContainer& message) = 0; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index df3ffd6a31..8de2bd5e1f 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -40,7 +40,7 @@ class ReasoningParser::ReasoningParserImpl { if (m_deactivated) { return delta_text; } - if (m_expect_open_tag && m_first_run) { + if (!m_expect_open_tag && m_first_run) { m_think_tag_opened = true; } m_first_run = false; @@ -57,10 +57,11 @@ class ReasoningParser::ReasoningParserImpl { auto reason_str = msg["reasoning_content"].get_string(); auto content_str = msg["content"].get_string(); - if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_expect_open_tag) { + if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && m_expect_open_tag) { // Thinking has started auto open_idx = txt_chunk.find(m_open_tag); - reason_str += 
+
+            reason_str += txt_chunk.substr(open_idx + m_open_tag.size(), txt_chunk.size() - (open_idx + m_open_tag.size()));
             if (!m_keep_original_content) {
                 delta_text = "";
             }
@@ -72,8 +73,8 @@ class ReasoningParser::ReasoningParserImpl {
             if (txt_chunk.find(m_close_tag) != std::string::npos) {
                 // If <think> and </think> are in the same txt_chunk + delta_text
                 auto close_idx = txt_chunk.find(m_close_tag);
-                reason_str = txt_chunk.substr(open_idx + std::string(m_open_tag).size(), close_idx - (open_idx + std::string(m_open_tag).size()));
-                content_str = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size()));
+                reason_str = txt_chunk.substr(open_idx + m_open_tag.size(), close_idx - (open_idx + m_open_tag.size()));
+                content_str = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
                 if (!m_keep_original_content) {
                     delta_text = content_str;
                 }
@@ -91,7 +92,7 @@
             // Example if m_text_cache + delta_text = "...some text</th" + "ink>Answer is 3" = "...some text</think>Answer is 3"
             // we want to keep in delta_txt only "Answer is 3".
             // We can operate with txt_chunk since final characters closing the tag ("ink>") are always in delta_text.
-            delta_text = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size()));
+            delta_text = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
 
             msg["reasoning_content"] = reason_str;
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index 34e0b153f4..e4edc351ca 100644
--- a/src/python/openvino_genai/__init__.py
+++ b/src/python/openvino_genai/__init__.py
@@ -29,6 +29,7 @@
     DeepSeekR1ReasoningParser,
     Llama3JsonToolParser,
     Llama3PythonicToolParser,
+    ReasoningParser
 )
 
 __version__ = get_version()
diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp
index 4c9250bd87..86ad684aa7 100644
--- a/src/python/py_generation_config.cpp
+++ b/src/python/py_generation_config.cpp
@@ -445,7 +445,7 @@ void init_generation_config(py::module_& m) {
         .def_readwrite("include_stop_str_in_output", &GenerationConfig::include_stop_str_in_output)
         .def_readwrite("stop_token_ids", &GenerationConfig::stop_token_ids)
         .def_readwrite("structured_output_config", &GenerationConfig::structured_output_config)
-        .def_readwrite("parsers", &GenerationConfig::parsers)
+        .def_readwrite("parsers", &GenerationConfig::parsers)  // TODO: add keep_alive
         .def_readwrite("adapters", &GenerationConfig::adapters)
         .def_readwrite("apply_chat_template", &GenerationConfig::apply_chat_template)
         .def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index a7ffba9b69..931c84ce18 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -129,10 +129,10 @@ void init_parsers(py::module_& m) {
         "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.");
 
-    py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, IncrementalParserBase>(m, "Phi4ReasoningParser")
-        .def(py::init<bool>(), py::arg("expect_open_tag") = false);
+        .def(py::init<bool>(), py::arg("expect_open_tag") = true);
 
     py::class_<DeepSeekR1ReasoningParser, std::shared_ptr<DeepSeekR1ReasoningParser>, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser")
-        .def(py::init<>());
+        .def(py::init<bool>(), py::arg("expect_open_tag") = false);
 
     py::class_<ParserBase, ConstructableParserBase, std::shared_ptr<ParserBase>>(m, "ParserBase")
         .def(py::init<>())
@@ -161,4 +161,11 @@ void init_parsers(py::module_& m) {
 
     py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, ParserBase>(m, "Llama3PythonicToolParser")
         .def(py::init<>());
+
+    py::class_<ReasoningParser, std::shared_ptr<ReasoningParser>, IncrementalParserBase>(m, "ReasoningParser")
+        .def(py::init<bool, bool, std::string, std::string>(),
+            py::arg("expect_open_tag") = true,
+            py::arg("keep_original_content") = true,
+            py::arg("open_tag") = "<think>",
+            py::arg("close_tag") = "</think>");
 }
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index d6660fcd17..7ac5ee437e 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -166,11 +166,14 @@ void init_streamers(py::module_& m) {
         },
         py::arg("message"),
         "Write is called with a dict. Returns StreamingStatus.")
-        .def("_write",
-            py::overload_cast<std::string>(&TextParserStreamer::write),
-            py::arg("message"),
-            "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.")
-
+        .def("_write", [](TextParserStreamer& self, std::variant<std::vector<int64_t>, std::string> chunk) -> StreamingStatus {
+            if (auto _token = std::get_if<std::vector<int64_t>>(&chunk)) {
+                return self.write(*_token);
+            } else if (auto _str = std::get_if<std::string>(&chunk)) {
+                auto res = self.write(*_str);
+                return std::get<StreamingStatus>(res);
+            }
+        })
         .def("get_parsed_message",
             [](TextParserStreamer& self) -> py::dict{
                 return pyutils::json_container_to_py_object(self.get_parsed_message());
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 08574d9c89..6b9022255d 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -4,7 +4,7 @@
 from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 from utils.ov_genai_pipelines import create_ov_pipeline
 import pytest
-from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig, ReasoningParser
 from transformers import AutoTokenizer
 import re
 
@@ -63,6 +63,35 @@ def write(self, message):
     assert msg['content'] == content
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
+    indirect=True
+)
+def test_incremental_phi4_reason_integer_token_ids(hf_ov_genai_models):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            msg.update(message)
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()])
+
+    msg = {}
+    answer = "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."
+    encoded_tokens = genai_tokenizer.encode(answer).input_ids.data.tolist()
+    for token in encoded_tokens:
+        streamer._write(token)
+    streamer.end()
+
+    think_content = answer.split("</think>")[0].replace("<think>", "")
+    content = answer
+
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == content
+
+
 @pytest.mark.precommit
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
@@ -121,6 +150,38 @@ def test_incremental_phi4_reason_parser_nostreamer(answer):
     assert msg['content'] == content
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize("keep_original_content", [True, False])
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
+    indirect=True
+)
+@pytest.mark.parametrize("answer", [
+    "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}.",
+])
+def test_reasoning_parser_cut_content(hf_ov_genai_models, answer, keep_original_content):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+    stream_string = re.split(r"(\s+)", answer)
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            msg.update(message)
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningParser(expect_open_tag=True, keep_original_content=keep_original_content)])
+
+    msg = {}
+    for subword in stream_string:
+        streamer._write(subword)
+
+    think_content = answer.split("</think>")[0].replace("<think>", "")
+    content = answer
+
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+
+
 def test_incremental_deepseek_parser():
     msg = {}
     stream_string = [
@@ -211,9 +272,6 @@ def test_final_parser_llama_32_json(hf_ov_genai_models):
     assert content_json['tool_calls'][0] == json.loads(json_str)
 
 
-# TODO: add test when several parsers are called.
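(The TODO removed just above, exercising several parsers on one message, can be sketched as follows. This is an illustrative sketch only, using the post-rename names bound at this point in the series; `AddMarker` is a hypothetical stand-in for any second final parser, not part of the API.)

    # Illustrative only: several final parsers mutate the same message dict in turn,
    # mirroring how LLMPipeline::generate walks generation_config.parsers.
    import json
    from openvino_genai import Parser, Llama3JsonToolParser

    class AddMarker(Parser):  # hypothetical helper, not part of the API
        def parse(self, msg: dict):
            msg["postprocessed"] = True

    json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}'
    msg = {"content": f"Calling weather API: {json_str}"}
    for parser in [Llama3JsonToolParser(), AddMarker()]:
        parser.parse(msg)  # each parser updates the same dict in place
    assert msg["tool_calls"][0] == json.loads(json_str)
    assert msg["postprocessed"] is True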
-
-
 @pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"])
 @pytest.mark.nightly
 def test_custom_parser(tmp_path, model_id):
@@ -253,8 +311,3 @@ def parse(self, msg: dict):
     assert 'reasoning_content' in res.parsed[0]
     assert res.parsed[0]['reasoning_content'] != ""
     assert res.parsed[0]['reasoning_content'] == think_text
-
-
-
-
-# TODO: add when streamer accepts integer tokens

From 801d8fb332406114bc1d8f2d537aaf91f385e187 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Wed, 22 Oct 2025 13:19:26 +0200
Subject: [PATCH 27/43] renamed parsers: ParserBase -> Parser;
 IncrementalParserBase -> IncrementalParser

---
 .../openvino/genai/generation_config.hpp      |  3 +-
 src/cpp/include/openvino/genai/parsers.hpp    | 34 ++++++------
 .../include/openvino/genai/text_streamer.hpp  |  4 +-
 src/cpp/src/llm/pipeline.cpp                  |  2 +-
 src/cpp/src/parsers.cpp                       | 10 ++--
 src/cpp/src/text_streamer.cpp                 |  2 +-
 src/python/openvino_genai/__init__.py         | 10 ++--
 src/python/openvino_genai/__init__.pyi        | 11 ++--
 .../openvino_genai/py_openvino_genai.pyi      | 33 ++++++------
 src/python/py_parsers.cpp                     | 52 +++++++++----------
 src/python/py_streamers.cpp                   |  6 +--
 tests/cpp/parser.cpp                          | 12 ++---
 tests/python_tests/test_parsers.py            | 18 +++----
 13 files changed, 98 insertions(+), 99 deletions(-)

diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 31c50b28e5..e592cb36ff 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -689,8 +689,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool is_prompt_lookup() const;
     bool is_structured_output_generation() const;
 
-    // parsers
-    std::vector<std::shared_ptr<ParserBase>> parsers;
+    std::vector<std::shared_ptr<Parser>> parsers;
 
     OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
     bool is_speculative_decoding() const;
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 7b85c3541c..7225669f5a 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -10,9 +10,9 @@
 namespace ov {
 namespace genai {
 
-class OPENVINO_GENAI_EXPORTS IncrementalParserBase {
+class OPENVINO_GENAI_EXPORTS IncrementalParser {
 public:
-    IncrementalParserBase() = default;
+    IncrementalParser() = default;
 
     // Returns a string with the filtered text to be added to the content.
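     // For example, a reasoning parser may move a "<think>...</think>" section into
     // message["reasoning_content"] and return only the remaining visible text
     // (illustrative note; see the reasoning parsers below for the concrete behaviour).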
     virtual std::string parse(
         JsonContainer& msg,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
@@ -23,19 +23,19 @@ class OPENVINO_GENAI_EXPORTS IncrementalParserBase {
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
 
-    virtual ~IncrementalParserBase() = default;
+    virtual ~IncrementalParser() = default;
 };
 
-class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase {
+class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalParser {
 private:
     class ReasoningParserImpl;
     std::unique_ptr<ReasoningParserImpl> m_impl;
 public:
-    ReasoningParser(bool expect_open_tag = true,
+    ReasoningIncrementalParser(bool expect_open_tag = true,
                     bool keep_original_content = true,
                     const std::string& open_tag = "<think>",
                     const std::string& close_tag = "</think>");
-    virtual ~ReasoningParser();
+    virtual ~ReasoningIncrementalParser();
 
     std::string parse(
@@ -46,24 +46,24 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase {
     ) override;
 };
 
-class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser {
+class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningIncrementalParser : public ReasoningIncrementalParser {
 public:
-    explicit DeepSeekR1ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {};
+    explicit DeepSeekR1ReasoningIncrementalParser(bool expect_open_tag = false) : ReasoningIncrementalParser(expect_open_tag) {};
 };
 
-class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser {
+class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIncrementalParser {
 public:
-    explicit Phi4ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {};
+    explicit Phi4ReasoningIncrementalParser(bool expect_open_tag = true) : ReasoningIncrementalParser(expect_open_tag) {};
 };
 
-class ParserBase {
+class Parser {
 public:
-    ParserBase() = default;
-    virtual ~ParserBase();
+    Parser() = default;
+    virtual ~Parser();
     virtual void parse(JsonContainer& text) = 0;
 };
 
-class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public ParserBase {
+class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama3PythonicToolParser(bool keep_original_content = true);
     ~Llama3PythonicToolParser();
@@ -74,7 +74,7 @@ class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public ParserBase {
     std::unique_ptr<Llama3PythonicToolParserImpl> m_impl;
 };
 
-class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public ParserBase {
+class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama3JsonToolParser(bool keep_original_content = true);
@@ -85,7 +85,7 @@
     std::unique_ptr<Llama3JsonToolParserImpl> m_impl;
 };
 
-class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{
+class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public Parser {
 public:
     BaseReasoningParser(
         bool expect_open_tag = true,
@@ -99,7 +99,5 @@
     std::unique_ptr<BaseReasoningParserImpl> m_impl;
 };
 
-// TODO: DeepSeekR1ReasoningParser -> DeepSeekR1IncrementalParser
-
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 53324def89..fabea0f524 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -50,7 +50,7 @@ class
OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer {
 public:
     using TextStreamer::write;
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParserBase>> parsers = {});
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers = {});
 
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
@@ -60,7 +60,7 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer {
 private:
     JsonContainer m_parsed_message;
     std::string m_text_buffer;
-    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
+    std::vector<std::shared_ptr<IncrementalParser>> m_parsers;
 };
 
 } // namespace genai
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index 9f988dbb59..513d047bf2 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -265,7 +265,7 @@ DecodedResults LLMPipeline::generate(
         return res;
     }
 
-    std::vector<std::shared_ptr<ParserBase>> parsers = (*generation_config).parsers;
+    std::vector<std::shared_ptr<Parser>> parsers = (*generation_config).parsers;
     res.parsed.resize(res.texts.size());
     // Apply Base parsers sequentially even if IncrementalParser has run.
     for (size_t i = 0; i < res.texts.size(); ++i) {
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 8de2bd5e1f..c362d9f73d 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -8,7 +8,7 @@
 
 namespace ov::genai {
 
-class ReasoningParser::ReasoningParserImpl {
+class ReasoningIncrementalParser::ReasoningParserImpl {
 private:
     bool m_expect_open_tag;
     bool m_first_run = true;
@@ -148,13 +148,13 @@ class ReasoningParser::ReasoningParserImpl {
     }
 };
 
-ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
-    m_impl = std::make_unique<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+ReasoningIncrementalParser::ReasoningIncrementalParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_unique<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
 
-ReasoningParser::~ReasoningParser() = default;
+ReasoningIncrementalParser::~ReasoningIncrementalParser() = default;
 
-std::string ReasoningParser::parse(
+std::string ReasoningIncrementalParser::parse(
     JsonContainer& msg,
     const std::string& previous_text,
     std::string& delta_text,
     const std::optional<std::vector<int64_t>>& previous_tokens,
     const std::optional<std::vector<int64_t>>& delta_tokens
 ) {
     return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens);
 }
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index 9b83e0c60c..ee48cff1f9 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -124,7 +124,7 @@ void TextStreamer::end() {
 
 StreamerBase::~StreamerBase() = default;
 
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParserBase>> parsers)
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers)
     : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { return this->write(s); }), m_parsers{parsers} {}
 
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index e4edc351ca..a0c06baec6 100644
--- a/src/python/openvino_genai/__init__.py
+++ b/src/python/openvino_genai/__init__.py
@@ -23,13 +23,13 @@
 )
 
 from .py_openvino_genai import (
-    ParserBase,
-    IncrementalParserBase,
-    Phi4ReasoningParser,
-    DeepSeekR1ReasoningParser,
+    Parser,
+    IncrementalParser,
+    Phi4ReasoningIncrementalParser,
+    DeepSeekR1ReasoningIncrementalParser,
     Llama3JsonToolParser,
     Llama3PythonicToolParser,
-    ReasoningParser
+    ReasoningIncrementalParser
 )
 
 __version__ =
get_version() diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 00f67b6d2d..04bd694c57 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -15,7 +15,7 @@ from openvino_genai.py_openvino_genai import ChunkStreamerBase from openvino_genai.py_openvino_genai import ContinuousBatchingPipeline from openvino_genai.py_openvino_genai import CppStdGenerator from openvino_genai.py_openvino_genai import DecodedResults -from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningParser +from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningIncrementalParser from openvino_genai.py_openvino_genai import EncodedResults from openvino_genai.py_openvino_genai import FluxTransformer2DModel from openvino_genai.py_openvino_genai import GenerationConfig @@ -26,18 +26,19 @@ from openvino_genai.py_openvino_genai import Generator from openvino_genai.py_openvino_genai import Image2ImagePipeline from openvino_genai.py_openvino_genai import ImageGenerationConfig from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics -from openvino_genai.py_openvino_genai import IncrementalParserBase +from openvino_genai.py_openvino_genai import IncrementalParser from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import KVCrushAnchorPointMode from openvino_genai.py_openvino_genai import KVCrushConfig from openvino_genai.py_openvino_genai import LLMPipeline from openvino_genai.py_openvino_genai import Llama3JsonToolParser from openvino_genai.py_openvino_genai import Llama3PythonicToolParser -from openvino_genai.py_openvino_genai import ParserBase +from openvino_genai.py_openvino_genai import Parser from openvino_genai.py_openvino_genai import PerfMetrics -from openvino_genai.py_openvino_genai import Phi4ReasoningParser +from openvino_genai.py_openvino_genai import Phi4ReasoningIncrementalParser from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics +from openvino_genai.py_openvino_genai import ReasoningIncrementalParser from openvino_genai.py_openvino_genai import SD3Transformer2DModel from openvino_genai.py_openvino_genai import Scheduler from openvino_genai.py_openvino_genai import SchedulerConfig @@ -72,5 +73,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 4a27153742..549da2bf3a 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 
'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. 
@@ -577,8 +577,8 @@ class DecodedResults:
     @property
     def texts(self) -> list[str]:
         ...
-class DeepSeekR1ReasoningParser(IncrementalParserBase):
-    def __init__(self) -> None:
+class DeepSeekR1ReasoningIncrementalParser(IncrementalParser):
+    def __init__(self, expect_open_tag: bool = False) -> None:
         ...
 class EncodedGenerationResult:
     """
@@ -978,10 +978,10 @@ class GenerationConfig:
     def num_return_sequences(self, arg0: typing.SupportsInt) -> None:
         ...
     @property
-    def parsers(self) -> list[ParserBase]:
+    def parsers(self) -> list[Parser]:
         ...
     @parsers.setter
-    def parsers(self, arg0: collections.abc.Sequence[ParserBase]) -> None:
+    def parsers(self, arg0: collections.abc.Sequence[Parser]) -> None:
         ...
     @property
     def presence_penalty(self) -> float:
@@ -1459,7 +1459,7 @@ class ImageGenerationPerfMetrics:
     @property
    def raw_metrics(self) -> RawImageGenerationPerfMetrics:
         ...
-class IncrementalParserBase:
+class IncrementalParser:
     def __init__(self) -> None:
         ...
     def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
@@ -1822,10 +1822,10 @@ class LLMPipeline:
         ...
     def start_chat(self, system_message: str = '') -> None:
         ...
-class Llama3JsonToolParser(ParserBase):
+class Llama3JsonToolParser(Parser):
     def __init__(self) -> None:
         ...
-class Llama3PythonicToolParser(ParserBase):
+class Llama3PythonicToolParser(Parser):
     def __init__(self) -> None:
         ...
 class MeanStdPair:
@@ -1839,7 +1839,7 @@ class MeanStdPair:
     @property
     def std(self) -> float:
         ...
-class ParserBase:
+class Parser:
     def __init__(self) -> None:
         ...
     def parse(self, text: dict) -> None:
@@ -1948,8 +1948,8 @@ class PerfMetrics:
     @property
     def raw_metrics(self) -> RawPerfMetrics:
         ...
-class Phi4ReasoningParser(IncrementalParserBase):
-    def __init__(self, expect_open_tag: bool = False) -> None:
+class Phi4ReasoningIncrementalParser(IncrementalParser):
+    def __init__(self, expect_open_tag: bool = True) -> None:
         ...
 class PipelineMetrics:
     """
@@ -2082,6 +2082,9 @@ class RawPerfMetrics:
     @property
     def tokenization_durations(self) -> list[float]:
         ...
+class ReasoningIncrementalParser(IncrementalParser):
+    def __init__(self, expect_open_tag: bool = True, keep_original_content: bool = True, open_tag: str = '<think>', close_tag: str = '</think>') -> None:
+        ...
 class SD3Transformer2DModel:
     """
     SD3Transformer2DModel class.
@@ -3384,14 +3387,12 @@ class TextEmbeddingPipeline:
         Waits computed embeddings for a query
         """
 class TextParserStreamer(TextStreamer):
-    def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParserBase] = []) -> None:
+    def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParser] = []) -> None:
         """
         TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.
         """
-    def _write(self, message: str) -> bool | openvino_genai.py_openvino_genai.StreamingStatus:
-        """
-        Write is called with a string message. Returns CallbackTypeVariant. This is a private method.
-        """
+    def _write(self, arg0: collections.abc.Sequence[typing.SupportsInt] | str) -> StreamingStatus:
+        ...
     def get_parsed_message(self) -> dict:
         """
         Get the current parsed message
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index 931c84ce18..e6b3437f87 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -13,11 +13,11 @@
 
 namespace py = pybind11;
 
-using ov::genai::IncrementalParserBase;
-using ov::genai::ParserBase;
-using ov::genai::ReasoningParser;
-using ov::genai::Phi4ReasoningParser;
-using ov::genai::DeepSeekR1ReasoningParser;
+using ov::genai::IncrementalParser;
+using ov::genai::Parser;
+using ov::genai::ReasoningIncrementalParser;
+using ov::genai::Phi4ReasoningIncrementalParser;
+using ov::genai::DeepSeekR1ReasoningIncrementalParser;
 using ov::genai::JsonContainer;
 using ov::genai::Llama3JsonToolParser;
 using ov::genai::Llama3PythonicToolParser;
@@ -28,11 +28,11 @@ namespace pyutils = ov::genai::pybind::utils;
 
 namespace {
 
-// ConstructableIncrementalParserBase and ConstructableParserBase are used when python overload is called from C++
+// ConstructableIncrementalParser and ConstructableParser are used when python overload is called from C++
 // and we need to convert JsonContainer to py::dict and then update back JsonContainer from the py::dict which was modified in Python.
-class ConstructableIncrementalParserBase: public IncrementalParserBase {
+class ConstructableIncrementalParser: public IncrementalParser {
 public:
-    using IncrementalParserBase::IncrementalParserBase;
+    using IncrementalParser::IncrementalParser;
     std::string parse(
         JsonContainer& msg,
         const std::string& previous_text,
@@ -43,7 +43,7 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase {
         // Convert JsonContainer to py::dict
         py::dict py_msg = pyutils::json_container_to_py_object(msg);
 
-        py::function parse_method = py::get_override(static_cast<const IncrementalParserBase*>(this), "parse");
+        py::function parse_method = py::get_override(static_cast<const IncrementalParser*>(this), "parse");
         if (!parse_method) {
             throw std::runtime_error("parse method not implemented in Python subclass");
         }
@@ -71,12 +71,12 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase {
     }
 };
 
-class ConstructableParserBase: public ParserBase {
+class ConstructableParser: public Parser {
 public:
     void parse(JsonContainer& msg) override {
         py::gil_scoped_acquire acquire;
 
-        py::function parse_method = py::get_override(static_cast<const ParserBase*>(this), "parse");
+        py::function parse_method = py::get_override(static_cast<const Parser*>(this), "parse");
         if (!parse_method) {
             throw std::runtime_error("parse method not implemented in Python subclass");
         }
@@ -103,9 +103,9 @@ class ConstructableParserBase: public ParserBase {
 // TODO: double check/add more relevant docstrings for parsers.
 void init_parsers(py::module_& m) {
-    py::class_<IncrementalParserBase, ConstructableIncrementalParserBase, std::shared_ptr<IncrementalParserBase>>(m, "IncrementalParserBase")
+    py::class_<IncrementalParser, ConstructableIncrementalParser, std::shared_ptr<IncrementalParser>>(m, "IncrementalParser")
         .def(py::init<>())
-        .def("parse", [](IncrementalParserBase& self,
+        .def("parse", [](IncrementalParser& self,
                         py::dict& msg,
                         std::string& previous_text,
                         std::string& delta_text,
@@ -116,7 +116,7 @@ void init_parsers(py::module_& m) {
         py::arg("previous_tokens") = std::nullopt,
         py::arg("delta_tokens") = std::nullopt,
         "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.");
 
-    py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, IncrementalParserBase>(m, "Phi4ReasoningParser")
+    py::class_<ReasoningIncrementalParser, std::shared_ptr<ReasoningIncrementalParser>, IncrementalParser>(m, "ReasoningIncrementalParser")
+        .def(py::init<bool, bool, std::string, std::string>(),
+            py::arg("expect_open_tag") = true,
+            py::arg("keep_original_content") = true,
+            py::arg("open_tag") = "<think>",
+            py::arg("close_tag") = "</think>");
+
+    py::class_<Phi4ReasoningIncrementalParser, std::shared_ptr<Phi4ReasoningIncrementalParser>, IncrementalParser>(m, "Phi4ReasoningIncrementalParser")
         .def(py::init<bool>(), py::arg("expect_open_tag") = true);
 
-    py::class_<DeepSeekR1ReasoningParser, std::shared_ptr<DeepSeekR1ReasoningParser>, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser")
+    py::class_<DeepSeekR1ReasoningIncrementalParser, std::shared_ptr<DeepSeekR1ReasoningIncrementalParser>, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser")
         .def(py::init<bool>(), py::arg("expect_open_tag") = false);
 
-    py::class_<ParserBase, ConstructableParserBase, std::shared_ptr<ParserBase>>(m, "ParserBase")
+    py::class_<Parser, ConstructableParser, std::shared_ptr<Parser>>(m, "Parser")
         .def(py::init<>())
         .def("parse",
-            [](ParserBase& self, py::dict& msg) {
+            [](Parser& self, py::dict& msg) {
                 auto msg_cpp = pyutils::py_object_to_json_container(msg);
 
                 self.parse(msg_cpp);
@@ -156,16 +163,9 @@ void init_parsers(py::module_& m) {
         py::arg("text"),
         "Parse is called with the full text. Returns a dict with parsed content.");
 
-    py::class_<Llama3JsonToolParser, std::shared_ptr<Llama3JsonToolParser>, ParserBase>(m, "Llama3JsonToolParser")
+    py::class_<Llama3JsonToolParser, std::shared_ptr<Llama3JsonToolParser>, Parser>(m, "Llama3JsonToolParser")
         .def(py::init<>());
 
-    py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, ParserBase>(m, "Llama3PythonicToolParser")
+    py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, Parser>(m, "Llama3PythonicToolParser")
         .def(py::init<>());
-
-    py::class_<ReasoningParser, std::shared_ptr<ReasoningParser>, IncrementalParserBase>(m, "ReasoningParser")
-        .def(py::init<bool, bool, std::string, std::string>(),
-            py::arg("expect_open_tag") = true,
-            py::arg("keep_original_content") = true,
-            py::arg("open_tag") = "<think>",
-            py::arg("close_tag") = "</think>");
 }
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 7ac5ee437e..94fb52355d 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -19,7 +19,7 @@ using ov::genai::CallbackTypeVariant;
 using ov::genai::StreamingStatus;
 using ov::genai::TextStreamer;
 using ov::genai::TextParserStreamer;
-using ov::genai::IncrementalParserBase;
+using ov::genai::IncrementalParser;
 using ov::genai::JsonContainer;
 using ov::genai::Tokenizer;
 
@@ -148,11 +148,11 @@ void init_streamers(py::module_& m) {
 
     // TODO: double check/add more relevant docstrings for TextParserStreamer.
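    // Illustrative Python usage this binding is meant to support (a sketch, not a docstring yet):
    //   class MyStreamer(TextParserStreamer):
    //       def write(self, message: dict) -> StreamingStatus: ...
    //   streamer = MyStreamer(tokenizer, parsers=[Phi4ReasoningIncrementalParser()])
    //   pipe.generate(prompt, config, streamer)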
py::class_, TextStreamer>(m, "TextParserStreamer") .def(py::init([](const Tokenizer& tokenizer, - std::vector> parsers) { + std::vector> parsers) { return std::make_shared(tokenizer, parsers); }), py::arg("tokenizer"), - py::arg("parsers") = std::vector>(), + py::arg("parsers") = std::vector>(), py::keep_alive<1, 3>(), "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") .def("write", diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index e31bd3c236..7660d1625b 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -95,7 +95,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: - ov::genai::DeepSeekR1ReasoningParser parser; + ov::genai::DeepSeekR1ReasoningIncrementalParser parser; JsonContainer msg; }; @@ -124,8 +124,8 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { } TEST(ParserTest, test_custom_parser) { - // Define a small custom parser derived from ParserBase - class CustomParser : public ov::genai::ParserBase { + // Define a small custom parser derived from Parser + class CustomParser : public ov::genai::Parser { public: void parse(ov::genai::JsonContainer& msg) override { // extract "content" @@ -168,7 +168,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { using namespace ov::genai; // Custom incremental parser: mirrors the Python logic - class CustomParser : public IncrementalParserBase { + class CustomParser : public IncrementalParser { public: bool main_part_started = false; @@ -210,7 +210,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { public: using TextParserStreamer::write; // Forwarding constructor to base class - CustomStreamer(ov::genai::Tokenizer& tok, const std::vector>& parsers) + CustomStreamer(ov::genai::Tokenizer& tok, const std::vector>& parsers) : ov::genai::TextParserStreamer(tok, parsers) {} JsonContainer final_msg; @@ -221,7 +221,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { }; Tokenizer tok; - std::shared_ptr parser = std::make_shared(); + std::shared_ptr parser = std::make_shared(); CustomStreamer streamer(tok, {parser}); diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 6b9022255d..2368571f62 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -4,7 +4,7 @@ from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model from utils.ov_genai_pipelines import create_ov_pipeline import pytest -from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig, ReasoningParser +from openvino_genai import Tokenizer, IncrementalParser, Parser, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, GenerationConfig, ReasoningIncrementalParser from transformers import AutoTokenizer import re @@ -50,7 +50,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()]) + streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningIncrementalParser()]) msg = {} for subword in stream_string: @@ -76,7 +76,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) 
             return StreamingStatus.RUNNING
-    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()])
+    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningIncrementalParser()])
 
     msg = {}
     answer = "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."
@@ -114,7 +114,7 @@ class CustomStreamer(TextParserStreamer):
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
-    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()])
+    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningIncrementalParser()])
 
     msg = {}
     for subword in split_answer:
@@ -133,7 +133,7 @@ def write(self, message):
 ])
 def test_incremental_phi4_reason_parser_nostreamer(answer):
     # In this test we are calling parser directly without streamer
-    parser = Phi4ReasoningParser()
+    parser = Phi4ReasoningIncrementalParser()
     stream_string = re.split(r"(\s+)", answer)
 
     msg = {}
@@ -169,7 +169,7 @@ class CustomStreamer(TextParserStreamer):
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
-    streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningParser(expect_open_tag=True, keep_original_content=keep_original_content)])
+    streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningIncrementalParser(expect_open_tag=True, keep_original_content=keep_original_content)])
 
     msg = {}
     for subword in stream_string:
@@ -202,7 +202,7 @@ def test_incremental_deepseek_parser():
     extended = stream_string[:]
     extended.insert(0, "")
 
-    parser = DeepSeekR1ReasoningParser()
+    parser = DeepSeekR1ReasoningIncrementalParser()
     for (prev_subword, subword) in zip(extended, stream_string):
         msg = parser.parse(msg, prev_subword, subword)
 
@@ -219,7 +219,7 @@ def test_incremental_deepseek_parser():
 def test_custom_incremental_parser(hf_ov_genai_models):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
-    class CustomParser(IncrementalParserBase):
+    class CustomParser(IncrementalParser):
         main_part_started: bool = False
 
         def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens = None, delta_tokens = None) -> str:
@@ -279,7 +279,7 @@ def test_custom_parser(tmp_path, model_id):
     pipe = create_ov_pipeline(models_path)
     tok = pipe.get_tokenizer()
 
-    class CustomParser(ParserBase):
+    class CustomParser(Parser):
         def parse(self, msg: dict):
             content = None
             if 'content' in msg:

From 01d422452d5ccd4ce8b89668c574cf9e76d470f4 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Wed, 22 Oct 2025 15:09:53 +0200
Subject: [PATCH 28/43] renaming leftovers

---
 src/cpp/include/openvino/genai/parsers.hpp    | 38 +++++------
 src/cpp/src/parsers.cpp                       | 68 +++++++++----------
 src/python/openvino_genai/__init__.py         |  7 +-
 src/python/openvino_genai/__init__.pyi        |  3 +-
 .../openvino_genai/py_openvino_genai.pyi      |  5 +-
 src/python/py_parsers.cpp                     | 24 ++++---
 tests/cpp/parser.cpp                          |  4 +-
 7 files changed, 81 insertions(+), 68 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 7225669f5a..33657072f2 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -16,7 +16,7 @@ class OPENVINO_GENAI_EXPORTS IncrementalParser {
 
     // Returns a string with the filtered text to be added to the content.
     virtual std::string parse(
-        JsonContainer& msg,
+        JsonContainer& message,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
@@ -38,7 +38,7 @@ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalPars
     virtual ~ReasoningIncrementalParser();
 
     std::string parse(
-        JsonContainer& msg,
+        JsonContainer& message,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
@@ -56,19 +56,33 @@ class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIn
     explicit Phi4ReasoningIncrementalParser(bool expect_open_tag = true) : ReasoningIncrementalParser(expect_open_tag) {};
 };
 
-class Parser {
+class OPENVINO_GENAI_EXPORTS Parser {
 public:
     Parser() = default;
     virtual ~Parser();
     virtual void parse(JsonContainer& text) = 0;
 };
 
+class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser {
+public:
+    ReasoningParser(
+        bool expect_open_tag = true,
+        bool keep_original_content = true,
+        const std::string& open_tag = "<think>",
+        const std::string& close_tag = "</think>");
+    void parse(JsonContainer& message) override;
+    ~ReasoningParser();
+private:
+    class ReasoningParserImpl;
+    std::unique_ptr<ReasoningParserImpl> m_impl;
+};
+
 class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama3PythonicToolParser(bool keep_original_content = true);
     ~Llama3PythonicToolParser();
-    void parse(JsonContainer& input) override;
+    void parse(JsonContainer& message) override;
 private:
     class Llama3PythonicToolParserImpl;
     std::unique_ptr<Llama3PythonicToolParserImpl> m_impl;
@@ -79,25 +93,11 @@ class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser {
 public:
     explicit Llama3JsonToolParser(bool keep_original_content = true);
     ~Llama3JsonToolParser();
-    void parse(JsonContainer& input) override;
+    void parse(JsonContainer& message) override;
 private:
     class Llama3JsonToolParserImpl;
     std::unique_ptr<Llama3JsonToolParserImpl> m_impl;
 };
 
-class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public Parser {
-public:
-    BaseReasoningParser(
-        bool expect_open_tag = true,
-        bool keep_original_content = true,
-        const std::string& open_tag = "<think>",
-        const std::string& close_tag = "</think>");
-    void parse(JsonContainer& input) override;
-    ~BaseReasoningParser();
-private:
-    class BaseReasoningParserImpl;
-    std::unique_ptr<BaseReasoningParserImpl> m_impl;
-};
-
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index c362d9f73d..a43a328a2f 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -31,7 +31,7 @@
     std::string parse(
-        JsonContainer& msg,
+        JsonContainer& message,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens,
@@ -45,17 +45,17 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         }
         m_first_run = false;
 
-        if (!msg.contains("reasoning_content")) {
-            msg["reasoning_content"] = "";
+        if (!message.contains("reasoning_content")) {
+            message["reasoning_content"] = "";
         }
-        if (!msg.contains("content")) {
-            msg["content"] = "";
+        if (!message.contains("content")) {
+            message["content"] = "";
         }
 
         auto txt_chunk = m_text_cache + delta_text;
-        auto reason_str = msg["reasoning_content"].get_string();
-        auto content_str = msg["content"].get_string();
+        auto reason_str = message["reasoning_content"].get_string();
+        auto content_str = message["content"].get_string();
 
         if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && m_expect_open_tag) {
             // Thinking has started
@@ -67,7 +67,7 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
             }
             m_think_tag_opened = true;
 
-            msg["reasoning_content"] = reason_str;
+            message["reasoning_content"] = reason_str;
             m_text_cache = "";
 
             if (txt_chunk.find(m_close_tag) != std::string::npos) {
@@ -80,7 +80,7 @@
                 }
                 m_think_tag_opened = false;
                 m_deactivated = true;
-                msg["reasoning_content"] = reason_str;
+                message["reasoning_content"] = reason_str;
             }
         } else if (m_think_tag_opened && txt_chunk.find(m_close_tag) != std::string::npos) {
             // Thinking tag was closed
@@ -95,7 +95,7 @@
                 delta_text = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
             }
 
-            msg["reasoning_content"] = reason_str;
+            message["reasoning_content"] = reason_str;
             m_text_cache = "";
             m_think_tag_opened = false;
             m_deactivated = true;
@@ -137,7 +137,7 @@
             if (!m_keep_original_content) {
                 delta_text = "";
             }
-            msg["reasoning_content"] = reason_str;
+            message["reasoning_content"] = reason_str;
         } else {
             // Think tag was not opened yet and not found in the current delta_text.
             // Accumulate text in the cache to detect if <think> is split between several delta_text pieces.
@@ -155,13 +155,13 @@
 std::string ReasoningIncrementalParser::parse(
-    JsonContainer& msg,
+    JsonContainer& message,
     const std::string& previous_text,
     std::string& delta_text,
     const std::optional<std::vector<int64_t>>& previous_tokens,
     const std::optional<std::vector<int64_t>>& delta_tokens
 ) {
-    return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens);
+    return m_impl->parse(message, previous_text, delta_text, previous_tokens, delta_tokens);
 }
 
 class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
@@ -169,13 +169,13 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
     Llama3PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {}
     bool m_keep_original_content;
 
-    void parse(JsonContainer& input) {
+    void parse(JsonContainer& message) {
         // Input example
-        // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
+        // string message = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
 
         // Regex to capture the [...] part
         std::smatch m;
-        const std::string& text = input["content"].get_string();
+        const std::string& text = message["content"].get_string();
         std::regex r(R"(\[.*?\])");
         if (!std::regex_search(text, m, r)) {
             return;
         }
@@ -197,11 +197,11 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
         }
 
         // Split function name and arguments
-        input["tool_calls"] = JsonContainer::array();
-        input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}}));
+        message["tool_calls"] = JsonContainer::array();
+        message["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}}));
 
         if (!m_keep_original_content) {
-            input["content"] = regex_replace(text, r, "");
+            message["content"] = regex_replace(text, r, "");
         }
     }
 };
@@ -210,8 +210,8 @@
 Llama3PythonicToolParser::Llama3PythonicToolParser(bool keep_original_content) {
     m_impl = std::make_unique<Llama3PythonicToolParserImpl>(keep_original_content);
 }
 
-void Llama3PythonicToolParser::parse(JsonContainer& input) {
-    m_impl->parse(input);
+void Llama3PythonicToolParser::parse(JsonContainer& message) {
+    m_impl->parse(message);
 }
 
 Llama3PythonicToolParser::~Llama3PythonicToolParser() = default;
@@ -245,15 +245,15 @@
 Llama3JsonToolParser::Llama3JsonToolParser(bool keep_original_content) {
     m_impl = std::make_unique<Llama3JsonToolParserImpl>(keep_original_content);
 }
 
-void Llama3JsonToolParser::parse(JsonContainer& input) {
-    m_impl->parse(input);
+void Llama3JsonToolParser::parse(JsonContainer& message) {
+    m_impl->parse(message);
 }
 
 Llama3JsonToolParser::~Llama3JsonToolParser() = default;
 
-class BaseReasoningParser::BaseReasoningParserImpl {
+class ReasoningParser::ReasoningParserImpl {
 public:
-    BaseReasoningParserImpl(bool expect_open_tag,
+    ReasoningParserImpl(bool expect_open_tag,
         bool keep_original_content,
         const std::string& open_tag,
         const std::string& close_tag):
         m_expect_open_tag(expect_open_tag),
         m_keep_original_content(keep_original_content),
         m_open_tag(open_tag),
         m_close_tag(close_tag) {};
 
-    void parse(JsonContainer& input) {
+    void parse(JsonContainer& message) {
         std::string reasoning_content;
-        std::string content = input["content"].get_string();
+        std::string content = message["content"].get_string();
 
         size_t start = content.find(m_open_tag);
         size_t end = content.find(m_close_tag);
@@ -273,13 +273,13 @@
             reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
             if (!m_keep_original_content) {
                 // Remove <think>...</think> from content
-                input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
+                message["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
             }
         } else {
             reasoning_content = "";
         }
 
-        input["reasoning_content"] = reasoning_content;
+        message["reasoning_content"] = reasoning_content;
     }
 private:
     bool m_expect_open_tag;
     bool m_keep_original_content;
     std::string m_open_tag;
     std::string m_close_tag;
 };
 
-BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
-    m_impl = std::make_unique<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_unique<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
 
-void BaseReasoningParser::parse(JsonContainer& input) {
-    m_impl->parse(input);
+void ReasoningParser::parse(JsonContainer& message) {
+    m_impl->parse(message);
 }
 
-BaseReasoningParser::~BaseReasoningParser() = default;
+ReasoningParser::~ReasoningParser() = default;
 
 Parser::~Parser() = default;
 
 } // namespace ov::genai
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index a0c06baec6..5673b941cb 100644
--- a/src/python/openvino_genai/__init__.py
+++ b/src/python/openvino_genai/__init__.py
@@ -24,12 +24,13 @@
 
 from .py_openvino_genai import (
     Parser,
+    ReasoningParser,
+    Llama3JsonToolParser,
+    Llama3PythonicToolParser,
     IncrementalParser,
+    ReasoningIncrementalParser,
     Phi4ReasoningIncrementalParser,
     DeepSeekR1ReasoningIncrementalParser,
-    Llama3JsonToolParser,
-    Llama3PythonicToolParser,
-    ReasoningIncrementalParser
 )
 
 __version__ = get_version()
diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi
index 04bd694c57..f92c55f4fd 100644
--- a/src/python/openvino_genai/__init__.pyi
+++ b/src/python/openvino_genai/__init__.pyi
@@ -39,6 +39,7 @@ from openvino_genai.py_openvino_genai import Phi4ReasoningIncrementalParser
 from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics
 from openvino_genai.py_openvino_genai import RawPerfMetrics
 from openvino_genai.py_openvino_genai import ReasoningIncrementalParser
+from openvino_genai.py_openvino_genai import ReasoningParser
 from openvino_genai.py_openvino_genai import SD3Transformer2DModel
 from openvino_genai.py_openvino_genai import Scheduler
 from openvino_genai.py_openvino_genai import SchedulerConfig
@@ -73,5 +74,5 @@ from openvino_genai.py_openvino_genai import draft_model
 from openvino_genai.py_openvino_genai import get_version
 import os as os
 from .
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 549da2bf3a..25bd1a6b15 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 
'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: 
""" Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2085,6 +2085,9 @@ class RawPerfMetrics: class ReasoningIncrementalParser(IncrementalParser): def __init__(self, expect_open_tag: bool = True, keep_original_content: bool = True, open_tag: str = '', close_tag: str = '') -> None: ... +class ReasoningParser(Parser): + def __init__(self, expect_open_tag: bool = True, keep_original_content: bool = True, open_tag: str = '', close_tag: str = '') -> None: + ... class SD3Transformer2DModel: """ SD3Transformer2DModel class. diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index e6b3437f87..d55e3d29ee 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -15,6 +15,7 @@ namespace py = pybind11; using ov::genai::IncrementalParser; using ov::genai::Parser; +using ov::genai::ReasoningParser; using ov::genai::ReasoningIncrementalParser; using ov::genai::Phi4ReasoningIncrementalParser; using ov::genai::DeepSeekR1ReasoningIncrementalParser; @@ -106,12 +107,12 @@ void init_parsers(py::module_& m) { py::class_>(m, "IncrementalParser") .def(py::init<>()) .def("parse", [](IncrementalParser& self, - py::dict& msg, + py::dict& message, std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); + auto msg_cpp = pyutils::py_object_to_json_container(message); auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); auto json_str = msg_cpp.to_json_string(); @@ -121,10 +122,10 @@ void init_parsers(py::module_& m) { py::dict result = json_mod.attr("loads")(json_str); // update msg with result for (auto item : result) { - msg[item.first] = item.second; + message[item.first] = item.second; } return res; - }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + }, py::arg("message"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); @@ -144,8 +145,8 @@ void init_parsers(py::module_& m) { py::class_>(m, "Parser") .def(py::init<>()) .def("parse", - [](Parser& self, py::dict& msg) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); + [](Parser& self, py::dict& message) { + auto msg_cpp = pyutils::py_object_to_json_container(message); self.parse(msg_cpp); // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, @@ -157,12 +158,19 @@ void init_parsers(py::module_& m) { // update msg with result for (auto item : result) { - msg[item.first] = item.second; + message[item.first] = item.second; } }, - py::arg("text"), + py::arg("message"), "Parse is called with the full text. 
Returns a dict with parsed content."); + py::class_, Parser>(m, "ReasoningParser") + .def(py::init(), + py::arg("expect_open_tag") = true, + py::arg("keep_original_content") = true, + py::arg("open_tag") = "", + py::arg("close_tag") = ""); + py::class_, Parser>(m, "Llama3JsonToolParser") .def(py::init<>()); diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 7660d1625b..7aa4caf6b8 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -62,7 +62,7 @@ TEST(ParserTest, test_reasoning_parser_1) { expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"); expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - std::shared_ptr parser = std::make_shared( + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ false ); @@ -80,7 +80,7 @@ TEST(ParserTest, test_reasoning_parser_2) { expected["content"] = prompt; expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - std::shared_ptr parser = std::make_shared( + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ true ); From fec89453572730332188893f8b8a34b834501fd4 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 16:55:57 +0200 Subject: [PATCH 29/43] some corrections --- src/cpp/src/llm/pipeline.cpp | 72 ++++++++++--------- src/cpp/src/parsers.cpp | 4 +- src/cpp/src/text_streamer.cpp | 2 +- .../openvino_genai/py_openvino_genai.pyi | 23 +++--- src/python/py_parsers.cpp | 43 +++-------- src/python/py_streamers.cpp | 52 ++++++-------- tests/python_tests/test_text_streamer.py | 2 +- 7 files changed, 88 insertions(+), 110 deletions(-) diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 513d047bf2..f6f206a6ab 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -15,6 +15,41 @@ #include "speculative_decoding/speculative_decoding_stateful.hpp" #include "utils.hpp" +namespace { + +void run_parsers(ov::genai::DecodedResults& res, const ov::genai::OptionalGenerationConfig& generation_config, const ov::genai::StreamerVariant& streamer) { + // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message + // Streaming is available only for batch size 1 therefore only parsed[0] + if (auto streamer_obj = std::get_if>(&streamer)) { + if (auto parser_streamer = std::dynamic_pointer_cast(*streamer_obj)) { + res.parsed.resize(1); + res.parsed[0] = parser_streamer->get_parsed_message(); + } + } + + if (!generation_config.has_value() || generation_config->parsers.empty()) { + return; + } + + std::vector> parsers = generation_config->parsers; + res.parsed.resize(res.texts.size()); + // Apply Base parsers sequentially even if IncrementalParser has run. 
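+    // e.g. parsers = {std::make_shared<ReasoningParser>(), std::make_shared<Llama3JsonToolParser>()}
+    // would first fill "reasoning_content" and then attach "tool_calls" to the same message.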
+ for (size_t i = 0; i < res.texts.size(); ++i) { + auto& msg = res.parsed[i]; + if (!msg.contains("content")) { + // Initialize msg with content + msg["content"] = res.texts[i]; + } + for (auto& parser: parsers) { + // TODO: Check the state of incremental parser and reset if necessary + parser->parse(msg); + } + res.parsed[i] = msg; + } +} + +} + namespace ov { namespace genai { @@ -251,36 +286,7 @@ DecodedResults LLMPipeline::generate( OptionalGenerationConfig generation_config, StreamerVariant streamer) { auto res = m_pimpl->generate(inputs, generation_config, streamer); - - // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message - // Streaming is available only for batch size 1 therefore only parsed[0] - if (auto streamer_obj = std::get_if>(&streamer)) { - if (auto parser_streamer = std::dynamic_pointer_cast(*streamer_obj)) { - res.parsed.resize(1); - res.parsed[0] = parser_streamer->get_parsed_message(); - } - } - - if (!generation_config.has_value() || generation_config->parsers.empty()) { - return res; - } - - std::vector> parsers = (*generation_config).parsers; - res.parsed.resize(res.texts.size()); - // Apply Base parsers sequentially even if IncrementalParser has run. - for (size_t i = 0; i < res.texts.size(); ++i) { - auto& msg = res.parsed[i]; - if (!msg.contains("content")) { - // Initialize msg with content - msg["content"] = res.texts[i]; - } - for (auto& parser: parsers) { - // TODO: Check the state of incremental parser and reset if necessary - parser->parse(msg); - } - res.parsed[i] = msg; - } - + run_parsers(res, generation_config, streamer); return res; } @@ -288,8 +294,10 @@ DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config auto config_arg = utils::get_config_from_map(config_map); GenerationConfig config = (config_arg.has_value()) ? *config_arg : get_generation_config(); config.update_generation_config(config_map); - - return m_pimpl->generate(text, config, utils::get_streamer_from_map(config_map)); + auto streamer = utils::get_streamer_from_map(config_map); + auto res = m_pimpl->generate(text, config, streamer); + run_parsers(res, config_arg, streamer); + return res; } EncodedResults LLMPipeline::generate( diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index a43a328a2f..2ed107260e 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -17,8 +17,8 @@ class ReasoningIncrementalParser::ReasoningParserImpl { std::string m_open_tag; std::string m_close_tag; std::string m_text_cache = ""; -public: bool m_deactivated = false; +public: ReasoningParserImpl() = default; ReasoningParserImpl(bool expect_open_tag, @@ -189,7 +189,7 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) 
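        // Worked example: for text = R"([get_weather(location="New York, NY", unit="celsius")])"
        // the bracket regex above matches the whole call, `name` holds "get_weather" and
        // `args` holds R"(location="New York, NY", unit="celsius")"; the key="value"
        // regex below then splits `args` into the arguments map.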
JsonContainer kv; - // Parse arguments of the form key='value' + // Parse arguments of the form key="value" std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); for (; it != std::sregex_iterator(); ++it) { diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index ee48cff1f9..9df53aab71 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -136,7 +136,7 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) { m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } - m_text_buffer = message; + m_text_buffer += message; return write(m_parsed_message); } diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 25bd1a6b15..273f2d778b 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -1462,7 +1462,7 @@ class ImageGenerationPerfMetrics: class IncrementalParser: def __init__(self) -> None: ... - def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + def parse(self, message: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. """ @@ -1842,7 +1842,7 @@ class MeanStdPair: class Parser: def __init__(self) -> None: ... - def parse(self, text: dict) -> None: + def parse(self, message: dict) -> None: """ Parse is called with the full text. Returns a dict with parsed content. """ @@ -3390,19 +3390,22 @@ class TextEmbeddingPipeline: Waits computed embeddings for a query """ class TextParserStreamer(TextStreamer): + """ + + Base class for text streamers that work with parsed messages. To use it, inherit from this class and implement the write method, which takes a dict as its input parameter. + + tokenizer: Tokenizer object to decode tokens into text. + parsers: vector of IncrementalParser to process the text stream incrementally. + """ def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParser] = []) -> None: - """ - TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. - """ - def _write(self, arg0: collections.abc.Sequence[typing.SupportsInt] | str) -> StreamingStatus: ... - def get_parsed_message(self) -> dict: + def _write(self, chunk: collections.abc.Sequence[typing.SupportsInt] | str) -> StreamingStatus: """ - Get the current parsed message + This is a private method used to call write with integer tokens or text chunks. It is used for test purposes only. """ - def write(self, message: dict) -> StreamingStatus: + def get_parsed_message(self) -> dict: """ - Write is called with a dict. Returns StreamingStatus. + Returns the accumulated message. 
""" class TextRerankPipeline: """ diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d55e3d29ee..30faa18388 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -46,28 +46,12 @@ class ConstructableIncrementalParser: public IncrementalParser { py::function parse_method = py::get_override(static_cast(this), "parse"); if (!parse_method) { - throw std::runtime_error("parse method not implemented in Python subclass"); + OPENVINO_THROW("parse method not implemented in Python subclass"); } + + auto res = parse_method(py_msg, previous_text, delta_text, previous_tokens, delta_tokens); + msg = pyutils::py_object_to_json_container(py_msg); - auto res = parse_method( - py_msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens - ); - - // iterate throught py_msg and update msg - auto msg_anymap = pyutils::py_object_to_any_map(py_msg); - for (const auto& [key, value] : msg_anymap) { - if (value.is()) { - msg[key] = value.as(); - } else if (value.is()) { - msg[key] = JsonContainer(value.as()); - } else { - OPENVINO_THROW("Unsupported type in JsonContainer update from Python dict"); - } - } return res.cast(); } }; @@ -79,24 +63,13 @@ class ConstructableParser: public Parser { py::function parse_method = py::get_override(static_cast(this), "parse"); if (!parse_method) { - throw std::runtime_error("parse method not implemented in Python subclass"); + OPENVINO_THROW("parse method not implemented in Python subclass"); } // Convert JsonContainer to py::dict - py::dict py_msg = pyutils::json_container_to_py_object(msg); - parse_method(py_msg); - - // iterate throught py_msg and update msg - auto msg_anymap = pyutils::py_object_to_any_map(py_msg); - for (const auto& [key, value] : msg_anymap) { - if (value.is()) { - msg[key] = value.as(); - } else if (value.is()) { - msg[key] = JsonContainer(value.as()); - } else { - OPENVINO_THROW("Unsupported type in JsonContainer update from Python dict"); - } - } + py::dict py_msg = pyutils::json_container_to_py_object(msg); + parse_method(py_msg); + msg = pyutils::py_object_to_json_container(py_msg); } }; diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 94fb52355d..ba596ead92 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -39,6 +39,13 @@ callback: User-defined callback function to process the decoded text, callback s detokenization_params: AnyMap with detokenization parameters, e.g. ov::genai::skip_special_tokens(...) )"; +auto text_parser_streamer_docstring = R"( +Base class for text streamers which works with parsed messages. In order to use inherit from this class and implement write method which takes a dict as input parameter. + +tokenizer: Tokenizer object to decode tokens into text. +parsers: vector of IncrementalParser to process the text stream incrementally. +)"; + class ConstructableStreamer: public StreamerBase { OPENVINO_SUPPRESS_DEPRECATED_START bool put(int64_t token) override { @@ -88,15 +95,6 @@ class ConstructableTextParserStreamer: public TextParserStreamer { return res.cast(); } - - StreamingStatus write(py::dict& message) { - PYBIND11_OVERRIDE_PURE( - StreamingStatus, - TextParserStreamer, - "write", - message - ); - } }; } // namespace @@ -145,38 +143,34 @@ void init_streamers(py::module_& m) { py::arg("token")) .def("end", &TextStreamer::end); - // TODO: double check/add more relevant docstrings for TextParserStreamer. 
- py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer") + py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer", text_parser_streamer_docstring) .def(py::init([](const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers) { return std::make_shared<ConstructableTextParserStreamer>(tokenizer, parsers); }), py::arg("tokenizer"), py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParser>>(), - py::keep_alive<1, 3>(), - "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") - .def("write", - [](TextParserStreamer& self, py::dict& message) { - // Downcast to ConstructableTextParserStreamer if needed - auto* derived = dynamic_cast<ConstructableTextParserStreamer*>(&self); - if (!derived) { - throw std::runtime_error("write(py::dict&) only available for ConstructableTextParserStreamer"); - } - return derived->write(message); - }, - py::arg("message"), - "Write is called with a dict. Returns StreamingStatus.") - .def("_write", [](TextParserStreamer& self, std::variant<std::vector<int64_t>, std::string> chunk) -> StreamingStatus { + py::keep_alive<1, 3>()) + + // If we inherit and implement 'write' in Python and try to call write with text chunks or integer tokens + // then the Python implementation will be called, since Python does not have overloads. + // But for tests we need to check that when we call write with strings/integer tokens they are accumulated and stored correctly in py::dict. + // Therefore we provide a private method '_write' which is used to call 'write' with correct parameters from the C++ side. + .def("_write", + [](TextParserStreamer& self, std::variant<std::vector<int64_t>, std::string> chunk) -> StreamingStatus { if (auto _token = std::get_if<std::vector<int64_t>>(&chunk)) { return self.write(*_token); } else if (auto _str = std::get_if<std::string>(&chunk)) { auto res = self.write(*_str); return std::get<StreamingStatus>(res); } - }) - .def("get_parsed_message", + return StreamingStatus::RUNNING; + }, + py::arg("chunk"), "This is a private method used to call write with integer tokens or text chunks. 
It is used for test purposes only.") + .def("get_parsed_message", [](TextParserStreamer& self) -> py::dict{ return pyutils::json_container_to_py_object(self.get_parsed_message()); }, "Returns the accumulated message."); } diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index a3ea55d225..75804256b1 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -71,7 +71,7 @@ def test_text_prompts(tmp_path, prompt, model_id): for token in tokens: streamer.write(token) streamer.end() - + assert ''.join(accumulated) == ov_tokenizer.decode(tokens) for chunk_size in [1,2,3,4,5]: From 9fa7d01d1fcb1cf2097f014c90471d13d486c68d Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 17:25:17 +0200 Subject: [PATCH 30/43] hide TextParsedStreamerImplementation --- .../include/openvino/genai/text_streamer.hpp | 13 ++++---- src/cpp/src/text_streamer.cpp | 33 ++++++++++++++----- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index fabea0f524..53e06b7a93 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -49,18 +49,19 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { public: + class TextParserStreamerImpl; using TextStreamer::write; + TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers = {}); - + ~TextParserStreamer(); + virtual StreamingStatus write(JsonContainer& message) = 0; CallbackTypeVariant write(std::string message); - - JsonContainer get_parsed_message() const { return m_parsed_message; } + + JsonContainer get_parsed_message() const; private: - JsonContainer m_parsed_message; - std::string m_text_buffer; - std::vector<std::shared_ptr<IncrementalParser>> m_parsers; + std::unique_ptr<TextParserStreamerImpl> m_pimpl; }; } // namespace genai diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 9df53aab71..9ad9bd9543 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -124,21 +124,38 @@ void TextStreamer::end() { StreamerBase::~StreamerBase() = default; -TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers) - : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { - return this->write(s); - }), m_parsers{parsers} {} +class TextParserStreamer::TextParserStreamerImpl { +public: +std::vector<std::shared_ptr<IncrementalParser>> m_parsers; +JsonContainer m_parsed_message; +TextParserStreamerImpl(std::vector<std::shared_ptr<IncrementalParser>> parsers) : m_parsers{parsers} {} +void parse(std::string message) { for (auto& parser: m_parsers) { message = parser->parse(m_parsed_message, message); // The message can be modified inside the parser, e.g. if the parser extracted a tool call from the message content m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } } }; +TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers) : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { return this->write(s); }), m_pimpl{std::make_unique<TextParserStreamerImpl>(parsers)} {} CallbackTypeVariant TextParserStreamer::write(std::string message) { 
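    // Run the freshly decoded chunk through all incremental parsers, then hand the
    // accumulated JsonContainer to the user-defined write(JsonContainer&) overload.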
m_pimpl->parse(message); return write(m_pimpl->m_parsed_message); } JsonContainer TextParserStreamer::get_parsed_message() const { return m_pimpl->m_parsed_message; } +TextParserStreamer::~TextParserStreamer() = default; + } // namespace genai } // namespace ov From a2307a2a87d0734c1e3d97f2b87b8500c828e342 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 18:16:07 +0200 Subject: [PATCH 31/43] remove redundant previous_text, previous_tokens, add docstrings --- src/cpp/include/openvino/genai/parsers.hpp | 215 ++++++++++++++++----- src/cpp/src/parsers.cpp | 29 +-- src/python/py_parsers.cpp | 15 +- tests/cpp/parser.cpp | 27 +-- tests/python_tests/test_parsers.py | 13 +- 5 files changed, 179 insertions(+), 120 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 33657072f2..f952268d97 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -10,93 +10,202 @@ namespace ov { namespace genai { +/** + * @brief Abstract base class for parsers that process complete text content at the end of generation. + */ +class OPENVINO_GENAI_EXPORTS Parser { +public: + Parser() = default; + virtual ~Parser(); + + /** + * @brief Parse complete text content at the end of a generate call. + * + * This method processes the entire text content and extracts or modifies + * information as needed. The results are stored in the provided JsonContainer. + * + * @param message JsonContainer containing the text to parse and to store results + */ + virtual void parse(JsonContainer& message) = 0; +}; + +class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { +public: + /** + * @brief ReasoningParser extracts reasoning content between open and close tags from text. + * The 'content' field should be filled in order to extract reasoning content. + * The reasoning content is stored in the 'reasoning_content' field of the JsonContainer. + * + * @param expect_open_tag If true then open_tag is expected to be generated, if false then it's already part of the model input string + * @param keep_original_content Whether to preserve the original 'content' including reasoning sections + * @param open_tag The opening tag (default: "<think>") + * @param close_tag The closing tag (default: "</think>") + */ + ReasoningParser( + bool expect_open_tag = true, + bool keep_original_content = true, + const std::string& open_tag = "<think>", + const std::string& close_tag = "</think>"); + + /** + * @brief Parse complete text content at the end of a generate call. + * + * This method processes the entire text content and extracts or modifies + * information as needed. The results are stored in the provided JsonContainer. + * + * @param message JsonContainer containing the text to parse and to store results + */ + void parse(JsonContainer& message) override; + ~ReasoningParser(); +private: + class ReasoningParserImpl; + std::unique_ptr<ReasoningParserImpl> m_impl; +}; + +/** + * @brief Parser for Llama 3 Pythonic tool calls format. + * + * Llama3PythonicToolParser extracts tool calls from text content formatted + * in Llama 3's Pythonic style, e.g. [get_weather(location='New York, NY', unit='celsius')]. + * It does not modify the original content, + * only extracts and adds tool call information to the message. + */ +class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { +public: + explicit Llama3PythonicToolParser(); + ~Llama3PythonicToolParser(); + + /** + * @brief Parse Llama 3 Pythonic tool calls from text. 
+ * + * Extracts tool call information from text formatted in Llama 3's Pythonic style + * and adds the 'tool_calls' to the JsonContainer without modifying the original content. + * + * @param message JsonContainer containing the text to parse and to store tool call results + */ + void parse(JsonContainer& message) override; +private: + class Llama3PythonicToolParserImpl; + std::unique_ptr m_impl; +}; + +/** + * @brief Parser for Llama 3 JSON tool calls format. + * + * Llama3JsonToolParser extracts tool calls from text content formatted + * in Llama 3's JSON style, e.g. {"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", ...}}}. + * It does not modify the original content, only extracts and adds tool call information to the message. + */ +class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser { +public: + explicit Llama3JsonToolParser(); + ~Llama3JsonToolParser(); + + /** + * @brief Parse Llama 3 JSON tool calls from text. + * + * Extracts tool call information from text formatted in Llama 3's JSON style + * and adds the tool calls to the JsonContainer without modifying the original content. + * + * @param message JsonContainer containing the text to parse and to store tool call results + */ + void parse(JsonContainer& message) override; +private: + class Llama3JsonToolParserImpl; + std::unique_ptr m_impl; +}; + +/** + * @brief Abstract base class for incremental parsers that process text during streaming. + */ class OPENVINO_GENAI_EXPORTS IncrementalParser { public: IncrementalParser() = default; - // We return string which with filtered text to be added to content. + /** + * @brief Parse incremental text content and return filtered text. + * + * This method processes incoming text deltas and returns filtered text that should + * be added to the content. + * + * @param message JsonContainer to store parsed results and metadata + * @param delta_text New text chunk to be processed in this step + * @param delta_tokens Optional vector of new token IDs to be processed in case if more fast token-based processing is needed. + * @return std::string Filtered text that should be added to the content + */ virtual std::string parse( JsonContainer& message, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, + std::string& delta_text, // TODO: double check const std::optional>& delta_tokens = std::nullopt ) = 0; virtual ~IncrementalParser() = default; }; +/** + * @brief Incremental parser for reasoning content with configurable tags. + * + * Extracts text with open and close tags. Original JsonContainer must have 'content' field. + * The reasoning content is stored in the 'reasoning_content' field of the JsonContainer. + */ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalParser { private: class ReasoningParserImpl; std::unique_ptr m_impl; public: + /** + * @brief Constructor for ReasoningIncrementalParser. 
+ * + * @param expect_open_tag If true then open_tag is expected to be generated, if false then it's already part of the model input string + * @param keep_original_content If true then original 'content' is preserved, otherwise reasoning text is removed from 'content' + * @param open_tag The opening tag (default: "<think>") + * @param close_tag The closing tag (default: "</think>") + */ ReasoningIncrementalParser(bool expect_open_tag = true, - bool keep_original_content = true, - const std::string& open_tag = "<think>", + bool keep_original_content = true, + const std::string& open_tag = "<think>", const std::string& close_tag = "</think>"); virtual ~ReasoningIncrementalParser(); + /** + * @brief Parse reasoning content incrementally. + * + * Processes text streams containing reasoning sections marked by configurable tags. + * Can filter out reasoning content or preserve it based on parser configuration. + * + * @param message JsonContainer to store parsed results and reasoning metadata + * @param delta_text New text chunk to be processed in this step + * @param delta_tokens Optional vector of new token IDs to be processed + * @return std::string Filtered text with reasoning content processed according to configuration + */ std::string parse( JsonContainer& message, - const std::string& previous_text, std::string& delta_text, - const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt, const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt ) override; }; +/** + * @brief Specialized incremental parser for DeepSeek R1 model reasoning format. + * + * DeepSeekR1ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser + * configured specifically for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. + */ class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit DeepSeekR1ReasoningIncrementalParser(bool expect_open_tag = false) : ReasoningIncrementalParser(expect_open_tag) {}; + explicit DeepSeekR1ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/false) {}; }; +/** + * @brief Specialized incremental parser for Phi-4 model reasoning format. + * + * Phi4ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser + * configured specifically for the Phi-4 model's reasoning format, which typically + * expects an opening tag by default. 
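+ *
+ * A minimal usage sketch (illustrative only; chunk boundaries are arbitrary):
+ * @code
+ * ov::genai::Phi4ReasoningIncrementalParser parser;
+ * ov::genai::JsonContainer msg;
+ * for (std::string chunk : {"<think>plan the steps", "</think>", "final answer"}) {
+ *     std::string filtered = parser.parse(msg, chunk);
+ *     // `filtered` carries only the non-reasoning part of the chunk, while
+ *     // msg["reasoning_content"] accumulates the text between <think> and </think>.
+ * }
+ * @endcode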
+ */ class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit Phi4ReasoningIncrementalParser(bool expect_open_tag = true) : ReasoningIncrementalParser(expect_open_tag) {}; -}; - -class OPENVINO_GENAI_EXPORTS Parser { -public: - Parser() = default; - virtual ~Parser(); - virtual void parse(JsonContainer& text) = 0; -}; - -class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { -public: - ReasoningParser( - bool expect_open_tag = true, - bool keep_original_content = true, - const std::string& open_tag = "", - const std::string& close_tag = ""); - void parse(JsonContainer& message) override; - ~ReasoningParser(); -private: - class ReasoningParserImpl; - std::unique_ptr m_impl; -}; - -class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { -// Does not modify original content, only extracts and adds tool calls -public: - explicit Llama3PythonicToolParser(bool keep_original_content = true); - ~Llama3PythonicToolParser(); - void parse(JsonContainer& message) override; -private: - class Llama3PythonicToolParserImpl; - std::unique_ptr m_impl; -}; - -class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser { -// Does not modify original content, only extracts and adds tool calls -public: - explicit Llama3JsonToolParser(bool keep_original_content = true); - ~Llama3JsonToolParser(); - void parse(JsonContainer& message) override; -private: - class Llama3JsonToolParserImpl; - std::unique_ptr m_impl; + explicit Phi4ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/true) {}; }; } // namespace genai diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 2ed107260e..cff484105f 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -32,9 +32,7 @@ class ReasoningIncrementalParser::ReasoningParserImpl { std::string parse( JsonContainer& message, - const std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { if (m_deactivated) { @@ -156,19 +154,14 @@ ReasoningIncrementalParser::~ReasoningIncrementalParser() = default; std::string ReasoningIncrementalParser::parse( JsonContainer& message, - const std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { - return m_impl->parse(message, previous_text, delta_text, previous_tokens, delta_tokens); + return m_impl->parse(message, delta_text, delta_tokens); } class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { public: - Llama3PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} - bool m_keep_original_content; - void parse(JsonContainer& message) { // Input example // string message = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -199,15 +192,11 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { // Split function name and arguments message["tool_calls"] = JsonContainer::array(); message["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); - - if (!m_keep_original_content) { - message["content"] = regex_replace(text, r, ""); - } } }; -Llama3PythonicToolParser::Llama3PythonicToolParser(bool keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3PythonicToolParser::Llama3PythonicToolParser() { + m_impl = std::make_unique(); } void Llama3PythonicToolParser::parse(JsonContainer& message) { @@ -217,11 +206,7 @@ void 
Llama3PythonicToolParser::parse(JsonContainer& message) { Llama3PythonicToolParser::~Llama3PythonicToolParser() = default; class Llama3JsonToolParser::Llama3JsonToolParserImpl { -private: - bool m_keep_original_content; public: - Llama3JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} - void parse(JsonContainer& message) { // Find JSON in the message std::string msg_content = message["content"].get_string(); @@ -234,15 +219,11 @@ class Llama3JsonToolParser::Llama3JsonToolParserImpl { auto res = JsonContainer::array(); res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); message["tool_calls"] = res; - - if (!m_keep_original_content) { - message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); - } } }; -Llama3JsonToolParser::Llama3JsonToolParser(bool keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3JsonToolParser::Llama3JsonToolParser() { + m_impl = std::make_unique(); } void Llama3JsonToolParser::parse(JsonContainer& message) { diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 30faa18388..7818044e71 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -36,9 +36,7 @@ class ConstructableIncrementalParser: public IncrementalParser { using IncrementalParser::IncrementalParser; std::string parse( JsonContainer& msg, - const std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt ) override { // Convert JsonContainer to py::dict @@ -49,7 +47,7 @@ class ConstructableIncrementalParser: public IncrementalParser { OPENVINO_THROW("parse method not implemented in Python subclass"); } - auto res = parse_method(py_msg, previous_text, delta_text, previous_tokens, delta_tokens); + auto res = parse_method(py_msg, delta_text, delta_tokens); msg = pyutils::py_object_to_json_container(py_msg); return res.cast(); @@ -81,12 +79,10 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](IncrementalParser& self, py::dict& message, - std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { auto msg_cpp = pyutils::py_object_to_json_container(message); - auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + auto res = self.parse(msg_cpp, delta_text, delta_tokens); auto json_str = msg_cpp.to_json_string(); // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, @@ -98,8 +94,7 @@ void init_parsers(py::module_& m) { message[item.first] = item.second; } return res; - }, py::arg("message"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, + }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. 
Returns a string with any additional text to append to the current output."); py::class_, IncrementalParser>(m, "ReasoningIncrementalParser") @@ -110,10 +105,10 @@ void init_parsers(py::module_& m) { py::arg("close_tag") = ""); py::class_, IncrementalParser>(m, "Phi4ReasoningIncrementalParser") - .def(py::init(), py::arg("expect_open_tag") = true); + .def(py::init<>()); py::class_, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser") - .def(py::init(), py::arg("expect_open_tag") = false); + .def(py::init<>()); py::class_>(m, "Parser") .def(py::init<>()) diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 7aa4caf6b8..ca2b39453b 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -33,28 +33,6 @@ TEST(ParserTest, test_llama3_parser_1) { ASSERT_TRUE(expected == input); } -TEST(ParserTest, test_llama3_parser_2) { - std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; - - JsonContainer expected; - expected["content"] = std::string(R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n<|eom_id|>)"); - expected["tool_calls"] = JsonContainer::array(); - expected["tool_calls"].push_back(JsonContainer(ov::AnyMap({ - {"name", "get_weather"}, - {"arguments", ov::AnyMap{ - {"location", "New York, NY"}, - {"unit", "celsius"} - }} - }))); - - std::shared_ptr parser = std::make_shared(/*keep_original_content*/ false); - JsonContainer input; - input["content"] = prompt; - parser->parse(input); - - ASSERT_EQ(input, expected); -} - TEST(ParserTest, test_reasoning_parser_1) { std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"; @@ -116,9 +94,8 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { JsonContainer msg; for (int i = 1; i < input_stream.size(); i++) { - std::string previous_text = input_stream[i - 1]; std::string delta_text = input_stream[i]; - delta_text = parser.parse(msg, previous_text, delta_text); + delta_text = parser.parse(msg, delta_text); } ASSERT_EQ(msg["reasoning_content"], ref_res); } @@ -173,9 +150,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { bool main_part_started = false; std::string parse(JsonContainer& msg, - const std::string& previous_text, std::string& delta_text, - const std::optional>& /*previous_tokens*/ = std::nullopt, const std::optional>& /*delta_tokens*/ = std::nullopt) override { // Ensure fields exist (Python test used dict defaults) if (!msg.contains("content")) { diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 2368571f62..e5f33ac955 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -138,7 +138,7 @@ def test_incremental_phi4_reason_parser_nostreamer(answer): stream_string = re.split(r"(\s+)", answer) msg = {} for subword in stream_string: - parser.parse(msg, '', subword) + parser.parse(msg, subword) # When parser is called from streamer, it is expected that content is accumulated inside streamer. # Here we are calling parser manually therefore we need to accumulate content manually. 
msg['content'] += subword @@ -199,12 +199,9 @@ def test_incremental_deepseek_parser(): think_content = full_str.split("</think>")[0] content = full_str.split("</think>")[1] - extended = stream_string[:] - extended.insert(0, "") - parser = DeepSeekR1ReasoningIncrementalParser() - for (prev_subword, subword) in zip(extended, stream_string): - msg = parser.parse(msg, prev_subword, subword) + parser = DeepSeekR1ReasoningIncrementalParser() + for subword in stream_string: + msg = parser.parse(msg, subword) assert msg['reasoning_content'] == think_content assert msg['content'] == content @@ -222,7 +219,7 @@ def test_custom_incremental_parser(hf_ov_genai_models): class CustomParser(IncrementalParser): main_part_started: bool = False - def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens = None, delta_tokens = None) -> str: + def parse(self, msg: dict, delta_text: str, delta_tokens = None) -> str: if 'content' not in msg: msg['content'] = '' if 'main_text' not in msg: @@ -311,3 +308,5 @@ def parse(self, msg: dict): assert 'reasoning_content' in res.parsed[0] assert res.parsed[0]['reasoning_content'] != "" assert res.parsed[0]['reasoning_content'] == think_text + +# TODO: add test for resetting incremental parser at generation start From cada0556078defcd8931a22fb30ddeb7965fe78b Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 20:04:25 +0200 Subject: [PATCH 32/43] add decorator to call/reset parser before/after generate --- src/cpp/src/llm/pipeline.cpp | 44 +++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index f6f206a6ab..0b2a445db4 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -17,22 +17,37 @@ namespace { -void run_parsers(ov::genai::DecodedResults& res, const ov::genai::OptionalGenerationConfig& generation_config, const ov::genai::StreamerVariant& streamer) { +// This is a decorator function that wraps a generation callable to apply parsers and reset them before generation if needed. +ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGenerationConfig& generation_config, + const ov::genai::StreamerVariant& streamer, + std::function<ov::genai::DecodedResults()> generate_callable) { + + std::shared_ptr<ov::genai::TextParserStreamer> parser_streamer; // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message // Streaming is available only for batch size 1 therefore only parsed[0] if (auto streamer_obj = std::get_if<std::shared_ptr<ov::genai::StreamerBase>>(&streamer)) { - if (auto parser_streamer = std::dynamic_pointer_cast<ov::genai::TextParserStreamer>(*streamer_obj)) { - res.parsed.resize(1); - res.parsed[0] = parser_streamer->get_parsed_message(); - } + parser_streamer = std::dynamic_pointer_cast<ov::genai::TextParserStreamer>(*streamer_obj); + } + + // TODO: take this from the generation config once 'need_to_reset_parser' is available there + bool need_to_reset_parser = true; + if (parser_streamer && need_to_reset_parser) { + parser_streamer->reset(); } + auto res = generate_callable(); + + res.parsed.resize(1); + res.parsed[0] = parser_streamer->get_parsed_message(); + + // If no parsers are defined, return if (!generation_config.has_value() || generation_config->parsers.empty()) { - return; + return res; } std::vector<std::shared_ptr<ov::genai::Parser>> parsers = generation_config->parsers; res.parsed.resize(res.texts.size()); + // Apply Base parsers sequentially even if IncrementalParser has run. 
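+    // Note: if a TextParserStreamer already populated res.parsed[0] during streaming,
+    // the base parsers below run on top of that message; otherwise each message
+    // starts from the raw generated text via the "content" initialization below.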
for (size_t i = 0; i < res.texts.size(); ++i) { auto& msg = res.parsed[i]; @@ -40,12 +55,13 @@ void run_parsers(ov::genai::DecodedResults& res, const ov::genai::OptionalGenera // Initialize msg with content msg["content"] = res.texts[i]; } + for (auto& parser: parsers) { - // TODO: Check the state of incremental parser and reset if necessary parser->parse(msg); } res.parsed[i] = msg; } + return res; } } @@ -285,9 +301,10 @@ DecodedResults LLMPipeline::generate( StringInputs inputs, OptionalGenerationConfig generation_config, StreamerVariant streamer) { - auto res = m_pimpl->generate(inputs, generation_config, streamer); - run_parsers(res, generation_config, streamer); - return res; + + return run_generate_with_parsers(generation_config, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(inputs, generation_config, streamer); + }); } DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config_map) { @@ -295,9 +312,10 @@ DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config GenerationConfig config = (config_arg.has_value()) ? *config_arg : get_generation_config(); config.update_generation_config(config_map); auto streamer = utils::get_streamer_from_map(config_map); - auto res = m_pimpl->generate(text, config, streamer); - run_parsers(res, config_arg, streamer); - return res; + + return run_generate_with_parsers(config_arg, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(text, config, streamer); + }); } EncodedResults LLMPipeline::generate( From b18ee75b125ed767fce3064848b7fda621313b0f Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 20:04:36 +0200 Subject: [PATCH 33/43] add reset() method --- src/cpp/include/openvino/genai/parsers.hpp | 18 ++- .../include/openvino/genai/text_streamer.hpp | 2 + src/cpp/src/llm/pipeline.cpp | 8 +- src/cpp/src/parsers.cpp | 17 ++- src/cpp/src/text_streamer.cpp | 7 ++ src/python/py_parsers.cpp | 110 ++++++++++-------- tests/cpp/parser.cpp | 4 + 7 files changed, 106 insertions(+), 60 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index f952268d97..c4163f8201 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -135,10 +135,15 @@ class OPENVINO_GENAI_EXPORTS IncrementalParser { */ virtual std::string parse( JsonContainer& message, - std::string& delta_text, // TODO: double check + std::string& delta_text, const std::optional>& delta_tokens = std::nullopt ) = 0; + /** + * @brief Reset the internal state of the parser. + */ + virtual void reset() = 0; + virtual ~IncrementalParser() = default; }; @@ -149,9 +154,6 @@ class OPENVINO_GENAI_EXPORTS IncrementalParser { * The reasoning content is stored in the 'reasoning_content' field of the JsonContainer. */ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalParser { -private: - class ReasoningParserImpl; - std::unique_ptr m_impl; public: /** * @brief Constructor for ReasoningIncrementalParser. @@ -183,6 +185,14 @@ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalPars std::string& delta_text, const std::optional>& delta_tokens = std::nullopt ) override; + + /** + * @brief Reset the internal state of the parser. 
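+     * Call this before starting a new generation so that cached text and tag state
+     * from the previous stream do not leak into the next one.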
+ */ + void reset() override; +private: + class ReasoningParserImpl; + std::unique_ptr m_impl; }; /** diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 53e06b7a93..06f400a88e 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -60,6 +60,8 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { CallbackTypeVariant write(std::string message); JsonContainer get_parsed_message() const; + + void reset(); private: std::unique_ptr m_pimpl; }; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 0b2a445db4..7cee7d9ac3 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -36,9 +36,11 @@ ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGen } auto res = generate_callable(); - - res.parsed.resize(1); - res.parsed[0] = parser_streamer->get_parsed_message(); + + if (parser_streamer) { + res.parsed.resize(1); + res.parsed[0] = parser_streamer->get_parsed_message(); + } // If no parsers are defined, return if (!generation_config.has_value() || generation_config->parsers.empty()) { diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index cff484105f..b5b08b2f50 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -10,12 +10,14 @@ namespace ov::genai { class ReasoningIncrementalParser::ReasoningParserImpl { private: + // Values initialized from constructor don't need default member initializer. bool m_expect_open_tag; - bool m_first_run = true; bool m_keep_original_content; - bool m_think_tag_opened = false; std::string m_open_tag; std::string m_close_tag; + // Values with default member initializers are reset on each reset() call. 
+ bool m_first_run = true; + bool m_think_tag_opened = false; std::string m_text_cache = ""; bool m_deactivated = false; public: @@ -144,6 +146,13 @@ class ReasoningIncrementalParser::ReasoningParserImpl { return delta_text; } + + void reset() { + m_first_run = true; + m_think_tag_opened = false; + m_text_cache = ""; + m_deactivated = false; + } }; ReasoningIncrementalParser::ReasoningIncrementalParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) { @@ -160,6 +169,10 @@ std::string ReasoningIncrementalParser::parse( return m_impl->parse(message, delta_text, delta_tokens); } +void ReasoningIncrementalParser::reset() { + m_impl->reset(); +} + class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { public: void parse(JsonContainer& message) { diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 9ad9bd9543..fa48f1a33e 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -155,6 +155,13 @@ JsonContainer TextParserStreamer::get_parsed_message() const { return m_pimpl->m_parsed_message; } +void TextParserStreamer::reset() { + m_pimpl->m_parsed_message = JsonContainer(); + for (auto& parser : m_pimpl->m_parsers) { + parser->reset(); + } +} + TextParserStreamer::~TextParserStreamer() = default; } // namespace genai diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 7818044e71..b92454855c 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -29,6 +29,23 @@ namespace pyutils = ov::genai::pybind::utils; namespace { +class ConstructableParser: public Parser { +public: + void parse(JsonContainer& msg) override { + py::gil_scoped_acquire acquire; + + py::function parse_method = py::get_override(static_cast(this), "parse"); + if (!parse_method) { + OPENVINO_THROW("parse method not implemented in Python subclass"); + } + + // Convert JsonContainer to py::dict + py::dict py_msg = pyutils::json_container_to_py_object(msg); + parse_method(py_msg); + msg = pyutils::py_object_to_json_container(py_msg); + } +}; + // ConstructableIncremental and ConstructableBase are used when python overload is called from C++ // and we need to convert JsonContainer to py::dict and then update back JsonContainer from the py::dict which was modified in Python. class ConstructableIncrementalParser: public IncrementalParser { @@ -52,64 +69,19 @@ class ConstructableIncrementalParser: public IncrementalParser { return res.cast(); } -}; -class ConstructableParser: public Parser { -public: - void parse(JsonContainer& msg) override { - py::gil_scoped_acquire acquire; - - py::function parse_method = py::get_override(static_cast(this), "parse"); - if (!parse_method) { - OPENVINO_THROW("parse method not implemented in Python subclass"); - } - - // Convert JsonContainer to py::dict - py::dict py_msg = pyutils::json_container_to_py_object(msg); - parse_method(py_msg); - msg = pyutils::py_object_to_json_container(py_msg); + void reset() override { + PYBIND11_OVERLOAD_PURE( + void, + IncrementalParser, + reset, + ); } }; } // namespace -// TODO: double check/add more relevant docstrings for parsers. 
void init_parsers(py::module_& m) {
-    py::class_<IncrementalParser, std::shared_ptr<IncrementalParser>>(m, "IncrementalParser")
-        .def(py::init<>())
-        .def("parse", [](IncrementalParser& self,
-                         py::dict& message,
-                         std::string& delta_text,
-                         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
-            auto msg_cpp = pyutils::py_object_to_json_container(message);
-            auto res = self.parse(msg_cpp, delta_text, delta_tokens);
-            auto json_str = msg_cpp.to_json_string();
-
-            // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
-            // since it creates a new object instead of updating the existing dict.
-            py::object json_mod = py::module_::import("json");
-            py::dict result = json_mod.attr("loads")(json_str);
-            // update msg with result
-            for (auto item : result) {
-                message[item.first] = item.second;
-            }
-            return res;
-        }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt,
-        "Parse is called every time a new text delta is decoded. Returns a string with any additional text to append to the current output.");
-
-    py::class_<ReasoningIncrementalParser, std::shared_ptr<ReasoningIncrementalParser>, IncrementalParser>(m, "ReasoningIncrementalParser")
-        .def(py::init<bool, bool, const std::string&, const std::string&>(),
-             py::arg("expect_open_tag") = true,
-             py::arg("keep_original_content") = true,
-             py::arg("open_tag") = "<think>",
-             py::arg("close_tag") = "</think>");
-
-    py::class_<Phi4ReasoningIncrementalParser, std::shared_ptr<Phi4ReasoningIncrementalParser>, IncrementalParser>(m, "Phi4ReasoningIncrementalParser")
-        .def(py::init<>());
-
-    py::class_<DeepSeekR1ReasoningIncrementalParser, std::shared_ptr<DeepSeekR1ReasoningIncrementalParser>, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser")
-        .def(py::init<>());
-
     py::class_<Parser, std::shared_ptr<Parser>>(m, "Parser")
         .def(py::init<>())
         .def("parse",
@@ -144,4 +116,40 @@ void init_parsers(py::module_& m) {
 
     py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, Parser>(m, "Llama3PythonicToolParser")
         .def(py::init<>());
+
+    py::class_<IncrementalParser, std::shared_ptr<IncrementalParser>>(m, "IncrementalParser")
+        .def(py::init<>())
+        .def("parse", [](IncrementalParser& self,
+                         py::dict& message,
+                         std::string& delta_text,
+                         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
+            auto msg_cpp = pyutils::py_object_to_json_container(message);
+            auto res = self.parse(msg_cpp, delta_text, delta_tokens);
+            auto json_str = msg_cpp.to_json_string();
+
+            // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
+            // since it creates a new object instead of updating the existing dict.
+            py::object json_mod = py::module_::import("json");
+            py::dict result = json_mod.attr("loads")(json_str);
+            // update msg with result
+            for (auto item : result) {
+                message[item.first] = item.second;
+            }
+            return res;
+        }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt,
+        "Parse is called every time a new text delta is decoded. Returns a string with any additional text to append to the current output.")
+        .def("reset", &IncrementalParser::reset, "Reset the internal state of the parser.");
+
+    py::class_<ReasoningIncrementalParser, std::shared_ptr<ReasoningIncrementalParser>, IncrementalParser>(m, "ReasoningIncrementalParser")
+        .def(py::init<bool, bool, const std::string&, const std::string&>(),
+             py::arg("expect_open_tag") = true,
+             py::arg("keep_original_content") = true,
+             py::arg("open_tag") = "<think>",
+             py::arg("close_tag") = "</think>");
+
+    py::class_<Phi4ReasoningIncrementalParser, std::shared_ptr<Phi4ReasoningIncrementalParser>, IncrementalParser>(m, "Phi4ReasoningIncrementalParser")
+        .def(py::init<>());
+
+    py::class_<DeepSeekR1ReasoningIncrementalParser, std::shared_ptr<DeepSeekR1ReasoningIncrementalParser>, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser")
+        .def(py::init<>());
 }
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index ca2b39453b..732f7cbe37 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -177,6 +177,10 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) {
             return delta_text;
         }
 
+        void reset() override {
+            main_part_started = false;
+        }
+
         // Virtual dtor for safety
         ~CustomParser() override = default;
     };

From 79d299e78d277d00f4b5888959bde50ef9dac5c6 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 23 Oct 2025 00:28:02 +0200
Subject: [PATCH 34/43] put delta_tokens to parser as well

---
 .../include/openvino/genai/text_streamer.hpp |  4 +-
 src/cpp/src/text_streamer.cpp                | 62 +++++++++++++++----
 .../openvino_genai/py_openvino_genai.pyi     | 10 ++-
 tests/cpp/parser.cpp                         | 10 +--
 tests/python_tests/test_parsers.py           | 57 ++++++++++++++++-
 5 files changed, 118 insertions(+), 25 deletions(-)

diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 53e06b7a93..403c63ef7d 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -29,7 +29,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
     TextStreamer(const Tokenizer& tokenizer, std::function<CallbackTypeVariant(std::string)> callback, const ov::AnyMap& detokenization_params = {});
 
-private:
+protected:
     Tokenizer m_tokenizer;
     std::vector<int64_t> m_tokens_cache;
     std::vector<int64_t> m_decoded_lengths;
@@ -58,7 +58,7 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer {
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
     CallbackTypeVariant write(std::string message);
-    
+
     JsonContainer get_parsed_message() const;
 
     void reset();
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index fa48f1a33e..fca30b7aea 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -9,6 +9,9 @@ bool is_incomplete(std::string& text) {
     constexpr char replacement[] = "\xef\xbf\xbd";
     return text.size() >= 3 && text.compare(text.size() - 3, 3, replacement) == 0;
 }
+
+constexpr size_t delay_n_tokens = 3;
+
 }  // namespace
 
 namespace ov {
@@ -31,10 +34,13 @@ StreamingStatus TextStreamer::write(int64_t token) {
     if (!text.empty() && '\n' == text.back() && text.size() > m_printed_len) {
         // Flush the cache after the new line symbol
         res << std::string_view{text.data() + m_printed_len, text.size() - m_printed_len};
+        // Get the list of tokens decoded for this chunk or the rest of the text.
+
+        auto res_status = run_callback_if_needed(res.str());
         m_tokens_cache.clear();
         m_decoded_lengths.clear();
         m_printed_len = 0;
-        return run_callback_if_needed(res.str());
+        return res_status;
     }
 
     if (is_incomplete(text)) {
@@ -42,7 +48,7 @@ StreamingStatus TextStreamer::write(int64_t token) {
         // Don't print incomplete text
         return run_callback_if_needed(res.str());
     }
-    constexpr size_t delay_n_tokens = 3;
+
     // In some cases adding the next token can shorten the text,
     // e.g. when an apostrophe-removing regex has worked after adding new tokens.
    // Printing several last tokens is delayed.
@@ -58,10 +64,14 @@ StreamingStatus TextStreamer::write(int64_t token) {
         // It is possible to have a shorter text after adding a new token.
         // Print to output only if text length is increased.
         res << std::string_view{text.data() + m_printed_len, print_until - m_printed_len} << std::flush;
+    }
+
+    auto status = run_callback_if_needed(res.str());
+
+    if (print_until > -1 && print_until > m_printed_len) {
         m_printed_len = print_until;
     }
-
-    return run_callback_if_needed(res.str());
+    return status;
 }
 
 void TextStreamer::compute_decoded_length_for_position(size_t cache_position) {
@@ -124,6 +134,7 @@ void TextStreamer::end() {
 
 StreamerBase::~StreamerBase() = default;
 
+// Used to hide the internal state of TextParserStreamer
 class TextParserStreamer::TextParserStreamerImpl {
 public:
@@ -131,14 +142,6 @@
 std::vector<std::shared_ptr<IncrementalParser>> m_parsers;
 JsonContainer m_parsed_message;
 
 TextParserStreamerImpl(std::vector<std::shared_ptr<IncrementalParser>> parsers) : m_parsers{parsers} {}
-
-void parse(std::string message) {
-    for (auto& parser: m_parsers) {
-        message = parser->parse(m_parsed_message, message);
-        // Message can be modified inside the parser, e.g. if the parser extracted tool calls from the message content
-        m_parsed_message["content"] = m_parsed_message["content"].get_string() + message;
-    }
-}
 };
 
 TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers)
@@ -147,7 +150,40 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers)
     : m_pimpl{std::make_unique<TextParserStreamerImpl>(parsers)} {}
 
 CallbackTypeVariant TextParserStreamer::write(std::string message) {
-    m_pimpl->parse(message);
+    // When 'write' is called with a string, it means a new chunk of tokens has been decoded into text
+
+    auto flushed_tokens = std::vector<int64_t>();
+    if (message.back() == '\n') {
+        // Flush all tokens  // TODO: m_decoded_lengths[m_decoded_lengths.size() - 1] = -1;
+        flushed_tokens.assign(m_tokens_cache.begin(), m_tokens_cache.end());
+    } else if (m_decoded_lengths.size() >= delay_n_tokens) {
+        // prompt = "I was waiting for the bus.\n"
+        // tokens = [2, 2, 3, 45, 67, 89, 4, 2]
+        // decoded_lengths = [1, 5, 13, 17, 21, 25, 26, 27]
+        // let printed_len = 13 (after "I was waiting")
+        // then delta_text = "for the bus.\n"
+        // delta_tokens = [45, 67, 89, 4, 2]
+        // delta_tokens = m_tokens_cache[4..end]
+
+        // Find where the last printed tokens are located based on m_printed_len and print_until
+        auto print_until = m_decoded_lengths[m_decoded_lengths.size() - delay_n_tokens];
+        auto first = std::upper_bound(m_decoded_lengths.begin(), m_decoded_lengths.end(), static_cast<int64_t>(m_printed_len))
+                     - m_decoded_lengths.begin();
+        auto last = std::upper_bound(m_decoded_lengths.begin(), m_decoded_lengths.end(), static_cast<int64_t>(print_until))
+                    - m_decoded_lengths.begin();
+
+        // Before calling the base TextStreamer::write, save the tokens that were flushed with this chunk.
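+        // 'first' and 'last' form a half-open range [first, last) into m_tokens_cache: they select
+        // exactly the tokens whose decoded text falls between m_printed_len and print_until,
+        // i.e. the tokens that produced this delta_text.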
+        if (last >= first) {
+            flushed_tokens.assign(m_tokens_cache.begin() + first, m_tokens_cache.begin() + last);
+        }
+    }
+
+    // Iterate over all parsers and apply them to the message
+    for (auto& parser: m_pimpl->m_parsers) {
+        message = parser->parse(m_pimpl->m_parsed_message, message, flushed_tokens);
+        // Message can be modified inside the parser, e.g. if the parser extracted tool calls from the message content
+        m_pimpl->m_parsed_message["content"] = m_pimpl->m_parsed_message["content"].get_string() + message;
+    }
 
     return write(m_pimpl->m_parsed_message);
 }
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index 273f2d778b..2cfa83a583 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -578,7 +578,7 @@ class DecodedResults:
     def texts(self) -> list[str]:
         ...
 class DeepSeekR1ReasoningIncrementalParser(IncrementalParser):
-    def __init__(self, expect_open_tag: bool = False) -> None:
+    def __init__(self) -> None:
         ...
 class EncodedGenerationResult:
     """
@@ -1462,10 +1462,14 @@ class ImageGenerationPerfMetrics:
 class IncrementalParser:
     def __init__(self) -> None:
         ...
-    def parse(self, message: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
+    def parse(self, message: dict, delta_text: str, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
         """
         Parse is called every time a new text delta is decoded. Returns a string with any additional text to append to the current output.
         """
+    def reset(self) -> None:
+        """
+        Reset the internal state of the parser.
+        """
 class InpaintingPipeline:
     """
     This class is used for generation with inpainting models.
@@ -1949,7 +1953,7 @@ class PerfMetrics:
     def raw_metrics(self) -> RawPerfMetrics:
         ...
 class Phi4ReasoningIncrementalParser(IncrementalParser):
-    def __init__(self, expect_open_tag: bool = True) -> None:
+    def __init__(self) -> None:
         ...
 class PipelineMetrics:
     """
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index 732f7cbe37..e4db4da3f2 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -37,8 +37,8 @@ TEST(ParserTest, test_reasoning_parser_1) {
     std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|><think>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n</think>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
 
     JsonContainer expected;
-    expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )");
-    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    expected["content"] = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
+    expected["reasoning_content"] = R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)";
 
     std::shared_ptr<ReasoningIncrementalParser> parser = std::make_shared<ReasoningIncrementalParser>(
         /*expect_open_tag*/ true,
@@ -56,7 +56,7 @@ TEST(ParserTest, test_reasoning_parser_2) {
 
     JsonContainer expected;
     expected["content"] = prompt;
-    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    expected["reasoning_content"] = R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)";
 
     std::shared_ptr<ReasoningIncrementalParser> parser = std::make_shared<ReasoningIncrementalParser>(
         /*expect_open_tag*/ true,
@@ -155,10 +155,10 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) {
             // Ensure fields exist (Python test used dict defaults)
             if (!msg.contains("content")) {
                 msg.to_empty_object();
-                msg["content"] = std::string{};
+                msg["content"] = "";
             }
             if (!msg.contains("reasoning_content")) {
-                msg["reasoning_content"] = std::string{};
+                msg["reasoning_content"] = "";
             }
 
             if (!main_part_started && delta_text == "") {
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index e5f33ac955..53b79a027e 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -80,9 +80,9 @@ def write(self, message):
     msg = {}
     answer = "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."
-    encoded_tokens = genai_tokenizer.encode(answer).input_ids.data.tolist()
+    encoded_tokens = genai_tokenizer.encode(answer).input_ids.data.tolist()[0]
     for token in encoded_tokens:
-        streamer._write(token)
+        streamer._write([token])
     streamer.end()
 
     think_content = answer.split("</think>")[0].replace("<think>", "")
@@ -92,6 +92,59 @@ def write(self, message):
     assert msg['content'] == content
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
+    indirect=True
+)
+def test_incremental_integer_token_ids(hf_ov_genai_models):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+    class CustomIncrementalParser(IncrementalParser):
+        started_reasoning: bool = False
+
+        def parse(self, msg: dict, delta_text: str, delta_tokens = None) -> str:
+            if 'content' not in msg:
+                msg['content'] = ''
+            if 'reasoning_content' not in msg:
+                msg['reasoning_content'] = ''
+
+            if 1 in delta_tokens and not self.started_reasoning:
+                self.started_reasoning = True
+                msg['reasoning_content'] += delta_text
+                delta_text = ''
+            elif 1 in delta_tokens and self.started_reasoning:
+                self.started_reasoning = False
+                delta_text = ''
+            elif self.started_reasoning:
+                msg['reasoning_content'] += delta_text
+                delta_text = ''
+
+            # Here we are only collecting ordinary text, therefore leave delta_text unchanged;
+            # msg['content'] += delta_text will happen under the hood.
+            return delta_text
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            msg.update(message)
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(genai_tokenizer, parsers=[CustomIncrementalParser()])
+
+    msg = {}
+    # All closing tags </s>, <|/inst|>, <|endoftext|>, etc. in tiny-random-phi3 add strange \x0c\x0c characters
+    # so we avoid them in this test.
+    answer = "<s>\nOkay, the user is asking for the answer to 2 + 1<s>.The answer to 2 + 1 is 3."
+    encoded_tokens = genai_tokenizer.encode(answer, add_special_tokens=False).input_ids.data.tolist()[0]
+
+    for token in encoded_tokens:
+        streamer._write([token])
+    streamer.end()
+
+    assert msg['reasoning_content'] == "\nOkay, the user is asking for the answer to 2 + 1"
+    assert msg['content'] == " The answer to 2 + 1 is 3."
+
+
 @pytest.mark.precommit
 @pytest.mark.parametrize(
     "hf_ov_genai_models",

From 775cf13d17116fb847490df785b4f54bc712477b Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 23 Oct 2025 01:14:42 +0200
Subject: [PATCH 35/43] store compiled pattern as a member

---
 src/cpp/src/parsers.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index b5b08b2f50..ad8a9a1cc1 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -175,6 +175,7 @@ void ReasoningIncrementalParser::reset() {
 
 class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
 public:
+    std::regex m_pattern = std::regex(R"(\[(.*?)\])");
     void parse(JsonContainer& message) {
         // Input example
         // string message = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
        // Regex to capture the [...]
part std::smatch m; const std::string& text = message["content"].get_string(); - std::regex r(R"(\[.*?\])"); - if (!std::regex_search(text, m, r)) { + + if (!std::regex_search(text, m, m_pattern)) { return; } From d4bf6bee4c431ce3fe72c31ac516325e87eb19a6 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 23 Oct 2025 08:48:55 +0200 Subject: [PATCH 36/43] call parsers for ChatHistory as well; update caches --- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 2 +- .github/workflows/windows.yml | 2 +- src/cpp/src/llm/pipeline.cpp | 9 +++++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 09b39d4b42..44a439493c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -25,7 +25,7 @@ env: SCCACHE_CACHE_SIZE: 30G SCCACHE_AZURE_KEY_PREFIX: genai/ubuntu/22_04/x64 HF_HOME: /mount/caches/huggingface/lin - OV_CACHE: /mount/caches/huggingface/.ov_cache/lin + OV_CACHE: /mount/caches/huggingface/.ov_cache/lin/775cf1/ OPENVINO_LOG_LEVEL: 4 GENAI_ARCHIVE_NAME: genai.tar.gz GENAI_SAMPLES_NAME: genai_samples.tar.gz diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 249519057d..fc4b9261aa 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -22,7 +22,7 @@ env: BASE_PRODUCT_TYPE: public_macos_arm64 CCACHE_MAXSIZE: 500Mi HF_HOME: ~/.cache/hf - OV_CACHE: ~/.cache/ov_cache/194c936 + OV_CACHE: ~/.cache/ov_cache/775cf1/ CLEANUP_CACHE: 1 OPENVINO_LOG_LEVEL: 4 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0d646578bc..5502fec5c0 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -23,7 +23,7 @@ env: CMAKE_C_COMPILER_LAUNCHER: ccache CCACHE_MAXSIZE: 500Mi HF_HOME: C:/mount/caches/huggingface/win - OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/ + OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/775cf1/ OPENVINO_LOG_LEVEL: 2 # Windows fails with out of memory because of too verbose logging ARTIFACTS_SHARE: '/mount/build-artifacts' BASE_PRODUCT_TYPE: public_windows_vs2022 diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 2e136e0897..fe1222f779 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -324,15 +324,20 @@ DecodedResults LLMPipeline::generate( const ChatHistory& history, OptionalGenerationConfig generation_config, StreamerVariant streamer) { - return m_pimpl->generate(history, generation_config, streamer); + return run_generate_with_parsers(generation_config, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(history, generation_config, streamer); + }); } DecodedResults LLMPipeline::generate(const ChatHistory& history, const ov::AnyMap& config_map) { auto config_arg = utils::get_config_from_map(config_map); GenerationConfig config = config_arg.value_or(get_generation_config()); config.update_generation_config(config_map); + auto streamer = utils::get_streamer_from_map(config_map); - return m_pimpl->generate(history, config, utils::get_streamer_from_map(config_map)); + return run_generate_with_parsers(config, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(history, config, streamer); + }); } EncodedResults LLMPipeline::generate( From 8ddc2c42f46cc2ee5596b5f1a7df96bed10dd6ad Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 23 Oct 2025 09:30:28 +0200 Subject: [PATCH 37/43] use json_container_to_py_object when `parse()` is called from Python as well --- src/python/py_parsers.cpp | 26 
++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index b92454855c..0352a36554 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -88,18 +88,8 @@ void init_parsers(py::module_& m) { [](Parser& self, py::dict& message) { auto msg_cpp = pyutils::py_object_to_json_container(message); self.parse(msg_cpp); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - py::object json_mod = py::module_::import("json"); - - // since it create a new object instead of updating existing dict. - auto json_str = msg_cpp.to_json_string(); - py::dict result = json_mod.attr("loads")(json_str); - - // update msg with result - for (auto item : result) { - message[item.first] = item.second; - } + py::dict result = pyutils::json_container_to_py_object(msg_cpp); + message.attr("update")(result); }, py::arg("message"), "Parse is called with the full text. Returns a dict with parsed content."); @@ -125,16 +115,8 @@ void init_parsers(py::module_& m) { const std::optional>& delta_tokens = std::nullopt) { auto msg_cpp = pyutils::py_object_to_json_container(message); auto res = self.parse(msg_cpp, delta_text, delta_tokens); - auto json_str = msg_cpp.to_json_string(); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - // since it create a new object instead of updating existing dict. - py::object json_mod = py::module_::import("json"); - py::dict result = json_mod.attr("loads")(json_str); - // update msg with result - for (auto item : result) { - message[item.first] = item.second; - } + auto result = pyutils::json_container_to_py_object(msg_cpp); + message.attr("update")(result); return res; }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.") From d82c66c6352ff1048cd1ba15351b9b192b3151b5 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 24 Oct 2025 13:21:50 +0200 Subject: [PATCH 38/43] add reset() tests, corrected pybinding, added Phi4, DeepSeek predefined static parsers --- src/cpp/include/openvino/genai/parsers.hpp | 39 ++++++++--- src/python/openvino_genai/__init__.py | 4 +- src/python/openvino_genai/__init__.pyi | 4 +- .../openvino_genai/py_openvino_genai.pyi | 12 +++- src/python/py_parsers.cpp | 10 ++- src/python/py_streamers.cpp | 9 ++- src/python/py_utils.cpp | 11 +++ tests/python_tests/test_parsers.py | 68 +++++++++++++++---- 8 files changed, 127 insertions(+), 30 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index c4163f8201..d72f3b8d62 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -62,6 +62,26 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { std::unique_ptr m_impl; }; +/** + * @brief Parser for DeepSeek R1 model reasoning format. + * + * DeepSeekR1ReasoningParser is configured for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. + */ +class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser { +public: + DeepSeekR1ReasoningParser() : ReasoningParser(/*expect_open_tag=*/false) {}; +}; + +/** + * @brief Parser for Phi-4 model reasoning format. 
+ * + * Phi4ReasoningParser is configured specifically for the Phi-4 model's reasoning format, which expects an opening tag by default. + */ +class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser { +public: + Phi4ReasoningParser() : ReasoningParser(/*expect_open_tag=*/true) {}; +}; + /** * @brief Parser for Llama 3 Pythonic tool calls format. * @@ -72,7 +92,7 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { */ class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { public: - explicit Llama3PythonicToolParser(); + Llama3PythonicToolParser(); ~Llama3PythonicToolParser(); /** @@ -98,7 +118,7 @@ class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { */ class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser { public: - explicit Llama3JsonToolParser(); + Llama3JsonToolParser(); ~Llama3JsonToolParser(); /** @@ -196,26 +216,23 @@ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalPars }; /** - * @brief Specialized incremental parser for DeepSeek R1 model reasoning format. + * @brief Incremental parser for DeepSeek R1 model reasoning format. * - * DeepSeekR1ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser - * configured specifically for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. + * DeepSeekR1ReasoningIncrementalParser is configured for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. */ class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit DeepSeekR1ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/false) {}; + DeepSeekR1ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/false) {}; }; /** - * @brief Specialized incremental parser for Phi-4 model reasoning format. + * @brief Incremental parser for Phi-4 model reasoning format. * - * Phi4ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser - * configured specifically for the Phi-4 model's reasoning format, which typically - * expects an opening tag by default. + * Phi4ReasoningIncrementalParser is configured specifically for the Phi-4 model's reasoning format, which expects an opening tag by default. 
*/ class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit Phi4ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/true) {}; + Phi4ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/true) {}; }; } // namespace genai diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 5673b941cb..b10aadd062 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -25,12 +25,14 @@ from .py_openvino_genai import ( Parser, ReasoningParser, + DeepSeekR1ReasoningParser, + Phi4ReasoningParser, Llama3JsonToolParser, Llama3PythonicToolParser, IncrementalParser, ReasoningIncrementalParser, - Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, + Phi4ReasoningIncrementalParser, ) __version__ = get_version() diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index f92c55f4fd..c1d1f1dc30 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -16,6 +16,7 @@ from openvino_genai.py_openvino_genai import ContinuousBatchingPipeline from openvino_genai.py_openvino_genai import CppStdGenerator from openvino_genai.py_openvino_genai import DecodedResults from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningIncrementalParser +from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningParser from openvino_genai.py_openvino_genai import EncodedResults from openvino_genai.py_openvino_genai import FluxTransformer2DModel from openvino_genai.py_openvino_genai import GenerationConfig @@ -36,6 +37,7 @@ from openvino_genai.py_openvino_genai import Llama3PythonicToolParser from openvino_genai.py_openvino_genai import Parser from openvino_genai.py_openvino_genai import PerfMetrics from openvino_genai.py_openvino_genai import Phi4ReasoningIncrementalParser +from openvino_genai.py_openvino_genai import Phi4ReasoningParser from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics from openvino_genai.py_openvino_genai import ReasoningIncrementalParser @@ -74,5 +76,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index c1c8fd612f..0444146334 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import 
typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 
'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
 class Adapter:
     """
     Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier.
@@ -585,6 +585,9 @@ class DecodedResults:
 class DeepSeekR1ReasoningIncrementalParser(IncrementalParser):
     def __init__(self) -> None:
         ...
+class DeepSeekR1ReasoningParser(ReasoningParser):
+    def __init__(self) -> None:
+        ...
 class EncodedGenerationResult:
     """
@@ -1962,6 +1965,9 @@ class PerfMetrics:
 class Phi4ReasoningIncrementalParser(IncrementalParser):
     def __init__(self) -> None:
         ...
+class Phi4ReasoningParser(ReasoningParser):
+    def __init__(self) -> None:
+        ...
 class PipelineMetrics:
     """
@@ -3422,6 +3428,10 @@ class TextParserStreamer(TextStreamer):
         """
         Returns the accumulated message.
         """
+    def reset(self) -> None:
+        """
+        Resets the internal state of the parser streamer.
+        """
 class TextRerankPipeline:
     """
     Text rerank pipeline
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index 0352a36554..9eae3c5e93 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -17,8 +17,10 @@
 using ov::genai::IncrementalParser;
 using ov::genai::Parser;
 using ov::genai::ReasoningParser;
 using ov::genai::ReasoningIncrementalParser;
-using ov::genai::Phi4ReasoningIncrementalParser;
+using ov::genai::DeepSeekR1ReasoningParser;
 using ov::genai::DeepSeekR1ReasoningIncrementalParser;
+using ov::genai::Phi4ReasoningIncrementalParser;
+using ov::genai::Phi4ReasoningParser;
 using ov::genai::JsonContainer;
 using ov::genai::Llama3JsonToolParser;
 using ov::genai::Llama3PythonicToolParser;
@@ -101,6 +103,12 @@ void init_parsers(py::module_& m) {
              py::arg("open_tag") = "<think>",
              py::arg("close_tag") = "</think>");
 
+    py::class_<DeepSeekR1ReasoningParser, std::shared_ptr<DeepSeekR1ReasoningParser>, ReasoningParser>(m, "DeepSeekR1ReasoningParser")
+        .def(py::init<>());
+
+    py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, ReasoningParser>(m, "Phi4ReasoningParser")
+        .def(py::init<>());
+
     py::class_<Llama3JsonToolParser, std::shared_ptr<Llama3JsonToolParser>, Parser>(m, "Llama3JsonToolParser")
         .def(py::init<>());
 
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index ba596ead92..f2df853e8b 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -85,8 +85,9 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     StreamingStatus write(JsonContainer& message) override {
         // Since C++ calls this function with a JsonContainer while the Python override expects a py::dict,
         // this function is a wrapper to call the Python implementation of 'write' with a py::dict
-        py::dict message_py;
-        message_py = pyutils::json_container_to_py_object(message);
+        py::gil_scoped_acquire acquire;
+
+        py::dict message_py = pyutils::json_container_to_py_object(message);
 
         // Call the Python implementation which accepts py::dict instead of JsonContainer
         // and convert the resulting message back to JsonContainer
@@ -172,5 +173,7 @@ void init_streamers(py::module_& m) {
         [](TextParserStreamer& self) -> py::dict{
             return pyutils::json_container_to_py_object(self.get_parsed_message());
-        }, "Returns the accumulated message.");
+        }, "Returns the accumulated message.")
+
+        .def("reset", &TextParserStreamer::reset, "Resets the internal state of the parser streamer.");
 }
diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp
index bee695ac0b..ffb8f7849f 100644
--- a/src/python/py_utils.cpp
+++ b/src/python/py_utils.cpp
@@ -149,6 +149,17 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) {
             }
         }
         return structural_tags;
+    } else if (property_name == "parsers") {
+        auto property_list = py_obj.cast<py::list>();
+        std::vector<std::shared_ptr<Parser>> parsers;
+        for (const auto& item : property_list) {
+            if (py::isinstance<Parser>(item)) {
+                parsers.push_back(item.cast<std::shared_ptr<Parser>>());
+            } else {
+                OPENVINO_THROW("Incorrect value in \"", property_name, "\". Expected Parser.");
+            }
+        }
+        return parsers;
     } else {
         auto _list = py_obj.cast<py::list>();
         enum class PY_TYPE : int { UNKNOWN = 0, STR, INT, FLOAT, BOOL, PARTIAL_SHAPE, TENSOR, DICT};
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 53b79a027e..8bc23c1f7d 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -4,7 +4,7 @@
 from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 from utils.ov_genai_pipelines import create_ov_pipeline
 import pytest
-from openvino_genai import Tokenizer, IncrementalParser, Parser, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, GenerationConfig, ReasoningIncrementalParser
+from openvino_genai import Tokenizer, IncrementalParser, Parser, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, GenerationConfig, ReasoningIncrementalParser
 from transformers import AutoTokenizer
 import re
 
@@ -205,6 +205,7 @@ def test_incremental_phi4_reason_parser_nostreamer(answer):
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("keep_original_content", [True, False])
+@pytest.mark.parametrize("do_reset", [True, False])
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
     ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
     indirect=True
 )
 @pytest.mark.parametrize("answer", [
     "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}.",
 ])
-def test_reasoning_parser_cut_content(hf_ov_genai_models, answer, keep_original_content):
+def test_reasoning_parser_cut_content(hf_ov_genai_models, answer, keep_original_content, do_reset):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
     stream_string = re.split(r"(\s+)", answer)
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
     streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningIncrementalParser(expect_open_tag=True, keep_original_content=keep_original_content)])
-
-    msg = {}
-    for subword in stream_string:
-        streamer._write(subword)
 
-    think_content = answer.split("</think>")[0].replace("<think>", "")
-    content = answer
-
-    assert msg['reasoning_content'] == think_content
-    assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+    num_runs = 2
+    for i in range(num_runs):
+        if do_reset:
+            streamer.reset()
+
+        msg = {}
+        for subword in stream_string:
+            streamer._write(subword)
+
+        think_content = answer.split("</think>")[0].replace("<think>", "")
+        content = answer
+
+        if do_reset:
+            # If the parser has been reset, check that the content is parsed correctly
+            assert msg['reasoning_content'] == think_content
+            assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+        else:
+            # If reset() has not been called, the content will continue to accumulate thinking parts from the next runs
+            assert msg['content'].find("<think>") != -1
 
 
 def test_incremental_deepseek_parser():
@@ -362,4 +373,37 @@ def parse(self, msg: dict):
     assert res.parsed[0]['reasoning_content'] != ""
     assert res.parsed[0]['reasoning_content'] == think_text
 
-# TODO; add test for reseting incremental parser at generation start
+
+@pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"])
+@pytest.mark.nightly
+def test_reset_incremental_parser(tmp_path, model_id):
+    _, _, models_path = download_and_convert_model(model_id, padding_side="left")
+    pipe = create_ov_pipeline(models_path)
+    tok = pipe.get_tokenizer()
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(tok, parsers=[Phi4ReasoningIncrementalParser()])
+
+    prompt = "Please say \"hello\""
+    res = pipe.generate([prompt], max_new_tokens=600, parsers=[Phi4ReasoningParser()])
+
+    # Manually extract the reasoning content from the parsed result
+    content = res.texts[0]
+    think_start = content.find("<think>")
+    think_end = content.find("</think>")
+    if think_start != -1 and think_end != -1 and think_end > think_start:
+        think_text = content[think_start + len("<think>"):think_end]
+
+    assert 'reasoning_content' in res.parsed[0]
+    assert res.parsed[0]['reasoning_content'] != ""
+    assert res.parsed[0]['reasoning_content'] == think_text
+
+    res_streamer_1 = pipe.generate([prompt], max_new_tokens=600, streamer=streamer)
+    res_streamer_2 = pipe.generate([prompt], max_new_tokens=600, streamer=streamer)
+    # Check that results from streamer generation are the same as from non-streamer generation.
+    assert res_streamer_1.parsed == res.parsed
+
+    # This also asserts that resetting the streamer between generations works correctly.
+    assert res_streamer_2.parsed == res.parsed

From 99ed0a98fe18c887bb38710b750d69072be02c1c Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 24 Oct 2025 13:39:15 +0200
Subject: [PATCH 39/43] wip

---
 src/cpp/src/parsers.cpp | 169 +++++++++++++++++++++-------------------
 1 file changed, 89 insertions(+), 80 deletions(-)

diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index ad8a9a1cc1..15a5e965d6 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -18,7 +18,7 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
     // Values with default member initializers are reset on each reset() call.
     bool m_first_run = true;
     bool m_think_tag_opened = false;
-    std::string m_text_cache = "";
+    std::string m_text_cache;
     bool m_deactivated = false;
 public:
     ReasoningParserImpl() = default;
@@ -30,7 +30,9 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content),
           m_open_tag(open_tag),
-          m_close_tag(close_tag) {}
+          m_close_tag(close_tag) {
+        m_text_cache.reserve(close_tag.size());
+    }
 
     std::string parse(
         JsonContainer& message,
@@ -40,11 +42,15 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         if (m_deactivated) {
             return delta_text;
         }
-        if (!m_expect_open_tag && m_first_run) {
-            m_think_tag_opened = true;
+
+        if (m_first_run) {
+            m_first_run = false;
+            if (!m_expect_open_tag) {
+                m_think_tag_opened = true;
+            }
         }
-        m_first_run = false;
 
+        // Initialize message fields if needed
         if (!message.contains("reasoning_content")) {
            message["reasoning_content"] = "";
        }
        if (!message.contains("content")) {
            message["content"] = "";
        }
-
-        auto txt_chunk = m_text_cache + delta_text;
+        // Combine cached text with new delta
+        m_text_cache += delta_text;
+        const std::string& txt_chunk = m_text_cache;
+
         auto reason_str = message["reasoning_content"].get_string();
-        auto content_str = message["content"].get_string();
 
-        if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && m_expect_open_tag) {
-            // Thinking has started
-            auto open_idx = txt_chunk.find(m_open_tag);
-
-            reason_str += txt_chunk.substr(open_idx + m_open_tag.size(), txt_chunk.size() - (open_idx + m_open_tag.size()));
-            if (!m_keep_original_content) {
-                delta_text = "";
+        if (!m_think_tag_opened && m_expect_open_tag) {
+            // Look for opening tag
+            size_t open_idx = txt_chunk.find(m_open_tag);
+            if (open_idx != std::string::npos) {
+                // Thinking has started
+                m_think_tag_opened = true;
+                size_t content_start = open_idx + m_open_tag.size();
+
+                // Check if closing tag is also present
+                size_t close_idx = txt_chunk.find(m_close_tag, content_start);
+                if (close_idx != std::string::npos) {
+                    // Both tags in same chunk
+                    reason_str = txt_chunk.substr(content_start, close_idx - content_start);
+                    message["reasoning_content"] = reason_str;
+
+                    if (!m_keep_original_content) {
+                        delta_text = txt_chunk.substr(close_idx + m_close_tag.size());
+                    }
+
+                    m_think_tag_opened = false;
+                    m_deactivated = true;
+                    m_text_cache.clear();
+                } else {
+                    // Only opening tag found
+                    reason_str += txt_chunk.substr(content_start);
+                    message["reasoning_content"] = reason_str;
+
+                    if (!m_keep_original_content) {
+                        delta_text.clear();
+                    }
+                    m_text_cache.clear();
+                }
+                return delta_text;
             }
-
-            m_think_tag_opened = true;
-            message["reasoning_content"] = reason_str;
-            m_text_cache = "";
-
-            if (txt_chunk.find(m_close_tag) != std::string::npos) {
-                // If <think> and </think> are in the same txt_chunk + delta_text
-                auto close_idx = txt_chunk.find(m_close_tag);
-                reason_str = txt_chunk.substr(open_idx + m_open_tag.size(), close_idx - (open_idx + m_open_tag.size()));
-                content_str = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
+            // Opening tag not found, keep accumulating
+            return delta_text;
+        }
+
+        if (m_think_tag_opened) {
+            // Look for closing tag
+            size_t close_idx = txt_chunk.find(m_close_tag);
+            if (close_idx != std::string::npos) {
+                // Thinking tag was closed
+                reason_str += txt_chunk.substr(0, close_idx);
+                message["reasoning_content"] = reason_str;
+
                if (!m_keep_original_content) {
-                    delta_text = content_str;
+                    delta_text = txt_chunk.substr(close_idx + m_close_tag.size());
                 }
+
+                m_text_cache.clear();
                 m_think_tag_opened = false;
                 m_deactivated = true;
-                message["reasoning_content"] = reason_str;
-            }
-        } else if (m_think_tag_opened && txt_chunk.find(m_close_tag) != std::string::npos) {
-            // Thinking tag was closed
-            auto close_idx = txt_chunk.find(m_close_tag);
-
-            reason_str += txt_chunk.substr(0, close_idx);
-            if (!m_keep_original_content) {
-                // Cut from the txt_chunk which is before </think> and leave only what is after </think>.
-                // Example if m_text_cache + delta_text = "...some text</th" + "ink>Answer is 3" = "...some text</think>Answer is 3"
-                // we want to keep in delta_text only "Answer is 3".
-                // We can operate with txt_chunk since final characters closing the tag ("ink>") are always in delta_text.
-                delta_text = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
+                return delta_text;
             }
-
-            message["reasoning_content"] = reason_str;
-            m_text_cache = "";
-            m_think_tag_opened = false;
-            m_deactivated = true;
-        } else if (m_think_tag_opened) {
-            // Thinking tag was already opened and not closed yet
-            // If we have subsequently "sdf</th", "i", "nk> The"
-            // Then we put "sdf" to reason_str and "</th" to m_text_cache since it's a prefix of the close tag "</think>"
-            // then we put "i" to m_text_cache since m_text_cache + delta_text = "</th" + "i" is still a prefix of "</think>"
-            // then (in the closing tag IF-block) we leave only " The" in delta_text.
-
-            // If we have "ing. <", " 20 ", "40>"
-            // Then we put "ing. " to reason_str and "<" to m_text_cache since it's a substring of close tag "</think>"
-            // but since continuation " 20 " is not a substring of "</think>", we will end up in this IF-block again
-            // and put " 20 " to reason_str and clear m_text_cache.
-
-            // number of characters from the end of txt_chunk which can be part of the closing tag
-            size_t num_chars_to_keep = 0;
-            // We must be sure that no chunks with the closing tag are included to reason_str.
-            for (size_t i = txt_chunk.size(); i >= 1; --i) {
-                // Get the substring of the i last characters of txt_chunk
-                auto suffix = txt_chunk.substr(txt_chunk.size() - i, i);
-                // If this suffix is a prefix of m_close_tag, we need to keep it in the cache.
-                if (m_close_tag.find(suffix) == 0) {
-                    num_chars_to_keep = i;
-                    break;
-                }
-            }
-
-            // If the suffix is a prefix of m_close_tag, we store it in the cache to detect if </think> is split between several delta_text pieces.
- m_text_cache += delta_text; } + // else: accumulating text before opening tag return delta_text; } + // Find the longest suffix of txt that is a prefix of close_tag + size_t find_prefix_match_length(const std::string& txt, const std::string& close_tag) const { + size_t max_check = std::min(txt.size(), close_tag.size() - 1); + for (size_t len = max_check; len >= 1; --len) { + if (txt.compare(txt.size() - len, len, close_tag, 0, len) == 0) { + return len; + } + } + return 0; + } + +public: void reset() { m_first_run = true; m_think_tag_opened = false; - m_text_cache = ""; + m_text_cache.clear(); m_deactivated = false; } }; From fbd57e39bef4f2a6b5fc01fa7cb271042a99d4af Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 24 Oct 2025 13:46:32 +0200 Subject: [PATCH 40/43] optimize parse() --- src/cpp/src/parsers.cpp | 252 +++++++++++++++++++++++----------------- 1 file changed, 144 insertions(+), 108 deletions(-) diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 15a5e965d6..cd3ca11adc 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -18,8 +18,120 @@ class ReasoningIncrementalParser::ReasoningParserImpl { // Values with default member initializers are reset on each reset() call. bool m_first_run = true; bool m_think_tag_opened = false; - std::string m_text_cache; + std::string m_text_cache = ""; bool m_deactivated = false; + + /** + * @brief Ensure required fields exist in the message container. + */ + void ensure_message_fields(JsonContainer& message) { + if (!message.contains("reasoning_content")) { + message["reasoning_content"] = ""; + } + if (!message.contains("content")) { + message["content"] = ""; + } + } + + /** + * @brief Find the longest suffix of text that is a prefix of the close tag. + * + * This is used to detect if the close tag is split across multiple chunks. + * For example, if text ends with "", + * this returns 4 (the length of "= 1; --i) { + // Compare the last i characters of text with the first i characters of m_close_tag + if (text.compare(text.size() - i, i, m_close_tag, 0, i) == 0) { + return i; + } + } + return 0; + } + + /** + * @brief Handle the case where both open and close tags are found in the same chunk. + */ + void handle_complete_reasoning(JsonContainer& message, std::string_view txt_chunk, + size_t open_idx, size_t close_idx, std::string& delta_text) { + // Extract reasoning content between tags + message["reasoning_content"] = std::string(txt_chunk.substr(open_idx + m_open_tag.size(), + close_idx - (open_idx + m_open_tag.size()))); + + if (!m_keep_original_content) { + delta_text = std::string(txt_chunk.substr(close_idx + m_close_tag.size())); + } + + m_think_tag_opened = false; + m_deactivated = true; + m_text_cache.clear(); + } + + /** + * @brief Handle the case where only the open tag is found. + */ + void handle_open_tag(JsonContainer& message, std::string& reason_str, + std::string_view txt_chunk, size_t open_idx, std::string& delta_text) { + // Start accumulating reasoning content + reason_str.append(txt_chunk.substr(open_idx + m_open_tag.size())); + message["reasoning_content"] = std::move(reason_str); + + if (!m_keep_original_content) { + delta_text.clear(); + } + + m_think_tag_opened = true; + m_text_cache.clear(); + } + + /** + * @brief Handle the case where the close tag is found. 
+     */
+    void handle_close_tag(JsonContainer& message, std::string& reason_str,
+                          std::string_view txt_chunk, size_t close_idx, std::string& delta_text) {
+        // Append text before close tag to reasoning content
+        reason_str.append(txt_chunk.substr(0, close_idx));
+        message["reasoning_content"] = std::move(reason_str);
+
+        if (!m_keep_original_content) {
+            delta_text = std::string(txt_chunk.substr(close_idx + m_close_tag.size()));
+        }
+
+        m_text_cache.clear();
+        m_think_tag_opened = false;
+        m_deactivated = true;
+    }
+
+    /**
+     * @brief Handle accumulating text while inside reasoning tags.
+     */
+    void handle_inside_reasoning(JsonContainer& message, std::string& reason_str,
+                                 std::string_view txt_chunk, std::string& delta_text) {
+        // Find if the end of txt_chunk might be the start of a close tag
+        const size_t num_chars_to_keep = find_close_tag_prefix_length(txt_chunk);
+
+        if (num_chars_to_keep > 0) {
+            // Keep potential partial close tag in cache
+            m_text_cache = std::string(txt_chunk.substr(txt_chunk.size() - num_chars_to_keep));
+            reason_str.append(txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep));
+        } else {
+            // No partial close tag, accumulate all text
+            reason_str.append(txt_chunk);
+            m_text_cache.clear();
+        }
+
+        if (!m_keep_original_content) {
+            delta_text.clear();
+        }
+        message["reasoning_content"] = std::move(reason_str);
+    }
+
 public:
     ReasoningParserImpl() = default;
@@ -30,7 +142,9 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content),
           m_open_tag(open_tag),
-          m_close_tag(close_tag) {
-        m_text_cache.reserve(close_tag.size());
-    }
+          m_close_tag(close_tag) {}
 
     std::string parse(
         JsonContainer& message,
@@ -42,11 +152,15 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         if (m_deactivated) {
             return delta_text;
         }
-
-        if (m_first_run) {
-            m_first_run = false;
-            if (!m_expect_open_tag) {
-                m_think_tag_opened = true;
-            }
+        if (!m_expect_open_tag && m_first_run) {
+            m_think_tag_opened = true;
         }
+        m_first_run = false;
 
-        // Initialize message fields if needed
-        if (!message.contains("reasoning_content")) {
-            message["reasoning_content"] = "";
-        }
-        if (!message.contains("content")) {
-            message["content"] = "";
-        }
+        ensure_message_fields(message);
 
-        // Combine cached text with new delta
-        m_text_cache += delta_text;
-        const std::string& txt_chunk = m_text_cache;
+        const std::string txt_chunk = m_text_cache + delta_text;
+        std::string reason_str;
+        if (message.contains("reasoning_content")) {
+            reason_str = std::move(message["reasoning_content"].get_string());
+        }
+
+        // Cache find() results to avoid redundant searches
+        const auto open_idx = txt_chunk.find(m_open_tag);
+        const auto close_idx = txt_chunk.find(m_close_tag);
+
+        if (!m_think_tag_opened && open_idx != std::string::npos && m_expect_open_tag) {
+            // Check if close tag is also present after the open tag
+            const auto close_idx_after_open = (close_idx != std::string::npos && close_idx > open_idx)
+                ? close_idx : std::string::npos;
-        if (m_think_tag_opened) {
-            // Look for closing tag
-            size_t close_idx = txt_chunk.find(m_close_tag);
-            if (close_idx != std::string::npos) {
-                // Thinking tag was closed
-                reason_str += txt_chunk.substr(0, close_idx);
-                message["reasoning_content"] = reason_str;
-
-                if (!m_keep_original_content) {
-                    delta_text = txt_chunk.substr(close_idx + m_close_tag.size());
-                }
-
-                m_text_cache.clear();
-                m_think_tag_opened = false;
-                m_deactivated = true;
-                return delta_text;
-            }
-
-            // Closing tag not found - check if end might be partial match
-            size_t num_chars_to_keep = find_prefix_match_length(txt_chunk, m_close_tag);
+        const std::string txt_chunk = m_text_cache + delta_text;
+        std::string reason_str;
+        if (message.contains("reasoning_content")) {
+            reason_str = std::move(message["reasoning_content"].get_string());
+        }
+
+        // Cache find() results to avoid redundant searches
+        const auto open_idx = txt_chunk.find(m_open_tag);
+        const auto close_idx = txt_chunk.find(m_close_tag);
+
+        if (!m_think_tag_opened && open_idx != std::string::npos && m_expect_open_tag) {
+            // Check if close tag is also present after the open tag
+            const auto close_idx_after_open = (close_idx != std::string::npos && close_idx > open_idx)
+                ? close_idx : std::string::npos;
 
-            if (num_chars_to_keep > 0) {
-                // Keep potential partial match in cache
-                reason_str += txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep);
-                message["reasoning_content"] = reason_str;
-                m_text_cache = txt_chunk.substr(txt_chunk.size() - num_chars_to_keep);
+            if (close_idx_after_open != std::string::npos) {
+                handle_complete_reasoning(message, txt_chunk, open_idx, close_idx_after_open, delta_text);
             } else {
-                // No partial match - add all to reasoning
-                reason_str += txt_chunk;
-                message["reasoning_content"] = reason_str;
-                m_text_cache.clear();
-            }
-
-            if (!m_keep_original_content) {
-                delta_text.clear();
+                handle_open_tag(message, reason_str, txt_chunk, open_idx, delta_text);
             }
+        } else if (m_think_tag_opened && close_idx != std::string::npos) {
+            handle_close_tag(message, reason_str, txt_chunk, close_idx, delta_text);
+        } else if (m_think_tag_opened) {
+            handle_inside_reasoning(message, reason_str, txt_chunk, delta_text);
+        } else {
+            // Think tag was not opened yet and not found in the current delta_text.
+            // Accumulate text in the cache to detect if the open tag is split between several delta_text pieces.
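+            // Worked example (hypothetical chunking, assuming m_open_tag == "<think>"): the model
+            // may stream "<th" first and "ink>..." next. Caching "<th" here lets the next call see
+            // the complete "<think>" in m_text_cache + delta_text and open the reasoning state.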
+            m_text_cache += delta_text;
         }
-        // else: accumulating text before opening tag
 
         return delta_text;
     }
 
-    // Find the longest suffix of txt that is a prefix of close_tag
-    size_t find_prefix_match_length(const std::string& txt, const std::string& close_tag) const {
-        size_t max_check = std::min(txt.size(), close_tag.size() - 1);
-        for (size_t len = max_check; len >= 1; --len) {
-            if (txt.compare(txt.size() - len, len, close_tag, 0, len) == 0) {
-                return len;
-            }
-        }
-        return 0;
-    }
-
-public:
     void reset() {
         m_first_run = true;
         m_think_tag_opened = false;
-        m_text_cache.clear();
+        m_text_cache = "";
        m_deactivated = false;
    }
};

From 27b7bae9b1daa93b3283e29cb744289ce90dbb90 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 24 Oct 2025 16:07:39 +0200
Subject: [PATCH 41/43] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/cpp/include/openvino/genai/parsers.hpp | 21 +++++++++++++++++++++
 src/cpp/src/llm/pipeline.cpp               |  1 +
 src/cpp/src/text_streamer.cpp              |  7 +++----
 src/python/py_generation_config.cpp        |  2 +-
 src/python/py_streamers.cpp                |  2 +-
 5 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index d72f3b8d62..156d158aca 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -137,6 +137,27 @@ class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser {
 
 /**
  * @brief Abstract base class for incremental parsers that process text during streaming.
+ *
+ * Derived classes must implement both the `parse()` and `reset()` methods, as these are pure virtual.
+ *
+ * Use `IncrementalParser` when you need to process text as it is generated (e.g., in streaming scenarios),
+ * handling partial content and maintaining internal state between increments. Use `Parser` when you only
+ * need to process the complete text after generation has finished.
+ *
+ * Example:
+ * @code
+ * class MyIncrementalParser : public ov::genai::IncrementalParser {
+ * public:
+ *     std::string parse(JsonContainer& message, std::string& delta_text,
+ *                       const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) override {
+ *         // Implement incremental parsing logic here
+ *         return delta_text; // Example: simply return the input
+ *     }
+ *     void reset() override {
+ *         // Reset internal state here
+ *     }
+ * };
+ * @endcode
  */
 class OPENVINO_GENAI_EXPORTS IncrementalParser {
 public:
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index fe1222f779..de05b5b950 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -30,6 +30,7 @@ ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGen
 }
 
    // determine from generation config when 'need_to_reset_parser' will be available
+    // TODO: Determine 'need_to_reset_parser' from generation_config when available.
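+    // Hypothetical sketch of that wiring ('reset_parsers' is an assumed GenerationConfig
+    // field that does not exist yet):
+    //     bool need_to_reset_parser = generation_config.has_value() ? generation_config->reset_parsers : true;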
bool need_to_reset_parser = true; if (parser_streamer && need_to_reset_parser) { parser_streamer->reset(); diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index fca30b7aea..ee9db529cf 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -34,7 +34,6 @@ StreamingStatus TextStreamer::write(int64_t token) { if (!text.empty() && '\n' == text.back() && text.size() > m_printed_len) { // Flush the cache after the new line symbol res << std::string_view{text.data() + m_printed_len, text.size() - m_printed_len}; - // Get the list of tokens decoded for this chunk or rest of text. auto res_status = run_callback_if_needed(res.str()); m_tokens_cache.clear(); @@ -62,7 +61,7 @@ StreamingStatus TextStreamer::write(int64_t token) { if (print_until > -1 && print_until > m_printed_len) { // It is possible to have a shorter text after adding new token. - // Print to output only if text length is increaesed. + // Print to output only if text length is increased. res << std::string_view{text.data() + m_printed_len, print_until - m_printed_len} << std::flush; } @@ -150,11 +149,11 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector(parsers)} {} CallbackTypeVariant TextParserStreamer::write(std::string message) { - // When 'write' is called with string, it means new chunck of tokens is decoded into text + // When 'write' is called with string, it means new chunk of tokens is decoded into text auto flushed_tokens = std::vector(); if (message.back() == '\n') { - // Flush all tokens // TODO: m_decoded_lengths[m_decoded_lengths.size() - 1] = -1; + // Flush all tokens flushed_tokens.assign(m_tokens_cache.begin(), m_tokens_cache.end()); } else if (m_decoded_lengths.size() >= delay_n_tokens) { // prompt = "I was waiting for the bus.\n" diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp index 86ad684aa7..18ddbab248 100644 --- a/src/python/py_generation_config.cpp +++ b/src/python/py_generation_config.cpp @@ -445,7 +445,7 @@ void init_generation_config(py::module_& m) { .def_readwrite("include_stop_str_in_output", &GenerationConfig::include_stop_str_in_output) .def_readwrite("stop_token_ids", &GenerationConfig::stop_token_ids) .def_readwrite("structured_output_config", &GenerationConfig::structured_output_config) - .def_readwrite("parsers", &GenerationConfig::parsers) // TODO: add keep_alive + .def_readwrite("parsers", &GenerationConfig::parsers, py::keep_alive<1, 2>()) .def_readwrite("adapters", &GenerationConfig::adapters) .def_readwrite("apply_chat_template", &GenerationConfig::apply_chat_template) .def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id")) diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index f2df853e8b..349ec2a8c0 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -83,7 +83,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer { using TextParserStreamer::TextParserStreamer; // inherit base constructors StreamingStatus write(JsonContainer& message) override { - // Since c++ calls function with JsonContainer while python override expects py::dict, + // Since C++ calls function with JsonContainer while python override expects py::dict, // this function is a wrapper to call Python implementation of 'write' with py::dict py::gil_scoped_acquire acquire; From 6e847c0d6fcb93bb25373147bd0e5a67d2bc6710 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 24 Oct 2025 17:01:15 +0200 Subject: 
[PATCH 42/43] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/cpp/src/llm/pipeline.cpp | 2 +-
 src/cpp/src/parsers.cpp      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index de05b5b950..1f1f1e339d 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -17,7 +17,7 @@
 
 namespace {
 
-// This is an decorator function that wraps a generation callable to apply parsers and reset them before generation if needed.
+// This is a decorator function that wraps a generation callable to apply parsers and reset them before generation if needed.
 ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGenerationConfig& generation_config,
                                                     const ov::genai::StreamerVariant& streamer,
                                                     std::function<ov::genai::DecodedResults()> generate_callable) {
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index cd3ca11adc..d359fad2fc 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -45,14 +45,14 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
      */
     size_t find_close_tag_prefix_length(std::string_view text) const {
         const size_t max_check = std::min(text.size(), m_close_tag.size());
-
-        for (size_t i = max_check; i >= 1; --i) {
+        size_t longest_match = 0;
+        for (size_t i = 1; i <= max_check; ++i) {
             // Compare the last i characters of text with the first i characters of m_close_tag
             if (text.compare(text.size() - i, i, m_close_tag, 0, i) == 0) {
-                return i;
+                longest_match = i;
             }
         }
-        return 0;
+        return longest_match;
     }
 
     /**

From cbce8e6bddb661014d354bea8e3a51786cd92206 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 24 Oct 2025 17:20:31 +0200
Subject: [PATCH 43/43] improve test_reasoning_parser_cut_content

---
 tests/python_tests/test_parsers.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 8bc23c1f7d..3155e17fe1 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -205,7 +205,7 @@ def test_incremental_phi4_reason_parser_nostreamer(answer):
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("keep_original_content", [True, False])
-@pytest.mark.parametrize("do_reset", [True, False])
+@pytest.mark.parametrize("do_reset", [False])
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
     ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
     streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningIncrementalParser(expect_open_tag=True, keep_original_content=keep_original_content)])
-
+    num_runs = 2
+    msg = {}
     for i in range(num_runs):
         if do_reset:
             streamer.reset()
-        msg = {}
         for subword in stream_string:
             streamer._write(subword)
 
     think_content = answer.split("</think>")[0].replace("<think>", "")
-    content = answer
-
-    if do_reset:
-        # If has been reset, check that content is parsed correctly
-        assert msg['reasoning_content'] == think_content
-        assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
-    else:
-        # If has not been reset(), then content will contine to accumulate thinking parts from the next runs
-        msg['content'].find("<think>")
+
+    if do_reset:
+        # If the parser has been reset, check that content is parsed correctly
+        assert msg['reasoning_content'] == think_content
+        assert msg['content'] == (answer if keep_original_content
+                                  else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+    else:
+        # If reset() has not been called, msg['content'] will continue to accumulate thinking parts from the next runs
+        assert msg['content'].find("<think>") >= 0
 
 
 def test_incremental_deepseek_parser():
@@ -311,7 +310,7 @@ def write(self, message):
 
     for subword in stream_string:
         streamer._write(subword)
 
-    assert msg['main_text'] == ''.join(" world ")
+    assert msg['main_text'] == " world "
 
 
 @pytest.mark.precommit